#! /bin/bash

# --- package identity -------------------------------------------------------
# MAJOR is the single source of truth for the CUDA release; every file name
# below that embeds the CUDA version is derived from it.
PKG=cuda
MAJOR=10.2
MINOR=89
VERSION=$MAJOR.$MINOR
BUILD=1
DRIVERVERSION=440.33.01

# --- bundled add-on archives ------------------------------------------------
CUDNNVER=7.6.5.32
CUDNNFILE=cudnn-$MAJOR-linux-x64-v$CUDNNVER.tgz

TENSORRTVER=7.0.0.11
TENSORRTFILE=TensorRT-$TENSORRTVER.Ubuntu-18.04.x86_64-gnu.cuda-$MAJOR.cudnn7.6.tar.gz

NCCLVER=2.5.6-2
NCCLFILE=nccl_$NCCLVER+cuda${MAJOR}_x86_64.txz

# --- install prefix ---------------------------------------------------------
# A non-empty TESTING in the environment redirects the install to /dev/shm.
PREFIX=/pkg/$PKG-$VERSION-$BUILD
if [[ -n "${TESTING:-}" ]]; then
  PREFIX=/dev/shm/$PKG-$VERSION-$BUILD
fi

# Stop at the first failing command; mask out group/other write permission
# on everything created from here on.
set -e
umask 022

# Scratch area for the build, placed under /dev/shm.
# Be careful on machines with 8 GB of memory or less.
BUILD_TMPDIR=/dev/shm/$PKG-$VERSION-$BUILD.build.tmp
if [[ -d "$BUILD_TMPDIR" ]]; then
  # start from a clean slate if a previous run left one behind
  rm -rf "$BUILD_TMPDIR"
fi
mkdir -p "$BUILD_TMPDIR/home"

# Point temporary files and $HOME of all child processes at the scratch area.
export TMPDIR="$BUILD_TMPDIR" HOME="$BUILD_TMPDIR/home"

# Take stdin from /dev/null for the remainder of the script.
exec </dev/null

# Generate $PREFIX/profile and load it into this shell right away.
# $PREFIX is expanded now (baked into the file); the escaped \$PATH and
# \$LD_LIBRARY_PATH references are left for whoever sources the profile.
mkdir -p "$PREFIX"
cat >"$PREFIX/profile" <<EOF
PATH=$PREFIX/bin:\$PATH
export CUDA_PATH=$PREFIX
LD_LIBRARY_PATH=$PREFIX/lib\${LD_LIBRARY_PATH:+:}\$LD_LIBRARY_PATH
if [ -d $PREFIX/.compatlibs ]; then LD_LIBRARY_PATH=$PREFIX/.compatlibs:\$LD_LIBRARY_PATH; fi
export LD_LIBRARY_PATH
EOF
. "$PREFIX/profile"

# Work inside $PREFIX/build (created on demand).
cd "$PREFIX"
BUILDDIR=$PREFIX/build

mkdir -p "$BUILDDIR"
cd "$BUILDDIR"

# Name and download location of the CUDA runfile installer.
INSTALLFILE=cuda_${VERSION}_${DRIVERVERSION}_linux.run
URL=https://developer.nvidia.com/compute/cuda/$MAJOR/Prod/local_installers/$INSTALLFILE

# In case it was downloaded before: reuse the copy from the local archive.
if [[ ! -e "$INSTALLFILE" && -e "/package/cuda/src/$INSTALLFILE" ]]; then
  cp -vp "/package/cuda/src/$INSTALLFILE" "$INSTALLFILE"
fi

# Otherwise fetch it. The download goes to a '.part' name and is renamed only
# after wget returned successfully, so an interrupted transfer can never leave
# a truncated file that a later run would mistake for the complete installer.
# (wget and mv are separate statements on purpose: under 'set -e' a failing
# wget must abort the script, which it would not do as the left side of '&&'.)
if [[ ! -e "$INSTALLFILE" ]]; then
  wget -O "$INSTALLFILE.part" "$URL"
  mv -- "$INSTALLFILE.part" "$INSTALLFILE"
fi

# Unpack the installer payload into a fresh cuda_tmp directory, then enter it.
rm -rf cuda_tmp
mkdir -p cuda_tmp
bash "$INSTALLFILE" \
  --extract="$BUILDDIR/cuda_tmp" \
  --nox11

cd cuda_tmp

### reorganize the tree a bit ###
cd cuda-toolkit

# Throw away the Java-based Nsight profilers and related plugins.
rm -rf nsight-compute-* nsight-systems-* libnsight nsightee_plugins

# Move some of the 'extras' below share/, but leave in place:
# - nvvm   (NVIDIA virtual machine, see bin/nvcc.profile)
# - extras (otherwise tensorflow 2.1.0 would need extra tweaks)
mv -t share doc libnvvp nvml tools

# 'Elevate' libs and includes: remove the top-level include/lib64 entries
# (presumably symlinks -- a plain rm would fail on real directories) and pull
# the contents of targets/x86_64-linux up into the toolkit root.
rm include lib64
mv targets/x86_64-linux/* .

rm -r targets

# nvcc.profile still refers to the per-target layout; strip the
# $(_TARGET_SIZE_) and /$(_TARGET_DIR_) placeholders to match the flat tree.
sed -i \
    -e 's,$(_TARGET_SIZE_),,g' \
    -e 's,/$(_TARGET_DIR_),,g' \
    bin/nvcc.profile

### return ###
cd "$BUILDDIR/cuda_tmp"

# A rename was apparently overlooked upstream: on the SAMPLES_SRC line of the
# samples install script, replace 'samples' with '../cuda-samples'.
sed -i -e '/^SAMPLES_SRC/ s,samples,../cuda-samples,' "cuda-samples/bin/cuda-install-samples-$MAJOR.sh"

# We don't need the distro-aware handler for finding glut & co: comment out
# the findgllib.mk include in every Makefile that sits next to such a file.
# find's output is consumed NUL-delimited, so directory names containing
# whitespace or glob characters are not split or expanded by the shell.
while IFS= read -r -d '' gl_mk; do
  makefile=${gl_mk%/*}/Makefile
  if [[ -e "$makefile" ]]; then
    sed -i -e '/findgllib.mk/ s/include/# include/' "$makefile"
  fi
done < <(find . -name findgllib.mk -print0)

# Install the toolkit into the prefix, then merge cublas (which is pretty
# redundant) on top of it: its lib64 is renamed to lib first so that it lands
# in $PREFIX/lib, and --update keeps files in the prefix that are not older.
mv cuda-toolkit/* "$PREFIX"
mv cublas/lib64 cublas/lib
cp --archive --update cublas/* "$PREFIX"
rm --recursive cublas/*

# The samples go below share/.
mv cuda-samples "$PREFIX/share"

# cuDNN: take the archive from the local source directory unless it is
# already present in the build directory, unpack it, and install the header,
# the libraries and the license text.
cd "$BUILDDIR"
if [[ ! -e "$CUDNNFILE" ]]; then
  cp "/package/cuda/src/$CUDNNFILE" .
fi
mkdir -p cudnn
cd cudnn
tar -xf "../$CUDNNFILE"
mv cuda/include/cudnn.h "$PREFIX/include"
mv cuda/lib64/libcudnn* "$PREFIX/lib"
mv cuda/NVIDIA_SLA_cuDNN_Support.txt "$PREFIX/share/doc"

# TensorRT: same procedure -- local archive unless already present, unpack,
# then merge bin/, include/ and lib/ into the prefix.
cd "$BUILDDIR"
if [[ ! -e "$TENSORRTFILE" ]]; then
  cp "/package/cuda/src/$TENSORRTFILE" .
fi
mkdir -p TensorRT
cd TensorRT
tar -xf "../$TENSORRTFILE"

trt_root=TensorRT-$TENSORRTVER
mv "$trt_root"/bin/* "$PREFIX/bin"
mv "$trt_root"/include/* "$PREFIX/include"
# The stubs are moved first and their (then empty) directory removed, so the
# lib/* glob below does not try to move a 'stubs' directory over the one
# already in the prefix.
mv "$trt_root"/lib/stubs/* "$PREFIX/lib/stubs"
rmdir "$trt_root/lib/stubs"
mv "$trt_root"/lib/* "$PREFIX/lib"

# NCCL (aka 'nickel'): local archive unless already present, unpack, then
# install headers, libraries and the license text.
cd "$BUILDDIR"
if [[ ! -e "$NCCLFILE" ]]; then
  cp "/package/cuda/src/$NCCLFILE" .
fi
mkdir -p nccl
cd nccl
tar -xf "../$NCCLFILE"

# The archive unpacks into a directory named like the archive minus '.txz'.
nccl_root=${NCCLFILE%.txz}
mv "$nccl_root"/include/* "$PREFIX/include"
mv "$nccl_root"/lib/lib* "$PREFIX/lib"
mv "$nccl_root/LICENSE.txt" "$PREFIX/share/doc/nccl_LICENSE.txt"

exit