Skip to content

Commit

Permalink
cuda: Add version 11.8.0
Browse files Browse the repository at this point in the history
Recent tensorflow is not ready for cuda 12.
Also, cuDNN has to be quite old (8.7.0.84); a
more current version would have been 8.9.1.23 (sigh).
  • Loading branch information
thomas committed Jun 9, 2023
1 parent 2950cf3 commit 72c2f63
Showing 1 changed file with 186 additions and 0 deletions.
186 changes: 186 additions & 0 deletions cuda-11.8.0-0.build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#! /bin/bash

# COOKIE=$(mcookie|cut -c-8); grep -v V_GREP_ME $0 > /dev/shm/runme-$COOKIE.sh ; sleep 0.3; exec bash /dev/shm/runme-$COOKIE.sh
# TESTING=1

# Download shortcuts, only cuda-toolkit-archive doesn't need an nvidia account
# https://developer.nvidia.com/cuda-toolkit-archive
# https://developer.nvidia.com/rdp/cudnn-archive
# https://developer.nvidia.com/nvidia-tensorrt-8x-download
# https://developer.nvidia.com/nccl/nccl-download
# https://developer.nvidia.com/hpc-sdk-downloads

# --- Package identity -------------------------------------------------------
# VERSION is MAJOR.MINOR; TOPMAJOR is MAJOR with its last dotted component
# removed (11.8 -> 11) and is used in the cuDNN and NCCL archive names.
PKG="cuda"
MAJOR="11.8"
TOPMAJOR="${MAJOR%.*}"
MINOR="0"
VERSION="${MAJOR}.${MINOR}"
BUILD="0"
DRIVERVERSION="520.61.05"

# Directory holding the pre-downloaded vendor archives.
REPO="/package/cuda/src"


# --- Archive names, all expected below ${REPO} --------------------------------
CUDAFILE="cuda_${VERSION}_${DRIVERVERSION}_linux.run"

CUDNNVER="8.7.0.84"
CUDNNFILE="cudnn-linux-x86_64-${CUDNNVER}_cuda${TOPMAJOR}-archive.tar.xz"

TENSORRTVER="8.6.1.6"
TENSORRTFILE="TensorRT-${TENSORRTVER}.Linux.x86_64-gnu.cuda-11.8.tar.gz"

NCCLVER="2.18.1-1"
NCCLFILE="nccl_${NCCLVER}+cuda${TOPMAJOR}.0_x86_64.txz"

# --- Install prefix -----------------------------------------------------------
# A non-empty TESTING redirects the installation to the scratch area.
if [ -n "$TESTING" ]; then
  PREFIX="/scratch/local2/${PKG}-${VERSION}-${BUILD}"
else
  PREFIX="/pkg/${PKG}-${VERSION}-${BUILD}"
fi

# Abort on the first failing command; create files readable by everyone.
set -e
umask 022

# Make sure we are not run as root, otherwise the cuda-installer will
# certainly screw up the machine.
if [ "$UID" = 0 ]; then
  echo '# Will not run as root!' >&2
  exit 1
fi

# A stale installer log owned by somebody else turns out to be a showstopper
# later on, thus fail early: rm -f ignores a missing file but still fails
# (and 'set -e' aborts) when the file exists and cannot be removed.
rm -vf /tmp/cuda-installer.log

# Check the given archive names early. Exit with a non-zero status so that a
# missing archive is reported as a failure (a bare 'exit' would return the
# status of the preceding echo, i.e. success).
for F in "$CUDAFILE" "$CUDNNFILE" "$TENSORRTFILE" "$NCCLFILE"; do
  if [ ! -e "$REPO/$F" ]; then
    echo "# '$F' not found in '$REPO', check for typos..." >&2
    exit 1
  fi
done

# Private scratch area for this build; it provides both TMPDIR and a
# throw-away HOME for everything that runs below.
BUILD_TMPDIR="/scratch/local2/$PKG-$VERSION-$BUILD.$USER.build.tmp"

# Start from a clean slate if a previous run left the directory behind.
# The path is quoted because it embeds $USER and is handed to 'rm -rf'.
if [ -d "$BUILD_TMPDIR" ]; then
  rm -rf -- "$BUILD_TMPDIR"
fi
mkdir -p -- "$BUILD_TMPDIR/home"
mkdir -p -- "$BUILD_TMPDIR/.local/share/applications"
export TMPDIR="$BUILD_TMPDIR"
export HOME="$BUILD_TMPDIR/home"

# Read stdin from /dev/null from here on, so nothing can block on input.
exec </dev/null

# Create the install prefix and write a 'profile' snippet into it that puts
# the package on PATH, LD_LIBRARY_PATH and PKG_CONFIG_PATH and exports
# CUDA_PATH.
# The heredoc delimiter is unquoted: $PREFIX is expanded right now, while the
# backslash-escaped \$PATH, \$LD_LIBRARY_PATH and \$PKG_CONFIG_PATH are written
# literally and only expand when the profile is sourced later.
# '<<-' strips leading tabs (not spaces) from the heredoc lines.
mkdir -p $PREFIX
cat >$PREFIX/profile <<-EOF
PATH=$PREFIX/bin:\$PATH
export CUDA_PATH=$PREFIX
LD_LIBRARY_PATH=$PREFIX/lib\${LD_LIBRARY_PATH:+:}\$LD_LIBRARY_PATH
if [ -d $PREFIX/.compatlibs ]; then LD_LIBRARY_PATH=$PREFIX/.compatlibs:\$LD_LIBRARY_PATH; fi
export LD_LIBRARY_PATH
PKG_CONFIG_PATH=$PREFIX/lib/pkgconfig\${PKG_CONFIG_PATH:+:}\$PKG_CONFIG_PATH
export PKG_CONFIG_PATH
EOF
# Apply the freshly written settings to this build shell as well.
. $PREFIX/profile

# All archives are copied to, and unpacked below, $PREFIX/build.
cd "$PREFIX"
BUILDDIR="$PREFIX/build"

mkdir -p "$BUILDDIR"
cd "$BUILDDIR"


echo "# Working on: $CUDAFILE"
# Reuse an installer that an earlier run already copied here.
if [ ! -e "$CUDAFILE" ]; then
  cp -vp "$REPO/$CUDAFILE" .
fi

# $PREFIX/cuda_tmp_dir: this is owed to the demented cuda installer, which happily fails with
# "Unable to write to directory: /pkg", "Permission denied. Unable to write to /pkg/cuda-11.0.3-0/" - what?
# OTOH the installer bundles its libraries and includes a bit better than before
installer_args=(
  "--installpath=$PREFIX/cuda_tmp_dir"
  "--tmpdir=$TMPDIR"
  --toolkit
  --samples
  --silent
  --nox11
)
bash "$CUDAFILE" "${installer_args[@]}"

### reorganize the tree a bit ###
# Everything below works on relative paths inside the temporary install
# directory; the result is moved up into $PREFIX at the end of this section.
cd $PREFIX/cuda_tmp_dir

# fix pc-files (step one): drop the temporary '/cuda_tmp_dir' path component
# from the 'cudaroot' line of every pkg-config file
sed \
-e '/^cudaroot/ s,/cuda_tmp_dir,,' \
-i pkgconfig/*.pc


# Maybe nsight is of some use, keep it (moved below 'extras')
mv nsight* extras

# Sanitizer is now at toplevel, put it back to the former location
mv compute-sanitizer extras

# stuff some of the other 'extras' into 'share', but keep
# - nvvm (nvidia virtual machine, see bin/nvcc.profile)
# - extras (or tensorflow would need extra tweaks, nerve)
# share/doc is created here as well.
mkdir -p share/doc
mv libnvvp nvml tools share

# fall back to decent locations for lib and include, the other packages do just as different as possible
# NOTE(review): plain 'rm' (no -r) is used, so 'include' and 'lib64' are
# presumably symlinks into targets/ - confirm against the installer layout.
rm include lib64
# Pull the contents of targets/x86_64-linux up to the top level, then drop
# the now superfluous 'targets' tree.
mv targets/x86_64-linux/* .
rm -r targets

# pkg-config files end up in lib/pkgconfig
mv pkgconfig lib
# and fix the pc files (step two): strip a trailing '/' from 'cudaroot' and
# remove the '/targets/x86_64-linux' component to match the flattened layout
sed \
-e '/^cudaroot/ s,/$,,' \
-e 's,/targets/x86_64-linux,,' \
-i lib/pkgconfig/*.pc

# adjust nvcc.profile for TARGET_DIR and TARGET_SIZE: remove the literal
# '$(_TARGET_SIZE_)' and '/$(_TARGET_DIR_)' placeholders (single quotes keep
# the shell from treating $(...) as a command substitution)
sed -e 's,$(_TARGET_SIZE_),,g' \
-e 's,/$(_TARGET_DIR_),,g' \
-i bin/nvcc.profile

# and up one level: move everything into $PREFIX and remove the temporary
# directory. rmdir fails if anything is left behind (e.g. hidden entries,
# which '*' does not match by default).
mv -v * .. ; cd .. ; rmdir cuda_tmp_dir
# now in $PREFIX: tuck the gds* entries into 'extras' too
mv -v gds* extras

### LIBRARIES ### https://developer.nvidia.com/gpu-accelerated-libraries

# cuDNN: unpack in a separate build directory, then merge the headers and
# libraries into the prefix.
cd "$BUILDDIR"

echo "# Working on: $CUDNNFILE"
# Reuse an archive that an earlier run already copied here.
if [ ! -e "$CUDNNFILE" ]; then
  cp -vp "$REPO/$CUDNNFILE" .
fi
mkdir -p cudnn
cd cudnn
tar -xf "../$CUDNNFILE"

# The archive is expected to unpack into a directory named like the file
# without its '.tar.xz' suffix. Globs stay outside the quotes so they expand.
ARCHDIR="${CUDNNFILE%.tar.xz}"
mv "$ARCHDIR"/include/cudnn*.h "$PREFIX/include"
mv "$ARCHDIR"/lib/libcudnn* "$PREFIX/lib"

# TensorRT: unpack in a separate build directory, then merge binaries,
# headers, stubs and libraries into the prefix.
cd "$BUILDDIR"

echo "# Working on: $TENSORRTFILE"
# Reuse an archive that an earlier run already copied here.
if [ ! -e "$TENSORRTFILE" ]; then
  cp -vp "$REPO/$TENSORRTFILE" .
fi
mkdir -p TensorRT
cd TensorRT
tar -xf "../$TENSORRTFILE"

# Top-level directory the archive is expected to unpack into.
TRTDIR="TensorRT-$TENSORRTVER"
mv "$TRTDIR"/bin/* "$PREFIX/bin"
mv "$TRTDIR"/include/* "$PREFIX/include"
# Make sure the stubs target is a directory; without it a single stub file
# would be renamed to 'stubs' instead of being moved into it.
mkdir -p "$PREFIX/lib/stubs"
mv "$TRTDIR"/lib/stubs/* "$PREFIX/lib/stubs"
# Remove the emptied stubs directory first, so that the following 'lib/*'
# move does not try to move it onto the existing $PREFIX/lib/stubs.
rmdir "$TRTDIR/lib/stubs"
mv "$TRTDIR"/lib/* "$PREFIX/lib"

# nccl - aka 'nickel'
cd "$BUILDDIR"

echo "# Working on: $NCCLFILE"
# Reuse an archive that an earlier run already copied here.
if [ ! -e "$NCCLFILE" ]; then
  cp -vp "$REPO/$NCCLFILE" .
fi
mkdir -p nccl
cd nccl
tar -xf "../$NCCLFILE"

# Top-level directory of the unpacked archive: the file name without its
# '.txz' suffix. Derived once from $NCCLFILE, so the archive name and the
# directory name cannot drift apart.
NCCLDIR="${NCCLFILE%.txz}"
mv "$NCCLDIR"/include/* "$PREFIX/include"
# 'lib/lib*' matches the libraries only, not the pkgconfig directory.
mv "$NCCLDIR"/lib/lib* "$PREFIX/lib"
# and here we have a pkgconfig too :)
# Point its 'prefix' line at our install location instead of /usr/local.
sed -e "/^prefix/ s,/usr/local,$PREFIX," -i "$NCCLDIR/lib/pkgconfig/nccl.pc"
mv "$NCCLDIR/lib/pkgconfig/nccl.pc" "$PREFIX/lib/pkgconfig"
mv "$NCCLDIR/LICENSE.txt" "$PREFIX/share/doc/nccl_LICENSE.txt"

# cosmetics ...
# Headers do not need the executable bit; -c reports each file it changes.
chmod -c -x "$PREFIX"/include/*.{h,hpp}

# Keep the installer log with the package, if one was written.
if [ -e /tmp/cuda-installer.log ]; then
  mv -v /tmp/cuda-installer.log "$PREFIX"
fi

# Explicit success status: a bare 'exit' returns the status of the last
# command, which used to be the failed 'test -e' whenever no log existed.
exit 0

0 comments on commit 72c2f63

Please sign in to comment.