Skip to content

Commit

Permalink
dunno what im doing
Browse files Browse the repository at this point in the history
  • Loading branch information
clee2000 committed Mar 8, 2024
1 parent 5dab11a commit 1e3d6f8
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 10 deletions.
50 changes: 40 additions & 10 deletions .ci/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,48 @@
FROM 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9:ba7f4e752ac8ffdec18f81381ed9c25d2962c074
FROM ubuntu:20.04

USER root
ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update || sudo apt-get install libgnutls30
RUN apt-get update
RUN apt-get install -y --no-install-recommends unzip p7zip-full sox libsox-dev libsox-fmt-all rsync
# Install common dependencies (so that this step can be cached separately)
COPY ./install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

COPY ./common.sh common.sh
COPY ./requirements.txt /opt/conda
RUN bash ./common.sh && rm common.sh /opt/conda/requirements.txt
# Install user
COPY ./install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

COPY ./download_data.sh download_data.sh
RUN bash download_data.sh && rm download_data.sh
COPY ./install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ENV ANACONDA_PYTHON_VERSION 3.10
ENV CONDA_CMAKE yes
ENV DOCS yes
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY ./requirements.txt /opt/conda/
COPY ./install_conda.sh install_conda.sh
COPY ./common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements.txt


# Install gcc
ENV GCC_VERSION 9
COPY ./install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh


# Install cuda and cudnn
ENV CUDA_VERSION 12.1.1
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# Install CUDNN
ENV CUDNN_VERSION 8
COPY ./install_cudnn.sh install_cudnn.sh
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh

USER jenkins
CMD ["bash"]
26 changes: 26 additions & 0 deletions .ci/docker/common_utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Work around bug where devtoolset replaces sudo and breaks it.
as_jenkins() {
# NB: unsetting the environment variables works around a conda bug
# https://github.com/conda/conda/issues/6576
# NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
# NB: This must be run from a directory that jenkins has access to,
# works around https://github.com/conda/conda-package-handling/pull/34
sudo -E -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
}

conda_install() {
# Ensure that the install command don't upgrade/downgrade Python
# This should be called as
# conda_install pkg1 pkg2 ... [-c channel]
as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" $*
}

conda_run() {
as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION --no-capture-output $*
}

pip_install() {
as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
}
42 changes: 42 additions & 0 deletions .ci/docker/install_base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker

set -ex

install_ubuntu() {
# Install common dependencies
apt-get update
# TODO: Some of these may not be necessary
apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake3=3.16* \
curl \
git \
wget \
sudo \
vim \
jq \
vim \
unzip \
gdb \
rsync \
libssl-dev

# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}


# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
73 changes: 73 additions & 0 deletions .ci/docker/install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash

set -ex

# Optionally install conda
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
BASE_URL="https://repo.anaconda.com/miniconda"

MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)

CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"

mkdir -p /opt/conda
chown jenkins:jenkins /opt/conda

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

pushd /tmp
wget -q "${BASE_URL}/${CONDA_FILE}"
# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
popd

# NB: Don't do this, rely on the rpath to get it right
#echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
#ldconfig
sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
export PATH="/opt/conda/bin:$PATH"

# Ensure we run conda in a directory that jenkins has write access to
pushd /opt/conda

# Prevent conda from updating to 4.14.0, which causes docker build failures
# See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
# Uncomment the below when resolved to track the latest conda update
# as_jenkins conda update -y -n base conda

# Install correct Python version
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION"

# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"

# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
# and libpython-static for torch deploy
conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"

# Use conda cmake in some cases. Conda cmake will be newer than our supported
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
# following builds that we know should use conda. Specifically, Ubuntu bionic
# and focal cannot find conda mkl with stock cmake, so we need a cmake from conda
conda_install cmake

conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch

# Install some other packages, including those needed for Python test reporting
pip_install -r /opt/conda/requirements.txt

apt-get update
apt-get -y install expect-dev

# HACK HACK HACK
# gcc-9 for ubuntu-18.04 from http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu
# Pulls llibstdc++6 13.1.0-8ubuntu1~18.04 which is too new for conda
# So remove libstdc++6.so.3.29 installed by https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0
# Same is true for gcc-12 from Ubuntu-22.04
if grep -e [12][82].04.[623] /etc/issue >/dev/null; then
rm /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/libstdc++.so.6
fi

popd
fi
24 changes: 24 additions & 0 deletions .ci/docker/install_cudnn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
else
print "Unsupported CUDA version ${CUDA_VERSION}"
exit 1
fi

tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cudnn
ldconfig
fi
22 changes: 22 additions & 0 deletions .ci/docker/install_docs_reqs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker
set -ex

apt-get update
# Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
apt-get install -y gpg-agent || :

curl --retry 3 -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
sudo apt-get install -y nodejs

curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list

apt-get update
apt-get install -y --no-install-recommends yarn
yarn global add katex --prefix /usr/local

sudo apt-get -y install doxygen

apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
16 changes: 16 additions & 0 deletions .ci/docker/install_gcc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

set -ex

# Need the official toolchain repo to get alternate packages
add-apt-repository ppa:ubuntu-toolchain-r/test
apt-get update
apt-get install -y g++-$GCC_VERSION
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50


# Cleanup package manager
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
26 changes: 26 additions & 0 deletions .ci/docker/install_user.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker
set -ex

# Mirror jenkins user in container
# jenkins user as ec2-user should have the same user-id
echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
echo "jenkins:x:1000:" >> /etc/group
# Needed on focal or newer
echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow

# Create $HOME
mkdir -p /var/lib/jenkins
chown jenkins:jenkins /var/lib/jenkins
mkdir -p /var/lib/jenkins/.ccache
chown jenkins:jenkins /var/lib/jenkins/.ccache

# Allow writing to /usr/local (for make install)
chown jenkins:jenkins /usr/local

# Allow sudo
# TODO: Maybe we shouldn't
echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins

# Test that sudo works
sudo -u jenkins sudo -v

0 comments on commit 1e3d6f8

Please sign in to comment.