diff --git a/huggingface/pytorch/inference/buildspec-2-1-0.yml b/huggingface/pytorch/inference/buildspec-2-1-0.yml new file mode 100644 index 000000000000..a677b85870ea --- /dev/null +++ b/huggingface/pytorch/inference/buildspec-2-1-0.yml @@ -0,0 +1,58 @@ +account_id: &ACCOUNT_ID +region: ®ION +base_framework: &BASE_FRAMEWORK pytorch +framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK] +version: &VERSION 2.1.0 +short_version: &SHORT_VERSION "2.1" +contributor: huggingface +arch_type: x86 + +repository_info: + inference_repository: &INFERENCE_REPOSITORY + image_type: &INFERENCE_IMAGE_TYPE inference + root: !join [ "huggingface/", *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ] + repository_name: &REPOSITORY_NAME !join ["pr", "-", "huggingface", "-", *BASE_FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + +context: + inference_context: &INFERENCE_CONTEXT + mms-entrypoint: + source: ../../build_artifacts/inference/mms-entrypoint.py + target: mms-entrypoint.py + config: + source: ../../build_artifacts/inference/config.properties + target: config.properties + deep_learning_container: + source: ../../../src/deep_learning_container.py + target: deep_learning_container.py + +images: + BuildHuggingFacePytorchCpuPy310InferenceDockerImage: + <<: *INFERENCE_REPOSITORY + build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false + image_size_baseline: 15000 + device_type: &DEVICE_TYPE cpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py310 + os_version: &OS_VERSION ubuntu22.04 + transformers_version: &TRANSFORMERS_VERSION 4.37.0 + tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ] + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] + context: + <<: *INFERENCE_CONTEXT + BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage: + <<: *INFERENCE_REPOSITORY + build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false + image_size_baseline: &IMAGE_SIZE_BASELINE 15000 + device_type: &DEVICE_TYPE gpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py310 + cuda_version: &CUDA_VERSION cu118 + os_version: &OS_VERSION ubuntu20.04 + transformers_version: &TRANSFORMERS_VERSION 4.37.0 + tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', + *CUDA_VERSION, '-', *OS_VERSION ] + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, + *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] + context: + <<: *INFERENCE_CONTEXT diff --git a/huggingface/pytorch/inference/buildspec.yml b/huggingface/pytorch/inference/buildspec.yml index a677b85870ea..3ab4ff9f31cf 100644 --- a/huggingface/pytorch/inference/buildspec.yml +++ b/huggingface/pytorch/inference/buildspec.yml @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID region: ®ION base_framework: &BASE_FRAMEWORK pytorch framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK] -version: &VERSION 2.1.0 -short_version: &SHORT_VERSION "2.1" +version: &VERSION 2.3.0 +short_version: &SHORT_VERSION "2.3" contributor: huggingface arch_type: x86 @@ -27,29 +27,29 @@ context: target: deep_learning_container.py images: - BuildHuggingFacePytorchCpuPy310InferenceDockerImage: + BuildHuggingFacePytorchCpuPy311InferenceDockerImage: <<: *INFERENCE_REPOSITORY build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false image_size_baseline: 15000 device_type: &DEVICE_TYPE cpu python_version: &DOCKER_PYTHON_VERSION py3 - tag_python_version: &TAG_PYTHON_VERSION py310 + tag_python_version: &TAG_PYTHON_VERSION py311 os_version: &OS_VERSION ubuntu22.04 - transformers_version: &TRANSFORMERS_VERSION 4.37.0 + transformers_version: &TRANSFORMERS_VERSION 4.46.1 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ] docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] context: <<: *INFERENCE_CONTEXT - BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage: + BuildHuggingFacePytorchGpuPy311Cu121InferenceDockerImage: <<: *INFERENCE_REPOSITORY build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false image_size_baseline: &IMAGE_SIZE_BASELINE 15000 device_type: &DEVICE_TYPE gpu python_version: &DOCKER_PYTHON_VERSION py3 - tag_python_version: &TAG_PYTHON_VERSION py310 - cuda_version: &CUDA_VERSION cu118 + tag_python_version: &TAG_PYTHON_VERSION py311 + cuda_version: &CUDA_VERSION cu121 os_version: &OS_VERSION ubuntu20.04 - transformers_version: &TRANSFORMERS_VERSION 4.37.0 + transformers_version: &TRANSFORMERS_VERSION 4.46.1 tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *CUDA_VERSION, '-', *OS_VERSION ] docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, diff --git a/huggingface/pytorch/inference/docker/2.3/py3/Dockerfile.cpu b/huggingface/pytorch/inference/docker/2.3/py3/Dockerfile.cpu new file mode 100644 index 000000000000..631921de5888 --- /dev/null +++ b/huggingface/pytorch/inference/docker/2.3/py3/Dockerfile.cpu @@ -0,0 +1,249 @@ +FROM ubuntu:22.04 AS base_image + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.11.9 +ARG MINIFORGE3_VERSION=23.11.0-0 +ARG OPEN_MPI_VERSION=4.1.5 +ARG MMS_VERSION=1.1.11 + +# PyTorch Binaries and versions. +ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torch-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl +ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchvision-0.18.0%2Bcpu-cp311-cp311-linux_x86_64.whl +ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchaudio-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl + +# HF ARGS +ARG TRANSFORMERS_VERSION +ARG HUGGINGFACE_HUB_VERSION=0.25.1 +ARG DIFFUSERS_VERSION=0.31.0 +ARG PEFT_VERSION=0.13.2 +ARG ACCELERATE_VERSION=1.1.0 + +# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20 +ENV DEBIAN_FRONTEND=noninteractive + +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" +ENV PYTHONIOENCODING=UTF-8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 +ENV PATH=/opt/conda/bin:$PATH + +# Set Env Variables for the images +ENV TEMP=/tmp +ENV MKL_THREADING_LAYER=GNU + +ENV DLC_CONTAINER_TYPE=inference + +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get install -y --no-install-recommends \ + software-properties-common \ + build-essential \ + ca-certificates \ + ccache \ + numactl \ + gcc-12 \ + g++-12 \ + make \ + cmake \ + curl \ + emacs \ + git \ + jq \ + libcurl4-openssl-dev \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libsm6 \ + libssl-dev \ + libxext6 \ + libxrender-dev \ + openjdk-17-jdk \ + openssl \ + unzip \ + vim \ + wget \ + libjpeg-dev \ + libpng-dev \ + zlib1g-dev \ + libsndfile1-dev \ + ffmpeg \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 \ + && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 \ + && update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 \ + && update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 \ + && apt-get clean + +# Install OpenMPI +RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPEN_MPI_VERSION}.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +# The ENV variables declared below are changed in the previous section +# Grouping these ENV variables in the first section causes +# ompi_info to fail. This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +# Install CondaForge miniconda +RUN curl -L -o ~/miniforge3.sh https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE3_VERSION}/Miniforge3-${MINIFORGE3_VERSION}-Linux-x86_64.sh \ + && chmod +x ~/miniforge3.sh \ + && ~/miniforge3.sh -b -p /opt/conda \ + && rm ~/miniforge3.sh \ + && /opt/conda/bin/conda install -c conda-forge \ + python=${PYTHON_VERSION} \ + cython \ + "mkl<2024.1.0" \ + mkl-include \ + parso \ + scipy \ + typing \ + h5py \ + requests \ + libgcc \ + cmake \ + packaging \ + "awscli<2" \ + boto3 \ + pyyaml \ + conda-content-trust \ + charset-normalizer \ + requests \ + "idna>=3.7" \ + "tqdm>=4.66.3" \ + "zstandard>=0.22.0" \ + && /opt/conda/bin/conda clean -afy \ + && rm -rf /etc/apt/sources.list.d/* + +# symlink pip for OS use +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 + +# Install Common python packages +RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -U \ + opencv-python \ + "pyopenssl>=24.0.0" \ + "cryptography>=42.0.5" \ + "ipython>=8.10.0,<9.0" \ + "awscli<2" \ + "urllib3>=1.26.18,<2" \ + "prompt-toolkit<3.0.39" \ + "setuptools>=70.0.0" + +# Ensure PyTorch did not get installed from Conda or pip, prior to now +# is CPU image, removing nvgpu +# Any Nvidia installs for the DLC will be below, removing nvidia and cuda packages from pip here +# Even the GPU image would not have nvidia or cuda packages in PIP. +RUN pip uninstall -y torch torchvision torchaudio multi-model-server + +# Install AWS-PyTorch, and other torch packages +RUN pip install --no-cache-dir -U \ + enum-compat==0.0.3 \ + "Pillow>=9.0.0" \ + ${TORCH_URL} \ + ${TORCHVISION_URL} \ + ${TORCHAUDIO_URL} + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +# Patches +# py-vuln: 71064 +RUN pip install --no-cache-dir -U "requests>=2.32.3" + +# add necessary certificate for aws sdk cpp download +RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt + +# create user and folders +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp /opt/ml/model \ + && chown -R model-server /home/model-server /opt/ml/model + +# add MMS entrypoint +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /etc/sagemaker-mms.properties +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +# add telemetry +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +RUN chmod +x /usr/local/bin/deep_learning_container.py + +################################# +# Hugging Face specific section # +################################# + +# install Hugging Face libraries and its dependencies +RUN pip install --no-cache-dir \ + # hf_transfer will be a built-in feature, remove the extra then + huggingface_hub[hf_transfer]==${HUGGINGFACE_HUB_VERSION} \ + transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \ + diffusers==${DIFFUSERS_VERSION} \ + peft==${PEFT_VERSION} \ + accelerate==${ACCELERATE_VERSION} \ + "protobuf>=3.19.5,<=3.20.2" \ + "sagemaker-huggingface-inference-toolkit==2.4.1" + +# hf_transfer will be a built-in feature, remove the env variavle then +ENV HF_HUB_ENABLE_HF_TRANSFER="1" + +##################### +# IPEX installation # +##################### + +# Skip ipex installation for now due to error: 0.18.0+cpu, the required version for compiling is 0.18.0+cpu... +# Install IPEx and its dependencies +# from source is mandatory for cutomized AWS PyTorch binaries: https://github.com/intel/intel-extension-for-pytorch/issues/317 +# RUN pip install --no-cache-dir intel-openmp tbb pyyaml +# RUN cd /opt/ \ +# && mkdir -p ipex \ +# && cd /opt/ipex \ +# && wget https://github.com/intel/intel-extension-for-pytorch/raw/v2.3.0%2Bcpu/scripts/compile_bundle.sh \ +# && MODE=3 bash compile_bundle.sh \ +# && rm -rf /opt/ipex && cd /opt/ + +# IPEx installation installs the numpy==1.25.1. That causes a pip check failure due to incompatibility with numba. +# Re-installing numpy after IPEx installation to get the appropriate numpy version and fix pip checks. +# RUN pip install --no-cache-dir \ +# "numpy<1.25" \ +# "pyyaml>=5.4" + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.3/license.txt + +## Cleanup ## +RUN pip cache purge \ + && rm -rf /tmp/tmp* \ + && rm -iRf /root/.cache \ + && rm -rf /opt/llvm-project \ + && rm -rf opt/intel-extension-for-pytorch + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["serve"] \ No newline at end of file diff --git a/huggingface/pytorch/inference/docker/2.3/py3/cu121/Dockerfile.gpu b/huggingface/pytorch/inference/docker/2.3/py3/cu121/Dockerfile.gpu new file mode 100644 index 000000000000..26c6da73d603 --- /dev/null +++ b/huggingface/pytorch/inference/docker/2.3/py3/cu121/Dockerfile.gpu @@ -0,0 +1,265 @@ +FROM nvidia/cuda:12.1.1-base-ubuntu20.04 AS base_image + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true +LABEL com.amazonaws.sagemaker.inference.cuda.verified_versions=12.2 + +ARG MMS_VERSION=1.1.11 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.11.9 +ARG MINIFORGE3_VERSION=23.11.0-0 +ARG OPEN_MPI_VERSION=4.1.5 + +# Nvidia software versions +ARG CUBLAS_VERSION=12.1.3.1 +ARG CUDNN_VERSION=8.9.2.26 +ARG NCCL_VERSION=2.20.5 +ARG NVML_VERSION=12.1.55 + +# PyTorch Binaries and versions. +ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cuda12.1.1/torch-2.3.0%2Bcu121-cp311-cp311-linux_x86_64.whl +ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cuda12.1.1/torchvision-0.18.0%2Bcu121-cp311-cp311-linux_x86_64.whl +ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cuda12.1.1/torchaudio-2.3.0%2Bcu121-cp311-cp311-linux_x86_64.whl +ARG TRITON_VERSION=2.3.0 + +# HF ARGS +ARG TRANSFORMERS_VERSION +ARG HUGGINGFACE_HUB_VERSION=0.25.1 +ARG DIFFUSERS_VERSION=0.31.0 +ARG PEFT_VERSION=0.13.2 +ARG ACCELERATE_VERSION=1.0.1 + +# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20 +ENV DEBIAN_FRONTEND=noninteractive + +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" +ENV PYTHONIOENCODING=UTF-8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 +ENV PATH=/opt/conda/bin:$PATH + +# Set Env Variables for the images +ENV TEMP=/tmp +ENV MKL_THREADING_LAYER=GNU + +# Cuda Arch List setting Options +ENV TORCH_CUDA_ARCH_LIST="5.0 7.0+PTX 7.5+PTX 8.0 8.6 9.0" + +ENV DLC_CONTAINER_TYPE=inference + +WORKDIR / + +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ + build-essential \ + ca-certificates \ + cmake \ + libgssapi-krb5-2 \ + libcurl4-openssl-dev \ + cuda-cudart-12-1 \ + cuda-cudart-dev-12-1 \ + cuda-libraries-12-1 \ + cuda-libraries-dev-12-1 \ + cuda-command-line-tools-12-1 \ + cuda-nvcc-12-1 \ + libcublas-12-1=${CUBLAS_VERSION}-1 \ + libcublas-dev-12-1=${CUBLAS_VERSION}-1 \ + cuda-nvml-dev-12-1=${NVML_VERSION}-1 \ + libcudnn8=${CUDNN_VERSION}-1+cuda12.1 \ + curl \ + emacs \ + git \ + jq \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libgomp1 \ + libibverbs-dev \ + libnuma1 \ + libnuma-dev \ + libsm6 \ + libssl1.1 \ + libssl-dev \ + libxext6 \ + libxrender-dev \ + openjdk-17-jdk \ + openssl \ + vim \ + wget \ + unzip \ + libjpeg-dev \ + libpng-dev \ + zlib1g-dev \ + openssh-client \ + openssh-server \ + python3-dev \ + libsndfile1-dev \ + ffmpeg \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +# Install NCCL +RUN cd /tmp \ + && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION}-1 \ + && cd nccl \ + && make -j64 src.build BUILDDIR=/usr/local \ + && rm -rf /tmp/nccl +# preload system nccl for PyTorch to use if it is dynamically linking NCCL +ENV LD_PRELOAD="/usr/local/lib/libnccl.so" + +# Install OpenMPI +RUN wget --quiet https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPEN_MPI_VERSION}.tar.gz \ + && gunzip -c openmpi-${OPEN_MPI_VERSION}.tar.gz | tar xf - \ + && cd openmpi-${OPEN_MPI_VERSION} \ + && ./configure --prefix=/home/.openmpi --with-cuda \ + && make all install \ + && cd .. \ + && rm openmpi-${OPEN_MPI_VERSION}.tar.gz \ + && rm -rf openmpi-${OPEN_MPI_VERSION} + +# The ENV variables declared below are changed in the previous section +# Grouping these ENV variables in the first section causes +# ompi_info to fail. This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +# Install CondaForge miniconda +RUN curl -L -o ~/miniforge3.sh https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE3_VERSION}/Miniforge3-${MINIFORGE3_VERSION}-Linux-x86_64.sh \ + && chmod +x ~/miniforge3.sh \ + && ~/miniforge3.sh -b -p /opt/conda \ + && rm ~/miniforge3.sh + +# Install common conda packages +RUN /opt/conda/bin/conda install -y -c conda-forge \ + python=${PYTHON_VERSION} \ + cython \ + "mkl<2024.1.0" \ + mkl-include \ + parso \ + scipy \ + numpy \ + pandas \ + pyarrow \ + typing \ + h5py \ + libgcc \ + cmake \ + packaging \ + "awscli<2" \ + boto3 \ + pyyaml \ + packaging \ + conda-content-trust \ + charset-normalizer \ + requests \ + "idna>=3.7"\ + "tqdm>=4.66.3" \ + "zstandard>=0.22.0" \ + && /opt/conda/bin/conda clean -afy \ + && rm -rf /etc/apt/sources.list.d/* + +# symlink pip for OS use +RUN pip install --upgrade pip --no-cache-dir --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 + +# Install Common python packages +RUN pip install --no-cache-dir -U \ + opencv-python \ + # "nvgpu" is a dependency of TS but is disabled in SM DLC build, + # via ENV Variable "TS_DISABLE_SYSTEM_METRICS=true" in the SM section of this file. + # due to incompatibility with SM hosts + nvgpu \ + "pyopenssl>=24.0.0" \ + enum-compat==0.0.3 \ + captum \ + "Pillow>=9.0.0" \ + "cryptography>=42.0.5" \ + "ipython>=8.10.0,<9.0" \ + "urllib3>=1.26.18,<2" \ + "prompt-toolkit<3.0.39" \ + "setuptools>=70.0.0" + +# Ensure PyTorch did not get installed from Conda or pip, prior to now +# Any Nvidia installs for the DLC will be below, removing nvidia and cuda packages from pip here +# Even the GPU image would not have nvidia or cuda packages in PIP. +RUN pip uninstall -y torch torchvision torchaudio torchdata model-archiver multi-model-server + +# Install AWS-PyTorch, and other torch packages +RUN pip install --no-cache-dir -U \ + # triton required for torch inductor + triton==${TRITON_VERSION} \ + ${TORCH_URL} \ + ${TORCHVISION_URL} \ + ${TORCHAUDIO_URL} + +# Patches +# py-vuln: 71064 +RUN pip install --no-cache-dir -U "requests>=2.32.3" + +# add necessary certificate for aws sdk cpp download +RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt + +# Install MMS +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +# create user and folders +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp /opt/ml/model \ + && chown -R model-server /home/model-server /opt/ml/model + +# add MMS entrypoint +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /etc/sagemaker-mms.properties +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +# add telemetry +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +RUN chmod +x /usr/local/bin/deep_learning_container.py + +################################# +# Hugging Face specific section # +################################# + +# install Hugging Face libraries and its dependencies +RUN pip install --no-cache-dir \ + # hf_transfer will be a built-in feature, remove the extra then + huggingface_hub[hf_transfer]==${HUGGINGFACE_HUB_VERSION} \ + transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \ + diffusers==${DIFFUSERS_VERSION} \ + peft==${PEFT_VERSION} \ + accelerate==${ACCELERATE_VERSION} \ + "sagemaker-huggingface-inference-toolkit==2.4.1" + +# hf_transfer will be a built-in feature, remove the env variavle then +ENV HF_HUB_ENABLE_HF_TRANSFER="1" + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.3/license.txt + +## Cleanup ## +RUN pip cache purge \ + && rm -rf /tmp/tmp* \ + && rm -iRf /root/.cache + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["serve"] \ No newline at end of file diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_diffusers_model.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_diffusers_model.py index 0358fc1ea026..b0b1a59a72c1 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_diffusers_model.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_diffusers_model.py @@ -65,7 +65,7 @@ def test_diffusers_gpu_hosting( framework, _ = get_framework_and_version_from_tag(ecr_image) if "pytorch" not in framework: pytest.skip(f"Skipping test for non-pytorch image - {ecr_image}") - instance_type = instance_type or "ml.p3.2xlarge" + instance_type = instance_type or "ml.g5.4xlarge" invoke_sm_endpoint_helper_function( ecr_image=ecr_image, sagemaker_regions=sagemaker_regions, diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_ipex_inference.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_ipex_inference.py index 4d22191d3c20..61c6609324b6 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_ipex_inference.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_ipex_inference.py @@ -32,6 +32,7 @@ @pytest.mark.processor("cpu") @pytest.mark.cpu_test @pytest.mark.team("sagemaker-1p-algorithms") +@pytest.mark.skip("Skip for pytorch 2.3, since ipex installation failed.") def test_ipex_hosting(framework_version, ecr_image, instance_type, sagemaker_regions, py_version): framework, _ = get_framework_and_version_from_tag(ecr_image) if "pytorch" not in framework: diff --git a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_torch_compile.py b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_torch_compile.py index 10063bccbb07..5fd3b999d5d0 100644 --- a/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_torch_compile.py +++ b/test/sagemaker_tests/huggingface/inference/integration/sagemaker/test_torch_compile.py @@ -67,7 +67,7 @@ def test_torch_compile_gpu_hosting( pytest.skip(f"Skipping test for non-pytorch image - {ecr_image}") if Version(framework_version) < Version("2.0"): pytest.skip("Skipping torch compile tests for PT 1.X") - instance_type = instance_type or "ml.p3.2xlarge" + instance_type = instance_type or "ml.g5.4xlarge" invoke_sm_endpoint_helper_function( ecr_image=ecr_image, sagemaker_regions=sagemaker_regions, diff --git a/test/sagemaker_tests/huggingface/inference/requirements.txt b/test/sagemaker_tests/huggingface/inference/requirements.txt index 0ed2dd6ba8f4..c2676a72a6bf 100644 --- a/test/sagemaker_tests/huggingface/inference/requirements.txt +++ b/test/sagemaker_tests/huggingface/inference/requirements.txt @@ -27,5 +27,3 @@ fabric invoke gitpython toml -huggingface_hub==0.23.2 -transformers==4.28.1