Review notes (free text ahead of the first `diff --git` header, not patch content):
- Reflowed from a flattened copy of the patch. Indentation and runs of spaces
  inside lines (including the welcome banner art) are reconstructed, not recovered.
- The blob ids on the `index` lines are carried over from the original patch and
  do not reflect the edited file contents.

diff --git a/.github/workflows/build-ami.yml b/.github/workflows/build-ami.yml
new file mode 100644
index 000000000..e41c5f712
--- /dev/null
+++ b/.github/workflows/build-ami.yml
@@ -0,0 +1,86 @@
+# Builds the AWS Neuron AMI with Packer.
+# Triggers: push and pull request to main when infrastructure/ami changes,
+# manual dispatch (optional `tag` input), and a schedule.
+name: Build AWS Neuron AMI
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'infrastructure/ami/**'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'infrastructure/ami/**'
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: 'Tag to use for the AMI build'
+        default: 'main'
+  schedule:
+    # Midnight UTC on every second day of the month (1st, 3rd, 5th, ...)
+    - cron: '0 0 */2 * *'
+
+jobs:
+  build-ami:
+    defaults:
+      run:
+        working-directory: infrastructure/ami
+    runs-on: ubuntu-latest
+    env:
+      AWS_REGION: us-east-1
+      # Only manual runs carry a `tag` input; every other trigger falls back to
+      # the default branch. The value reaches the shell through the environment
+      # instead of being expanded inside `run:`, so it is never parsed as script.
+      OPTIMUM_VERSION: ${{ github.event.inputs.tag || github.event.repository.default_branch }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Manual runs check out the requested tag, other triggers the triggering ref
+          ref: ${{ github.event.inputs.tag || github.ref }}
+
+      - name: Setup Packer
+        # NOTE(review): tracks a moving branch; consider pinning a release tag or commit SHA
+        uses: hashicorp/setup-packer@main
+
+      - name: configure aws credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID_BUILD_AMI }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_KEY_SECRET_BUILD_AMI }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Packer format
+        id: format
+        # Advisory: report formatting drift without failing the job
+        run: packer fmt -check -diff hcl2-files
+        continue-on-error: true
+
+      - name: Packer Init
+        id: init
+        # Must succeed: the following steps run against the same templates
+        run: packer init hcl2-files
+
+      - name: Packer Validate
+        id: validate
+        # Must succeed: do not launch a build from a template that fails validation
+        run: packer validate -var "optimum_version=${OPTIMUM_VERSION}" -var "region=${AWS_REGION}" hcl2-files
+
+      - name: Packer Build
+        id: build
+        run: packer build -var "optimum_version=${OPTIMUM_VERSION}" -var "region=${AWS_REGION}" hcl2-files
+
+      - name: Slack Notification on Failure
+        id: slack
+        uses: slackapi/slack-github-action@v1.25.0
+        if: ${{ failure() && github.event_name == 'schedule' }}
+        with:
+          channel-id: 'C06GAEQJLNN'  # copied from slack channel
+          payload: |
+            {
+              "text": "GitHub Action HuggingFace Neuron AMI Build result: ${{job.status}}"
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
diff --git a/infrastructure/ami/README.md b/infrastructure/ami/README.md
new file mode 100644
index 000000000..006812b14
--- /dev/null
+++ b/infrastructure/ami/README.md
@@ -0,0 +1,78 @@
+# Building AMI with Packer
+
+This directory contains the files for building an AMI using [Packer](https://github.com/hashicorp/packer) that is later published as an AWS Marketplace asset.
+
+
+## Folder Structure
+
+- [hcl2-files](./hcl2-files/) - Includes different files which are used by a Packer pipeline to build an AMI. The files are:
+  - [build.pkr.hcl](./hcl2-files/build.pkr.hcl): contains the [build](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/build) block, defining the builders to start, provisioning them using [provisioner](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/build/provisioner), and specifying actions to take with the built artifacts using `post-processor` blocks.
+  - [variables.pkr.hcl](./hcl2-files/variables.pkr.hcl): contains the [variables](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/variable) block, defining variables within your Packer configuration.
+  - [sources.pkr.hcl](./hcl2-files/sources.pkr.hcl): contains the [source](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/source) block, defining reusable builder configuration blocks.
+  - [packer.pkr.hcl](./hcl2-files/packer.pkr.hcl): contains the [packer](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/packer) block, used to configure some behaviors of Packer itself, such as the minimum required Packer version needed to apply to your configuration.
+- [scripts](./scripts): contains scripts used by [provisioner](https://developer.hashicorp.com/packer/docs/templates/hcl_templates/blocks/build/provisioner) for installing additional packages/software.
+
+
+### Prerequisites
+- [Packer](https://developer.hashicorp.com/packer/docs/intro): Packer is an open source tool for creating identical machine images for multiple platforms from a single source configuration.
+
+- AWS Credentials: You need to have AWS credentials configured on your machine. You can configure AWS credentials using [AWS CLI](https://github.com/aws/aws-cli) or by setting environment variables.
+
+#### Install Packer on Ubuntu/Debian
+```bash
+curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
+sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
+sudo apt-get update && sudo apt-get install packer
+```
+
+You can also install Packer for other OS from [here](https://developer.hashicorp.com/packer/tutorials/docker-get-started/get-started-install-cli).
+
+#### Configure AWS Credentials
+
+Using Environment Variables:
+```bash
+export AWS_ACCESS_KEY_ID=<your-access-key-id>
+export AWS_SECRET_ACCESS_KEY=<your-secret-access-key>
+```
+
+Using AWS CLI:
+```bash
+aws configure sso
+```
+
+There are other ways to configure AWS credentials. You can read more about it [here](https://github.com/aws/aws-cli?tab=readme-ov-file#configuration).
+
+### Build AMI
+
+#### Format Packer blocks
+You can format your HCL2 files locally. This command will update your files in place.
+
+Format a single file:
+```bash
+packer fmt ./hcl2-files/build.pkr.hcl
+```
+
+Format all files in a directory:
+```bash
+packer fmt ./hcl2-files
+```
+
+#### Validate Packer blocks
+You can validate the syntax and configuration of your files locally. This command will return a zero exit status on success, and a non-zero exit status on failure.
+
+```bash
+packer validate -var 'region=us-east-1' -var 'optimum_version=v0.0.17' ./hcl2-files
+```
+
+#### Run Packer build
+You can run Packer locally. This command will build the AMI and upload it to AWS.
+
+You need to set the variables that have no default value using the `-var` flag. The lookup command documented next to the default `source_ami` queries `us-east-1`, so also pass a matching `source_ami` when you build in another region. For example:
+```bash
+packer build -var 'region=us-east-1' -var 'optimum_version=v0.0.17' ./hcl2-files
+```
+
+To trigger the GitHub Actions workflow manually, you can use the GitHub CLI:
+```bash
+gh workflow run build-ami.yml -f tag=<tag>
+```
diff --git a/infrastructure/ami/hcl2-files/build.pkr.hcl b/infrastructure/ami/hcl2-files/build.pkr.hcl
new file mode 100644
index 000000000..f9327dacf
--- /dev/null
+++ b/infrastructure/ami/hcl2-files/build.pkr.hcl
@@ -0,0 +1,32 @@
+# Build pipeline for the Hugging Face Neuron AMI: runs the provisioning
+# scripts on the builder instance and installs the login welcome message.
+build {
+  name    = "build-hf-dl-neuron"
+  sources = [
+    "source.amazon-ebs.ubuntu"
+  ]
+  provisioner "shell" {
+    script = "scripts/validate-neuron.sh"
+  }
+  provisioner "shell" {
+    script           = "scripts/install-huggingface-libraries.sh"
+    environment_vars = [
+      "TRANSFORMERS_VERSION=${var.transformers_version}",
+      "OPTIMUM_VERSION=${var.optimum_version}",
+    ]
+  }
+  provisioner "shell" {
+    inline = ["echo 'source /opt/aws_neuron_venv_pytorch/bin/activate' >> /home/ubuntu/.bashrc"]
+  }
+  # Upload to /tmp first, then move into place with sudo in the next step.
+  provisioner "file" {
+    source      = "scripts/welcome-msg.sh"
+    destination = "/tmp/99-custom-message"
+  }
+  provisioner "shell" {
+    inline = [
+      "sudo mv /tmp/99-custom-message /etc/update-motd.d/",
+      "sudo chmod +x /etc/update-motd.d/99-custom-message",
+    ]
+  }
+}
diff --git a/infrastructure/ami/hcl2-files/packer.pkr.hcl b/infrastructure/ami/hcl2-files/packer.pkr.hcl
new file mode 100644
index 000000000..fa4a5e49e
--- /dev/null
+++ b/infrastructure/ami/hcl2-files/packer.pkr.hcl
@@ -0,0 +1,9 @@
+# Plugins required by these templates.
+packer {
+  required_plugins {
+    amazon = {
+      version = ">= 1.2.8"
+      source  = "github.com/hashicorp/amazon"
+    }
+  }
+}
diff --git a/infrastructure/ami/hcl2-files/sources.pkr.hcl b/infrastructure/ami/hcl2-files/sources.pkr.hcl
new file mode 100644
index 000000000..fe3d5c22b
--- /dev/null
+++ b/infrastructure/ami/hcl2-files/sources.pkr.hcl
@@ -0,0 +1,16 @@
+# Builder configuration, referenced from build.pkr.hcl as source.amazon-ebs.ubuntu.
+source "amazon-ebs" "ubuntu" {
+  ami_name      = "huggingface-neuron-{{isotime \"2006-01-02T15-04-05Z\"}}"
+  instance_type = var.instance_type
+  region        = var.region
+  source_ami    = var.source_ami
+  ssh_username  = var.ssh_username
+  launch_block_device_mappings {
+    device_name           = "/dev/sda1"
+    volume_size           = 512
+    volume_type           = "gp2"
+    delete_on_termination = true
+  }
+  ami_users   = var.ami_users
+  ami_regions = var.ami_regions
+}
diff --git a/infrastructure/ami/hcl2-files/variables.pkr.hcl b/infrastructure/ami/hcl2-files/variables.pkr.hcl
new file mode 100644
index 000000000..57e7214a8
--- /dev/null
+++ b/infrastructure/ami/hcl2-files/variables.pkr.hcl
@@ -0,0 +1,55 @@
+variable "region" {
+  description = "The AWS region"
+  type        = string
+}
+
+variable "instance_type" {
+  default     = "trn1.2xlarge"
+  description = "EC2 machine type for building AMI"
+  type        = string
+}
+
+variable "source_ami" {
+  default     = "ami-0fbea04d7389bcd4e"
+  description = "Base Image"
+  type        = string
+  /*
+  To get latest value, run the following command:
+    aws ec2 describe-images \
+    --region us-east-1 \
+    --owners amazon \
+    --filters 'Name=name,Values=Deep Learning AMI Neuron PyTorch 1.13 (Ubuntu 20.04) ????????' 'Name=state,Values=available' \
+    --query 'reverse(sort_by(Images, &CreationDate))[:1].ImageId' \
+    --output text
+  NOTE(review): AMI IDs are region-specific; the default presumably comes from this us-east-1 query.
+  */
+}
+
+variable "ssh_username" {
+  default     = "ubuntu"
+  description = "Username to connect to SSH with"
+  type        = string
+}
+
+variable "optimum_version" {
+  description = "Optimum Neuron version to install"
+  type        = string
+}
+
+variable "transformers_version" {
+  default     = "4.36.2"
+  description = "Transformers version to install"
+  type        = string
+}
+
+variable "ami_users" {
+  default     = ["754289655784", "558105141721"]
+  description = "AWS accounts to share AMI with"
+  type        = list(string)
+}
+
+variable "ami_regions" {
+  default     = ["eu-west-1"]
+  description = "AWS regions to share AMI with"
+  type        = list(string)
+}
diff --git a/infrastructure/ami/scripts/install-huggingface-libraries.sh b/infrastructure/ami/scripts/install-huggingface-libraries.sh
new file mode 100644
index 000000000..b89c08822
--- /dev/null
+++ b/infrastructure/ami/scripts/install-huggingface-libraries.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Installs the Hugging Face libraries and optimum-neuron into the Neuron
+# virtual environment, then copies the optimum-neuron examples and notebooks
+# to /home/ubuntu.
+#
+# Required environment variables:
+#   TRANSFORMERS_VERSION  version of transformers to install
+#   OPTIMUM_VERSION       optimum-neuron branch or tag to clone and install
+
+# Stop at the first failing command so a broken install fails the build
+# instead of being hidden behind the exit status of the last command.
+set -eo pipefail
+
+: "${TRANSFORMERS_VERSION:?TRANSFORMERS_VERSION must be set}"
+: "${OPTIMUM_VERSION:?OPTIMUM_VERSION must be set}"
+
+# Activate the neuron virtual environment
+source /opt/aws_neuron_venv_pytorch/bin/activate
+
+echo "Step: install-hugging-face-libraries"
+
+echo "TRANSFORMERS_VERSION: $TRANSFORMERS_VERSION"
+echo "OPTIMUM_VERSION: $OPTIMUM_VERSION"
+
+pip install --upgrade --no-cache-dir \
+    "transformers[sklearn,sentencepiece,vision]==$TRANSFORMERS_VERSION" \
+    "datasets==2.16.1" \
+    "accelerate==0.23.0" \
+    "diffusers==0.25.0" \
+    "evaluate==0.4.1" \
+    "requests==2.31.0" \
+    "notebook==7.0.6" \
+    "markupsafe==2.1.1" \
+    "jinja2==3.1.2" \
+    "attrs==23.1.0"
+
+echo 'export PATH="${HOME}/.local/bin:$PATH"' >> "${HOME}/.bashrc"
+
+echo "Step: install-and-copy-optimum-neuron-examples"
+git clone -b "$OPTIMUM_VERSION" https://github.com/huggingface/optimum-neuron.git
+
+cd optimum-neuron
+python setup.py install
+cd ..
+
+mkdir -p /home/ubuntu/huggingface-neuron-samples/ /home/ubuntu/huggingface-neuron-notebooks/
+mv optimum-neuron/examples/* /home/ubuntu/huggingface-neuron-samples/
+mv optimum-neuron/notebooks/* /home/ubuntu/huggingface-neuron-notebooks/
+rm -rf optimum-neuron
+# Owner-writable and world-readable; nothing in the image is left world-writable
+chmod -R u=rwX,go=rX /home/ubuntu/huggingface-neuron-samples /home/ubuntu/huggingface-neuron-notebooks
+
+echo "Step: validate-imports-of-huggingface-libraries"
+python -c "import transformers;import datasets;import accelerate;import evaluate;import tensorboard; import torch;"
diff --git a/infrastructure/ami/scripts/validate-neuron.sh b/infrastructure/ami/scripts/validate-neuron.sh
new file mode 100644
index 000000000..c2fdcb7de
--- /dev/null
+++ b/infrastructure/ami/scripts/validate-neuron.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Checks that the Neuron tooling and the PyTorch Neuron packages of the base
+# image work, then installs the TensorBoard plugin for Neuron.
+
+# Stop at the first failing check; otherwise only the final pip command
+# would decide the script's exit status.
+set -eo pipefail
+
+echo "Step: validate-neuron-devices"
+neuron-ls
+
+# Activate the neuron virtual environment
+source /opt/aws_neuron_venv_pytorch/bin/activate
+
+python -c 'import torch'
+python -c 'import torch_neuronx'
+
+echo "Installing Tensorboard Plugin for Neuron"
+pip install --upgrade --no-cache-dir \
+    "tensorboard-plugin-neuronx"
diff --git a/infrastructure/ami/scripts/welcome-msg.sh b/infrastructure/ami/scripts/welcome-msg.sh
new file mode 100644
index 000000000..256228200
--- /dev/null
+++ b/infrastructure/ami/scripts/welcome-msg.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Login banner; the Packer build installs it as /etc/update-motd.d/99-custom-message.
+printf "=============================================================================\n"
+printf " __| __|_ )\n"
+printf " _| ( / HuggingFace Deep Learning Neuron AMI (Ubuntu 20.04)\n"
+printf " ___|\___|___|\n"
+printf "=============================================================================\n"
+printf "Welcome to the HuggingFace Deep Learning Neuron AMI (Ubuntu 20.04)\n"
+printf "* Examples: /home/ubuntu/huggingface-neuron-samples \n"
+printf "* Notebooks: /home/ubuntu/huggingface-neuron-notebooks \n"
+printf "* Documentation: https://huggingface.co/docs/optimum-neuron/ \n"
+printf "=============================================================================\n"