-
Notifications
You must be signed in to change notification settings - Fork 87
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AWS lambda for automatic whl metadata upload (#6159)
Write an AWS lambda for extracting and uploading the PEP 658 metadata file for whls. I don't think we have terraform for the pytorch AWS account, so I think this is the next best bet for having this in source control. Do this instead of the approach used in pytorch/pytorch, since this will apply to all packages in the index, so we can also do it for domains and other packages without code duplication.
- Loading branch information
Showing
8 changed files
with
214 additions
and
0 deletions.
There are no files selected for viewing
48 changes: 48 additions & 0 deletions
48
.github/workflows/deploy_lambda_whl_metadata_upload_pep658.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Runs the lambda's test script on pull requests and on pushes to main, and
# deploys the lambda to the pytorch AWS account on pushes to main.
name: Deploy whl_metadata_upload_pep658 to pytorch AWS account

on:
  pull_request:
    paths:
      - aws/lambda/whl_metadata_upload_pep658/**
  push:
    branches:
      - main
    paths:
      - .github/workflows/deploy_lambda_whl_metadata_upload_pep658.yml
      - aws/lambda/whl_metadata_upload_pep658/**

# Every `run` step executes inside the lambda's directory.
defaults:
  run:
    working-directory: aws/lambda/whl_metadata_upload_pep658/

jobs:
  test:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: pip
      - run: pip install -r requirements.txt
      - run: python test_lambda_function.py

  deploy:
    needs: test
    runs-on: ubuntu-22.04
    # Only deploy for pushes to main, never for pull requests.
    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
    permissions:
      # id-token: write lets the job request an OIDC token to assume the role below.
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: pip
      - name: configure aws credentials
        # NOTE(review): the action reference was garbled in extraction
        # ("[email protected]"); confirm the pinned version tag.
        uses: aws-actions/configure-aws-credentials@v1.7.0
        with:
          role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_whl_metadata_upload_pep658
          aws-region: us-east-1
      - run: make deploy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build and deploy the whl_metadata_upload_pep658 lambda bundle.
# None of these targets produce a file named after the target, so mark them
# phony; otherwise a stray file called "clean" or "deploy" would skip the rule.
.PHONY: prepare deploy clean

# Install the dependencies into ./packages and zip them with the handler.
prepare: clean
	mkdir -p ./packages
	pip3 install --target ./packages -r requirements.txt
	cd packages && zip -r ../whl_metadata_upload_pep658.zip .
	zip -g whl_metadata_upload_pep658.zip lambda_function.py

# Upload the freshly built zip as the function's new code.
deploy: prepare
	aws lambda update-function-code --function-name whl_metadata_upload_pep658 --zip-file fileb://whl_metadata_upload_pep658.zip

# Remove build artifacts.
clean:
	rm -rf whl_metadata_upload_pep658.zip packages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
This lambda is used on the pytorch AWS account to extract the metadata file
from each uploaded whl and upload it for use with [pep658]. The metadata files
are then added to the index by [s3_management/manage.py][managepy].
|
||
This account does not use terraform, so this is the source of truth for the | ||
code, and the configuration should be: | ||
* time limit: at least 30s? | ||
* ephemeral storage: at least the size of the largest whl we want to upload metadata for (the whl is downloaded to `/tmp`)
* Triggers: | ||
* s3: put object events from pytorch bucket with suffix `.whl` | ||
|
||
### Deployment | ||
|
||
A new version of the lambda can be deployed using `make deploy`. It is also | ||
done automatically in CI in | ||
`.github/workflows/deploy_lambda_whl_metadata_upload_pep658.yml`. | ||
|
||
### Testing + Backfill | ||
|
||
Please see `test_lambda_function.py`. | ||
|
||
[pep658]: https://peps.python.org/pep-0658/ | ||
[managepy]: https://github.com/pytorch/test-infra/blob/73eea9088162354f937230cb518f19f50f557062/s3_management/manage.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import os | ||
import zipfile | ||
from functools import cache | ||
from typing import Any | ||
from urllib.parse import unquote | ||
|
||
import boto3 # type: ignore[import] | ||
from botocore import UNSIGNED | ||
from botocore.config import Config | ||
|
||
|
||
@cache
def get_client(read_only: bool) -> Any:
    """Return a cached boto3 S3 client.

    When ``read_only`` is true the client is configured with
    ``signature_version=UNSIGNED`` (unsigned requests); otherwise a default
    client is created. ``functools.cache`` keeps one client per argument value.
    """
    if not read_only:
        return boto3.client("s3")
    unsigned_config = Config(signature_version=UNSIGNED)
    return boto3.client("s3", config=unsigned_config)
|
||
|
||
def upload_s3(bucket: str, key: str, filename: str, dry_run: bool) -> None:
    """Upload the local file ``filename`` to ``bucket``/``key``.

    The destination is always printed. When ``dry_run`` is true nothing is
    uploaded; otherwise the file is uploaded with a sha256 checksum and a
    ``public-read`` ACL.
    """
    print(f"Uploading to {bucket}/{key}")
    if dry_run:
        return
    extra_args = {"ChecksumAlgorithm": "sha256", "ACL": "public-read"}
    get_client(False).upload_file(filename, bucket, key, ExtraArgs=extra_args)
|
||
|
||
def lambda_handler(event: Any, context: Any, dry_run: bool = False) -> None:
    """Extract METADATA from each wheel in ``event`` and upload it beside the wheel.

    For every record whose object key ends with ``.whl``, the wheel is
    downloaded to ``/tmp``, the ``METADATA`` file of its root-level
    ``*.dist-info`` directory is written to ``/tmp/METADATA``, and that file
    is uploaded to ``<key>.metadata`` in the same bucket.

    Args:
        event: S3 notification payload; only ``Records[*].s3.bucket.name``
            and ``Records[*].s3.object.key`` are read.
        context: Lambda context object; unused.
        dry_run: When true, download with the unsigned client and skip the
            actual upload (the destination is still printed).
    """
    zip_location = "/tmp/wheel.zip"
    metadata_location = "/tmp/METADATA"
    for record in event["Records"]:
        bucket = record["s3"]["bucket"]["name"]
        key = unquote(record["s3"]["object"]["key"])
        if not key.endswith(".whl"):
            print(f"Skipping {bucket}/{key} as it is not a wheel")
            continue
        print(f"Processing {bucket}/{key}")

        # The same scratch paths are reused for every record, so remove
        # leftovers first to make sure stale files are never picked up.
        for stale_path in (zip_location, metadata_location):
            if os.path.exists(stale_path):
                os.remove(stale_path)

        get_client(dry_run).download_file(bucket, key, zip_location)

        with zipfile.ZipFile(zip_location, "r") as zip_ref:
            for member in zip_ref.namelist():
                # Only accept "<name>-<version>.dist-info/METADATA" at the
                # archive root; a nested (e.g. vendored) dist-info directory
                # describes a different distribution.
                is_root_metadata = (
                    member.endswith(".dist-info/METADATA")
                    and member.count("/") == 1
                )
                if not is_root_metadata:
                    continue
                with open(metadata_location, "wb") as metadata_file:
                    metadata_file.write(zip_ref.read(member))
                upload_s3(bucket, f"{key}.metadata", metadata_location, dry_run)
                break
            else:
                # Make the skip visible instead of silently doing nothing.
                print(f"No .dist-info/METADATA found in {bucket}/{key}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
boto3==1.35.96 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"Records": [ | ||
{ | ||
"s3": { | ||
"bucket": { | ||
"name": "pytorch" | ||
}, | ||
"object": { | ||
"key": "whl/cpu_pypi_pkg/torch_no_python-2.6.0.dev20240914+cpu-py3-none-any.whl" | ||
} | ||
} | ||
} | ||
] | ||
} |
60 changes: 60 additions & 0 deletions
60
aws/lambda/whl_metadata_upload_pep658/test_lambda_function.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import argparse | ||
import json | ||
import os | ||
from pathlib import Path | ||
from typing import Generator | ||
|
||
from lambda_function import get_client, lambda_handler | ||
|
||
# Help text shown for the --generate-event command line option.
GENERATE_EVENT_HELP_TEXT = """
Generate a test_event.json for all files in this s3 path and test the lambda
function with this new test_event.json. The test_event.json does not have
complete data, only known attributes that are needed for the lambda function.
Format should be `<bucket>/<key prefix>`, ex `pytorch/whl/nightly`. Note that
you will need more permissions to list objects in the bucket.
"""
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command line options of this script.

    ``--no-dry-run`` is a boolean flag that is false unless given, and
    ``--generate-event`` takes a ``BUCKET/KEY_PREFIX`` string.
    """
    cli = argparse.ArgumentParser()
    # Dry run (no upload) is the default; this flag opts out of it.
    cli.add_argument("--no-dry-run", action="store_true")
    generate_event_options = {
        "metavar": "BUCKET/KEY_PREFIX",
        "type": str,
        "help": GENERATE_EVENT_HELP_TEXT,
    }
    cli.add_argument("--generate-event", **generate_event_options)
    return cli.parse_args()
|
||
|
||
def get_all_keys(bucket: str, key_prefix: str) -> Generator[str, None, None]:
    """Yield the key of every object under ``key_prefix`` that ends with ``.whl``.

    Args:
        bucket: Name of the S3 bucket to list.
        key_prefix: Only keys starting with this prefix are listed.

    Yields:
        Object keys ending in ``.whl``.
    """
    paginator = get_client(False).get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket, Prefix=key_prefix):
        # A page with no matching objects has no "Contents" entry, so fall
        # back to an empty list instead of raising KeyError.
        for obj in page.get("Contents", []):
            if obj["Key"].endswith(".whl"):
                yield obj["Key"]
|
||
|
||
# Script entry point: load test_event.json, optionally regenerate it from an
# S3 listing, then run the lambda handler against it (dry run by default).
if __name__ == "__main__":
    args = parse_args()
    test_file = Path(__file__).parent / "test_event.json"

    with open(test_file) as f:
        event = json.load(f)

    if args.generate_event:
        # "<bucket>/<key prefix>": everything after the first "/" is the prefix.
        bucket, _, key_prefix = args.generate_event.partition("/")

        event["Records"] = [
            {
                "s3": {
                    "bucket": {"name": bucket},
                    "object": {"key": wheel_key},
                }
            }
            for wheel_key in get_all_keys(bucket, key_prefix)
        ]
        # Persist the generated event; the context manager closes the file.
        with open(test_file, "w") as f:
            json.dump(event, f, indent=2)

    lambda_handler(event, None, dry_run=not args.no_dry_run)