diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..60db58b
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,153 @@
+# Builds the Python package (sdist + wheel) and the multi-arch Docker image
+# on every push; Docker images are pushed only for non-pull-request events.
+name: Build
+
+on:
+  push:
+
+jobs:
+  package:
+    name: Python Package
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          # Quoted so YAML keeps it a string (3.10 would otherwise become 3.1)
+          python-version: '3.11'
+          cache: pip
+          cache-dependency-path: |
+            **/pyproject.toml
+            **/requirements*.txt
+
+      - name: Prepare Python env
+        run: |
+          python -m pip install -U pip setuptools wheel
+
+      - name: Create build info
+        run: |
+          bash scripts/build-info.sh
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+
+      - name: Install package
+        run: |
+          pip install .
+
+      - name: Build package sdist
+        run: |
+          python setup.py sdist
+
+      - name: Build package bdist (wheel)
+        run: |
+          python setup.py bdist_wheel
+
+
+  docker:
+    name: Docker
+    runs-on: ubuntu-latest
+
+    env:
+      PUBLIC_IMAGE_PREFIX: 'datastewardshipwizard'
+      DOCKER_IMAGE_NAME: 'smp-submission-service'
+      DOCKER_META_CONTEXT: '.'
+      DOCKER_META_FILE: 'Dockerfile'
+      DOCKER_META_PLATFORMS: 'linux/amd64,linux/arm64'
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Create build info
+        run: |
+          bash scripts/build-info.sh
+
+      # TEST DOCKER IMAGE BUILD
+      - name: Docker meta [test]
+        id: meta-test
+        uses: docker/metadata-action@v4
+        with:
+          images: |
+            ${{ env.PUBLIC_IMAGE_PREFIX }}/${{ env.DOCKER_IMAGE_NAME }}
+          tags: |
+            type=sha
+
+      - name: Docker build [test]
+        uses: docker/build-push-action@v4
+        with:
+          context: ${{ env.DOCKER_META_CONTEXT }}
+          file: ${{ env.DOCKER_META_FILE }}
+          platforms: ${{ env.DOCKER_META_PLATFORMS }}
+          push: false
+          tags: ${{ steps.meta-test.outputs.tags }}
+          labels: ${{ steps.meta-test.outputs.labels }}
+
+      # PREPARE
+      - name: Docker login [docker.io]
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_HUB_USERNAME }}
+          password: ${{ secrets.DOCKER_HUB_PASSWORD }}
+
+      # DEVELOPMENT IMAGES
+      - name: Docker meta [dev]
+        id: meta-dev
+        if: github.event_name != 'pull_request'
+        uses: docker/metadata-action@v4
+        with:
+          images: |
+            ${{ secrets.DOCKER_HUB_USERNAME }}/${{ env.DOCKER_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+
+      - name: Docker build+push [dev]
+        uses: docker/build-push-action@v4
+        if: github.event_name != 'pull_request' && steps.meta-dev.outputs.tags != ''
+        with:
+          context: ${{ env.DOCKER_META_CONTEXT }}
+          file: ${{ env.DOCKER_META_FILE }}
+          platforms: ${{ env.DOCKER_META_PLATFORMS }}
+          push: true
+          tags: ${{ steps.meta-dev.outputs.tags }}
+          labels: ${{ steps.meta-dev.outputs.labels }}
+
+      # PUBLIC IMAGES
+      - name: Docker meta [public]
+        id: meta-public
+        if: github.event_name != 'pull_request'
+        uses: docker/metadata-action@v4
+        with:
+          images: |
+            ${{ env.PUBLIC_IMAGE_PREFIX }}/${{ env.DOCKER_IMAGE_NAME }}
+          tags: |
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}},enable=${{ !startsWith(github.ref, 'refs/tags/v0.') }}
+
+      - name: Docker build+push [public]
+        uses: docker/build-push-action@v4
+        if: github.event_name != 'pull_request' && steps.meta-public.outputs.tags != ''
+        with:
+          context: ${{ env.DOCKER_META_CONTEXT }}
+          file: ${{ env.DOCKER_META_FILE }}
+          platforms: ${{ env.DOCKER_META_PLATFORMS }}
+          push: true
+          tags: ${{ steps.meta-public.outputs.tags }}
+          labels: ${{ steps.meta-public.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8f818ab
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,31 @@
+FROM datastewardshipwizard/python-base:3.11-alpine AS builder
+
+WORKDIR /app
+
+COPY . /app
+
+RUN python -m pip wheel --no-cache-dir --wheel-dir=/app/wheels -r /app/requirements.txt \
+    && python -m pip wheel --no-cache-dir --no-deps --wheel-dir=/app/wheels /app
+
+
+FROM datastewardshipwizard/python-base:3.11-alpine
+
+ENV PATH="/home/user/.local/bin:$PATH"
+
+# Setup non-root user
+USER user
+
+# Prepare dirs
+WORKDIR /home/user
+RUN mkdir -p /home/user/data
+
+# uvicorn is not listed in requirements.txt, so it is installed separately (unpinned)
+RUN pip install uvicorn
+
+# Install Python packages
+COPY --from=builder --chown=user:user /app/wheels /home/user/wheels
+RUN python -m pip install --user --no-cache --no-index /home/user/wheels/* \
+    && rm -rf /home/user/wheels
+
+# Run
+CMD ["uvicorn", "smp_submitter:app", "--proxy-headers", "--forwarded-allow-ips=*", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..1c20f63
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,44 @@
+[build-system]
+requires = ['setuptools']
+build-backend = 'setuptools.build_meta'
+
+[project]
+name = 'smp-submission-service'
+version = '0.1.0'
+description = 'Submission service of maSMPs for Software Management Wizard'
+readme = 'README.md'
+keywords = ['dsw', 'smp', 'masmp', 'import', 'mapping']
+license = { text = 'Apache License 2.0' }
+authors = [
+    { name = 'Marek Suchánek', email = 'marek.suchanek@ds-wizard.org' }
+]
+classifiers = [
+    'Development Status :: 4 - Beta',
+    'License :: OSI Approved :: Apache Software License',
+    'Programming Language :: Python',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'Topic :: Text Processing',
+    'Topic :: Utilities',
+]
+requires-python = '>=3.10, <4'
+dependencies = [
+    'fastapi',
+    'httpx',
+    'PyYAML',
+    'rdflib',
+]
+
+[project.urls]
+Homepage = 'https://smw.ds-wizard.org'
+Repository = 'https://github.com/ds-wizard/smp-submission-service'
+
+[tool.setuptools]
+zip-safe = false
+
+[tool.setuptools.packages.find]
+namespaces = true
+where = ['src']
+
+[tool.setuptools.package-data]
+'*' = ['*.css', '*.js', '*.j2', '*.png']
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5a3dc40
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,18 @@
+annotated-types==0.6.0
+anyio==3.7.1
+certifi==2023.7.22
+fastapi==0.104.1
+h11==0.14.0
+httpcore==0.18.0
+httpx==0.25.0
+idna==3.4
+isodate==0.6.1
+pydantic==2.4.2
+pydantic_core==2.10.1
+pyparsing==3.1.1
+PyYAML==6.0.1
+rdflib==7.0.0
+six==1.16.0
+sniffio==1.3.0
+starlette==0.27.0
+typing_extensions==4.8.0
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..6ade154
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Export every non-comment line of .env into the environment
+export $(grep -v '^#' .env | xargs)
+
+uvicorn smp_submitter:app --reload
diff --git a/scripts/build-info.sh b/scripts/build-info.sh
new file mode 100644
index 0000000..21a18a5
--- /dev/null
+++ b/scripts/build-info.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -e
+
+# File with build info
+BUILD_INFO_FILE=src/smp_submitter/consts.py
+
+# Create version based on git tag or branch
+branch=$(git rev-parse --abbrev-ref HEAD)
+commit=$(git rev-parse --short HEAD)
+version="$branch~$commit"
+gittag=$(git tag -l --contains HEAD | head -n 1)
+if test -n "$gittag"
+then
+    version="$gittag~$commit"
+fi
+
+# Get build timestamp (UTC, hence the literal Z suffix)
+builtAt=$(date -u +"%Y-%m-%d %TZ")
+
+cat $BUILD_INFO_FILE
+# Replace values (each placeholder receives its own value)
+sed -i.bak "s#--BUILT_AT--#$builtAt#" $BUILD_INFO_FILE && rm $BUILD_INFO_FILE".bak"
+sed -i.bak "s#--VERSION--#$version#" $BUILD_INFO_FILE && rm $BUILD_INFO_FILE".bak"
+
+cat $BUILD_INFO_FILE
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..b908cbe
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+import setuptools
+
+setuptools.setup()
diff --git a/src/smp_submitter/__init__.py b/src/smp_submitter/__init__.py
new file mode 100644
index 0000000..8a1c803
--- /dev/null
+++ b/src/smp_submitter/__init__.py
@@ -0,0 +1,3 @@
+from .app import app
+
+__all__ = ['app']
diff --git a/src/smp_submitter/app.py b/src/smp_submitter/app.py
new file mode 100644
index 0000000..6b0d049
--- /dev/null
+++ b/src/smp_submitter/app.py
@@ -0,0 +1,118 @@
+import hmac
+import logging
+
+import fastapi
+import fastapi.responses
+
+from typing import Tuple
+
+from .config import Config
+from .consts import NICE_NAME, VERSION, BUILD_INFO, DEFAULT_ENCODING
+from .logic import process
+
+
+LOG = logging.getLogger(__name__)
+
+
+app = fastapi.FastAPI(
+    title=NICE_NAME,
+    version=VERSION,
+)
+
+
+def _valid_token(request: fastapi.Request) -> bool:
+    """Check the ``Authorization: Bearer TOKEN`` header of the request.
+
+    Returns ``True`` when no ``API_TOKEN`` is configured (security disabled)
+    or when the presented bearer token equals the configured one.
+    """
+    if Config.API_TOKEN is None:
+        LOG.debug('Security disabled, authorized directly')
+        return True
+    auth = request.headers.get('Authorization', '')  # type: str
+    if not auth.startswith('Bearer '):
+        LOG.debug('Invalid token (missing or without "Bearer " prefix)')
+        return False
+    token = auth.split(' ', maxsplit=1)[1]
+    # Constant-time comparison; bytes are used because compare_digest
+    # rejects non-ASCII str arguments.
+    return hmac.compare_digest(
+        token.encode(DEFAULT_ENCODING),
+        Config.API_TOKEN.encode(DEFAULT_ENCODING),
+    )
+
+
+def _extract_content_type(header: str) -> Tuple[str, str]:
+    """Split a ``Content-Type`` header into media type and charset.
+
+    :param header: raw header value, e.g. ``application/ld+json; charset=utf-8``
+    :return: ``(media_type, encoding)``, both lower-cased; the encoding falls
+             back to ``DEFAULT_ENCODING`` when no ``charset`` parameter exists
+    """
+    media_type, *params = header.lower().split(';')
+    for param in params:
+        name, _, value = param.strip().partition('=')
+        if name.strip() == 'charset':
+            # The value may be a quoted string, e.g. charset="utf-8"
+            encoding = value.strip().strip('"')
+            if encoding:
+                return media_type.strip(), encoding
+    return media_type.strip(), DEFAULT_ENCODING
+
+
+@app.get('/', response_class=fastapi.responses.JSONResponse)
+async def get_index(request: fastapi.Request):
+    """Return the build information of the running service as JSON."""
+    return fastapi.responses.JSONResponse(content=BUILD_INFO)
+
+
+@app.post('/submit', response_class=fastapi.responses.JSONResponse)
+async def post_submit(request: fastapi.Request):
+    """Accept submitted metadata and turn it into a GitHub pull request.
+
+    Responds with 401 for an invalid token, 201 and a ``Location`` header
+    pointing to the pull request on success, and 400 with the error text
+    when the body cannot be decoded or processing fails.
+    """
+    # (1) Verify authorization
+    if not _valid_token(request=request):
+        return fastapi.responses.PlainTextResponse(
+            status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
+            content='Unauthorized submission request.\n\n'
+                    'The submission service is not configured properly.\n'
+        )
+    # (2) Get data
+    content_type, encoding = _extract_content_type(
+        header=request.headers.get('Content-Type', ''),
+    )
+    raw_content = await request.body()
+    # (3) Return response
+    try:
+        # Decode with the charset announced by the client; an unknown or
+        # mismatching charset is reported as a bad request below.
+        content = raw_content.decode(encoding)
+        pr_link = await process(
+            content=content,
+            content_type=content_type,
+        )
+
+        return fastapi.responses.JSONResponse(
+            headers={
+                'Location': pr_link,
+            },
+            status_code=fastapi.status.HTTP_201_CREATED,
+            content={
+                'message': 'Notification sent successfully!',
+            }
+        )
+    except Exception as e:
+        return fastapi.responses.PlainTextResponse(
+            status_code=fastapi.status.HTTP_400_BAD_REQUEST,
+            content=str(e),
+        )
+
+
+@app.on_event("startup")
+async def app_init():
+    """Validate the configuration when the application starts."""
+    Config.check()
diff --git a/src/smp_submitter/config.py b/src/smp_submitter/config.py
new file mode 100644
index 0000000..dff3a15
--- /dev/null
+++ b/src/smp_submitter/config.py
@@ -0,0 +1,36 @@
+import logging
+import os
+import sys
+
+from .consts import DEFAULT_LOG_LEVEL, DEFAULT_LOG_FORMAT
+
+
+LOG = logging.getLogger(__name__)
+
+
+class Config:
+    """Service configuration read from environment variables at import time."""
+
+    GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', None)
+    GITHUB_NAME = os.environ.get('GITHUB_NAME', None)
+    GITHUB_EMAIL = os.environ.get('GITHUB_EMAIL', None)
+    # Optional: when unset, the API accepts requests without a bearer token
+    API_TOKEN = os.environ.get('API_TOKEN', None)
+    LOG_LEVEL = os.environ.get('LOG_LEVEL', DEFAULT_LOG_LEVEL)
+    LOG_FORMAT = os.environ.get('LOG_FORMAT', DEFAULT_LOG_FORMAT)
+
+    # Variables that check() requires to be set
+    _REQUIRED = ('GITHUB_TOKEN', 'GITHUB_NAME', 'GITHUB_EMAIL')
+
+    @classmethod
+    def check(cls):
+        """Exit with status 1 if any required variable is missing.
+
+        All missing variables are reported before exiting, so a single run
+        reveals the complete list.
+        """
+        missing = [name for name in cls._REQUIRED if getattr(cls, name) is None]
+        for name in missing:
+            print(f'{name} env variable is missing!')
+        if missing:
+            sys.exit(1)
diff --git a/src/smp_submitter/consts.py b/src/smp_submitter/consts.py
new file mode 100644
index 0000000..594220c
--- /dev/null
+++ b/src/smp_submitter/consts.py
@@ -0,0 +1,24 @@
+PACKAGE_NAME = 'smp_submitter'
+NICE_NAME = 'DSW SMP Submission Service'
+PACKAGE_VERSION = '0.1.0'
+LOGGER_NAME = 'DSW_SUBMITTER'
+
+# BUILT_AT and VERSION hold placeholders that scripts/build-info.sh rewrites
+# with sed. The _DEFAULT_* names exist so the placeholder can be rebuilt below
+# without spelling it out literally (sed would rewrite that occurrence too).
+_DEFAULT_BUILT_AT = 'BUILT_AT'
+BUILT_AT = '--BUILT_AT--'
+_DEFAULT_VERSION = 'VERSION'
+VERSION = '--VERSION--'
+
+DEFAULT_ENCODING = 'utf-8'
+DEFAULT_LOG_LEVEL = 'INFO'
+DEFAULT_LOG_FORMAT = '%(asctime)s | %(levelname)s | %(module)s: %(message)s'
+
+
+BUILD_INFO = {
+    'name': NICE_NAME,
+    'packageVersion': PACKAGE_VERSION,
+    'version': VERSION if VERSION != f'--{_DEFAULT_VERSION}--' else 'unknown',
+    'builtAt': BUILT_AT if BUILT_AT != f'--{_DEFAULT_BUILT_AT}--' else 'unknown',
+}
diff --git a/src/smp_submitter/logic.py b/src/smp_submitter/logic.py
new file mode 100644
index 0000000..99f028a
--- /dev/null
+++ b/src/smp_submitter/logic.py
@@ -0,0 +1,166 @@
+import asyncio
+import base64
+import datetime
+import re
+
+import httpx
+import rdflib
+
+
+from .config import Config
+
+GH_API = 'https://api.github.com'
+GH_WEB = 'https://github.com/'
+# Upper bound for polling a freshly requested fork (forking is asynchronous)
+FORK_POLL_ATTEMPTS = 12
+FORK_POLL_INTERVAL = 5
+# OWNER/REPO limited to characters that are safe inside an API path
+_REPO_NAME = re.compile(r'[A-Za-z0-9-]+/(?!\.{1,2}$)[A-Za-z0-9_.-]+')
+
+
+async def process(content: str, content_type: str) -> str:
+    """Create a pull request with the submitted metadata.
+
+    :param content: decoded request body (RDF serialisation)
+    :param content_type: media type of ``content``
+    :return: URL of the created pull request
+    :raises RuntimeError: when no repository is found or GitHub calls fail
+    """
+    repo_name = get_github_reponame(content, content_type)
+    try:
+        return await create_fork_pr(repo_name, content)
+    except Exception as e:
+        raise RuntimeError(
+            f'Failed to create a fork and submit PR:\n\n{str(e)}'
+        ) from e
+
+
+async def create_fork_pr(repo_name: str, content: str) -> str:
+    """Fork ``repo_name``, commit the metadata to the fork and open a PR.
+
+    :param repo_name: target repository as ``owner/repo``
+    :param content: file content to store as ``metadata.json``
+    :return: ``html_url`` of the created pull request
+    :raises httpx.HTTPStatusError: when GitHub answers with an error status
+    :raises RuntimeError: when the fork does not appear in time
+    """
+    headers = {
+        'Accept': 'application/vnd.github+json',
+        'Authorization': f'Bearer {Config.GITHUB_TOKEN}',
+        'X-GitHub-Api-Version': '2022-11-28',
+    }
+    async with httpx.AsyncClient(headers=headers) as client:
+        # create a uniquely named fork (the suffix is the current timestamp)
+        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
+        fork_name = f'{repo_name.replace("/", "-")}-{timestamp}'
+        r = await client.post(
+            url=f'{GH_API}/repos/{repo_name}/forks',
+            json={
+                'name': fork_name,
+                'default_branch_only': True,
+            },
+        )
+        r.raise_for_status()
+        fork = r.json()
+        fork_repo_name = fork['full_name']
+        username = fork['owner']['login']
+        branch = fork['default_branch']
+
+        # wait till fork is created (async), but never forever
+        await asyncio.sleep(2)
+        for _ in range(FORK_POLL_ATTEMPTS):
+            r = await client.get(
+                url=f'{GH_API}/repos/{fork_repo_name}',
+            )
+            if r.status_code != 404:
+                r.raise_for_status()
+                break
+            await asyncio.sleep(FORK_POLL_INTERVAL)
+        else:
+            raise RuntimeError(
+                f'Fork {fork_repo_name} was not created in time'
+            )
+
+        # submit the file there via GitHub API
+        contents_url = f'{GH_API}/repos/{fork_repo_name}/contents/metadata.json'
+        payload = {
+            # b64encode returns bytes, which are not JSON serializable
+            'content': base64.b64encode(content.encode('utf-8')).decode('ascii'),
+            'committer': {
+                'name': Config.GITHUB_NAME,
+                'email': Config.GITHUB_EMAIL,
+            },
+            'message': 'Update metadata from maSMP',
+        }
+        # replacing an existing file requires the SHA of its current blob
+        r = await client.get(url=contents_url)
+        if r.status_code == 200:
+            payload['sha'] = r.json()['sha']
+        elif r.status_code != 404:
+            r.raise_for_status()
+        r = await client.put(url=contents_url, json=payload)
+        r.raise_for_status()
+
+        # create a PR via GitHub API
+        r = await client.post(
+            url=f'{GH_API}/repos/{repo_name}/pulls',
+            json={
+                'title': 'Update metadata from maSMP',
+                'body': 'Hey! This metadata has been submitted from the Software Management Wizard via maSMP.',
+                'head': f'{username}:{branch}',
+                'head_repo': fork_repo_name,
+                'base': branch,
+                'maintainer_can_modify': True,
+            }
+        )
+        r.raise_for_status()
+        return r.json()['html_url']
+
+
+def get_github_reponame(content: str, content_type: str) -> str:
+    """Find the first GitHub repository referenced by the metadata.
+
+    Looks at all ``schema:codeRepository`` objects and returns the first one
+    that is a ``https://github.com/OWNER/REPO`` URL (a trailing slash or a
+    ``.git`` suffix is tolerated).
+
+    :return: repository name as ``owner/repo``
+    :raises RuntimeError: when no such repository is present
+    """
+    g = create_rdf_graph(content, content_type)
+    repos = g.objects(
+        predicate=rdflib.URIRef('https://schema.org/codeRepository'),
+        unique=True,
+    )
+    for repo in repos:
+        repo = str(repo)
+        if not repo.startswith(GH_WEB):
+            continue
+        repo = repo[len(GH_WEB):].rstrip('/')
+        if repo.endswith('.git'):
+            repo = repo[:-len('.git')]
+        # full match also rejects '?', '#' and extra path segments, so the
+        # name can be interpolated into GitHub API URLs safely
+        if _REPO_NAME.fullmatch(repo) is None:
+            continue
+        return repo
+    raise RuntimeError('No valid GitHub repo found as schema:codeRepository')
+
+
+def create_rdf_graph(content: str, content_type: str) -> rdflib.Graph:
+    """Parse the submitted content into an RDF graph.
+
+    Only JSON-LD is supported at the moment, so every ``content_type`` is
+    parsed as JSON-LD with a default ``schema:`` prefix context.
+    """
+    context = {
+        '@context': {
+            'schema': 'https://schema.org/',
+        }
+    }
+    # 'application/ld+json' and any other content type are handled alike
+    rdf_format = 'json-ld'
+
+    g = rdflib.Graph()
+    g.parse(data=content, format=rdf_format, context=context)
+    return g