From 0d6667a48d66e95aec568246fa51852ff31ccd06 Mon Sep 17 00:00:00 2001 From: Micah Halter Date: Thu, 12 Sep 2024 09:47:30 -0400 Subject: [PATCH] refactor: align repo with new ETL template --- .editorconfig | 5 + .github/workflows/cape.yml | 15 +++ .github/workflows/release.yml | 14 ++ .gitignore | 222 +++++++++++++++++++++++++++---- .prettierrc.yaml | 8 ++ etl_gphl_cre_alert.py => main.py | 9 +- pyproject.toml | 9 ++ pyrightconfig.json | 4 + 8 files changed, 259 insertions(+), 27 deletions(-) create mode 100644 .editorconfig create mode 100644 .github/workflows/cape.yml create mode 100644 .github/workflows/release.yml create mode 100644 .prettierrc.yaml rename etl_gphl_cre_alert.py => main.py (96%) create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d9aeb5b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +root = true +[*] +indent_style = space +indent_size = 4 +max_line_length = 80 diff --git a/.github/workflows/cape.yml b/.github/workflows/cape.yml new file mode 100644 index 0000000..273b936 --- /dev/null +++ b/.github/workflows/cape.yml @@ -0,0 +1,15 @@ +name: CAPE +on: + push: + branches: [main] + pull_request: + +jobs: + python: + name: Python + uses: cape-ph/.github/.github/workflows/python_checks.yml@main + with: + pytest: false + general: + name: General + uses: cape-ph/.github/.github/workflows/general_checks.yml@main diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8fc79f3 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,14 @@ +name: Release + +on: + push: + tags: ["**"] + workflow_dispatch: + +permissions: + contents: write + +jobs: + Release: + uses: cape-ph/.github/.github/workflows/release.yml@main + secrets: inherit diff --git a/.gitignore b/.gitignore index 48d199b..9175c41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,198 @@ -# OS -# =========== -.DS_Store -ehthumbs.db -Icon? -Thumbs.db +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class -# General -# ======= +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: *.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# JetBrains +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf +.idea/**/aws.xml +.idea/**/contentModel.xml +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml +.idea/**/gradle.xml +.idea/**/libraries +cmake-build-*/ +.idea/**/mongoSettings.xml +*.iws +out/ +.idea_modules/ +atlassian-ide-plugin.xml +.idea/replstate.xml +.idea/sonarlint/ +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties +.idea/httpRequests +.idea/caches/build_file_checksums.ser + +# Vagrant/Ansible/CI +.vagrant +playbook.retry +.vault +ansible.out + +# General *.dat *.out *.pid @@ -19,23 +204,12 @@ Thumbs.db logs/ build/ -# IDEs -# ==== -*.iml -*.idea -.vscode -*.code-workspace - -# Vagrant/Ansible/CI related -# ========================== -.vagrant -playbook.retry -.vault -ansible.out +# OS +.DS_Store +ehthumbs.db +Icon? +Thumbs.db # Other -# ===== sslcerts.pfx scripts/*gz -__pycache__/ -*.py[cod] diff --git a/.prettierrc.yaml b/.prettierrc.yaml new file mode 100644 index 0000000..66fff2e --- /dev/null +++ b/.prettierrc.yaml @@ -0,0 +1,8 @@ +proseWrap: always +tabWidth: 4 +useTabs: false + +overrides: + - files: "*.md" + options: + parser: "markdown" diff --git a/etl_gphl_cre_alert.py b/main.py similarity index 96% rename from etl_gphl_cre_alert.py rename to main.py index 7a13936..d8b5d07 100644 --- a/etl_gphl_cre_alert.py +++ b/main.py @@ -2,6 +2,7 @@ import io import sys +from pathlib import Path import boto3 as boto3 import dateutil.parser as dparser @@ -42,7 +43,7 @@ # NOTE: for now we'll take the alert object key and change out the file # extension for the clean data (leaving all namespacing and such). this # will probably need to change -clean_obj_key = alert_obj_key.replace(".docx", ".csv") +clean_obj_key = str(Path(alert_obj_key).with_suffix(".csv")) # NOTE: May need some creds here s3_client = boto3.client("s3") @@ -105,7 +106,9 @@ # include it in the AR query, but we still have it if we end up # needing it for anything else interim = pd.DataFrame() -interim["Mechanism (*Submitters Report)"] = data["Anti-Microbial Resistance RT-PCR"] +interim["Mechanism (*Submitters Report)"] = data[ + "Anti-Microbial Resistance RT-PCR" +] interim["Organism"] = data["Organism ID"] interim["Date Received"] = date_received interim["Date Reported"] = date_received @@ -119,7 +122,7 @@ interim["Facility of Origin"] = data["Received from"] interim["State_Lab_ID"] = data["Lab ID"] -# write out the transofrmed data +# write out the transformed data with io.StringIO() as csv_buff: interim.to_csv(csv_buff, index=False) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..470b53c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[tool.black] +line-length = 80 + +[tool.isort] +profile = "black" +line_length = 80 + +[tool.ruff] +line-length = 80 diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..c4cb691 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "autoImportCompletions": true, + "typeCheckingMode": "basic" +}