From 0ecbe0653791c51f6354e1625483132994decdc7 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 3 Jan 2025 19:33:05 -0500 Subject: [PATCH] style: update ruff --- .pre-commit-config.yaml | 2 +- pyproject.toml | 20 ++++++++++----- src/therapy/cli.py | 8 +++--- src/therapy/database/dynamodb.py | 42 +++++++++++++++++--------------- src/therapy/etl/__init__.py | 2 +- src/therapy/etl/base.py | 6 ++--- src/therapy/etl/hemonc.py | 4 +-- src/therapy/etl/merge.py | 10 +++++--- src/therapy/etl/rules.py | 6 ++++- tests/conftest.py | 6 ++--- 10 files changed, 61 insertions(+), 45 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f766878..bfa1f2b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: - id: mixed-line-ending args: [ --fix=lf ] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.0 # ruff version + rev: v0.8.4 # ruff version hooks: - id: ruff-format - id: ruff diff --git a/pyproject.toml b/pyproject.toml index 2a3d08e7..3f1cb576 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ etl = [ "pyyaml" ] test = ["pytest", "pytest-cov", "pytest-mock", "isodate"] -dev = ["pre-commit>=3.7.1", "ruff==0.5.0", "lxml", "xmlformatter", "types-pyyaml"] +dev = ["pre-commit>=3.7.1", "ruff==0.8.4", "lxml", "xmlformatter", "types-pyyaml"] [project.urls] Homepage = "https://github.com/cancervariants/therapy-normalization" @@ -79,6 +79,7 @@ branch = true [tool.ruff] src = ["src"] +exclude = ["analysis"] [tool.ruff.lint] select = [ @@ -107,10 +108,14 @@ select = [ "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf + "SLOT", # https://docs.astral.sh/ruff/rules/#flake8-slots-slot "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "PLC", # https://docs.astral.sh/ruff/rules/#convention-c + "PLE", # https://docs.astral.sh/ruff/rules/#error-e_1 + "TRY", # https://docs.astral.sh/ruff/rules/#tryceratops-try "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf @@ -129,13 +134,14 @@ fixable = [ "PT", "RSE", "SIM", + "PLC", + "PLE", + "TRY", "PERF", "FURB", "RUF" ] # ANN003 - missing-type-kwargs -# ANN101 - missing-type-self -# ANN102 - missing-type-cls # D203 - one-blank-line-before-class # D205 - blank-line-after-summary # D206 - indent-with-spaces* @@ -150,20 +156,21 @@ fixable = [ # PGH003 - blanket-type-ignore # W191 - tab-indentation* # S321 - suspicious-ftp-lib-usage +# PLC0206 - dict-index-missing-items # *ignored for compatibility with formatter ignore = [ - "ANN003", "ANN101", "ANN102", + "ANN003", "D203", "D205", "D206", "D213", "D300", "D400", "D415", "E111", "E114", "E117", "E501", "PGH003", "W191", "S321", + "PLC0206", ] [tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type -# ANN102 - missing-type-cls # D100 - undocumented-public-module # D102 - undocumented-public-class # S101 - assert @@ -173,10 +180,10 @@ ignore = [ # INP001 - implicit-namespace-package # SLF001 - private-member-access # PERF401 - manual-list-comprehension +# S608 - hardcoded-sql-expression "tests/*" = [ "ANN001", "ANN2", - "ANN102", "D100", "D102", "S101", @@ -186,6 +193,7 @@ ignore = [ "PERF401" ] "tests/unit/test_emit_warnings.py" = ["RUF001"] +"tests/scripts/build_chembl_data.py" = ["S608"] "src/therapy/schemas.py" = ["ANN001", "ANN201", "N805"] [tool.ruff.lint.flake8-annotations] diff --git a/src/therapy/cli.py b/src/therapy/cli.py index 148044c6..809e3a7d 100644 --- a/src/therapy/cli.py +++ b/src/therapy/cli.py @@ -103,13 +103,13 @@ def _load_source( f"Encountered ModuleNotFoundError attempting to import {e.name}. Are ETL dependencies installed?" ) click.get_current_context().exit() - SourceClass = eval(name.value) # noqa: N806 PGH001 S307 + SourceClass = eval(name.value) # noqa: N806, S307 source = SourceClass(database=db, silent=False) try: processed_ids += source.perform_etl(use_existing) except EtlError as e: - _logger.error(e) + _logger.exception("Encountered ETL error while loading source %s", name) click.echo(f"Encountered error while loading {name}: {e}.") click.get_current_context().exit() end_load = timer() @@ -260,13 +260,13 @@ def update_normalizer_db( if len(sources_split) == 0: msg = "Must enter 1 or more source names to update" - raise Exception(msg) + raise ValueError(msg) non_sources = set(sources_split) - set(SOURCES) if len(non_sources) != 0: msg = f"Not valid source(s): {non_sources}" - raise Exception(msg) + raise ValueError(msg) parsed_source_names = {SourceName(SOURCES[s]) for s in sources_split} if ( diff --git a/src/therapy/database/dynamodb.py b/src/therapy/database/dynamodb.py index 64db5869..114c3d91 100644 --- a/src/therapy/database/dynamodb.py +++ b/src/therapy/database/dynamodb.py @@ -110,8 +110,8 @@ def drop_db(self) -> None: try: if not self._check_delete_okay(): return - except DatabaseWriteError as e: - raise e + except DatabaseWriteError: # noqa: TRY203 + raise if self.therapy_table in self.list_tables(): self.dynamodb.Table(self.therapy_table).delete() @@ -239,23 +239,21 @@ def get_record_by_id( otherwise. :return: complete therapy record, if match is found; None otherwise """ + if merge: + pk = f"{concept_id.lower()}##{RecordType.MERGER.value}" + else: + pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}" try: - if merge: - pk = f"{concept_id.lower()}##{RecordType.MERGER.value}" - else: - pk = f"{concept_id.lower()}##{RecordType.IDENTITY.value}" if case_sensitive: match = self.therapies.get_item( Key={"label_and_type": pk, "concept_id": concept_id} ) return match["Item"] - exp = Key("label_and_type").eq(pk) - response = self.therapies.query(KeyConditionExpression=exp) - record = response["Items"][0] - del record["label_and_type"] - return record + response = self.therapies.query( + KeyConditionExpression=Key("label_and_type").eq(pk) + ) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on get_records_by_id for search term %s: %s", concept_id, e.response["Error"]["Message"], @@ -263,6 +261,10 @@ def get_record_by_id( return None except (KeyError, IndexError): # record doesn't exist return None + else: + record = response["Items"][0] + del record["label_and_type"] + return record def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]: """Retrieve concept IDs for records matching the user's query. Other methods @@ -278,7 +280,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]: matches = self.therapies.query(KeyConditionExpression=filter_exp) return [m["concept_id"] for m in matches.get("Items", None)] except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on get_refs_by_type for search term %s: %s", search_term, e.response["Error"]["Message"], @@ -296,8 +298,8 @@ def get_rxnorm_id_by_brand(self, brand_id: str) -> str | None: try: matches = self.therapies.query(KeyConditionExpression=filter_exp) except ClientError as e: - _logger.error( - "boto3 client error on rx_brand fetch for brand ID {brand_id}: {e.response['Error']['Message']}", + _logger.exception( + "boto3 client error on rx_brand fetch for brand ID %s: %s", brand_id, e.response["Error"]["Message"], ) @@ -428,7 +430,7 @@ def add_rxnorm_brand(self, brand_id: str, record_id: str) -> None: try: self.batch.put_item(Item=item) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on add_rxnorm_brand for %s -> %s: %s", brand_id, record_id, @@ -449,7 +451,7 @@ def add_record(self, record: dict, src_name: SourceName) -> None: try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on add_record for %s: %s", concept_id, e.response["Error"]["Message"], @@ -482,7 +484,7 @@ def add_merged_record(self, record: dict) -> None: try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on add_record for %s: %s", concept_id, e.response["Error"]["Message"], @@ -509,7 +511,7 @@ def _add_ref_record( try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error adding reference %s for %s with match type %s: %s", term, concept_id, @@ -541,7 +543,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: str) -> None: if code == "ConditionalCheckFailedException": msg = f"No such record exists for keys {label_and_type}, {concept_id}" raise DatabaseWriteError(msg) from e - _logger.error( + _logger.exception( "boto3 client error in `database.update_record()`: %s", e.response["Error"]["Message"], ) diff --git a/src/therapy/etl/__init__.py b/src/therapy/etl/__init__.py index 3678ad3b..39735050 100644 --- a/src/therapy/etl/__init__.py +++ b/src/therapy/etl/__init__.py @@ -13,11 +13,11 @@ from .wikidata import Wikidata __all__ = [ - "EtlError", "ChEMBL", "ChemIDplus", "DrugBank", "DrugsAtFDA", + "EtlError", "GuideToPHARMACOLOGY", "HemOnc", "Merge", diff --git a/src/therapy/etl/base.py b/src/therapy/etl/base.py index 70d2c9df..8fbd34a5 100644 --- a/src/therapy/etl/base.py +++ b/src/therapy/etl/base.py @@ -244,9 +244,9 @@ def _load_therapy(self, therapy: dict) -> None: """ try: Therapy(**therapy) - except ValidationError as e: - _logger.error("Attempted to load invalid therapy: %s", therapy) - raise e + except ValidationError: + _logger.exception("Attempted to load invalid therapy: %s", therapy) + raise therapy = self._rules.apply_rules_to_therapy(therapy) therapy = self._process_searchable_attributes(therapy) diff --git a/src/therapy/etl/hemonc.py b/src/therapy/etl/hemonc.py index a8474dd4..5f16e250 100644 --- a/src/therapy/etl/hemonc.py +++ b/src/therapy/etl/hemonc.py @@ -147,7 +147,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic try: year = self._id_to_yr(row[1]) except TypeError: - _logger.error( + _logger.exception( "Failed parse of FDA approval year ID %s for HemOnc ID %s", row[1], row[0], @@ -168,7 +168,7 @@ def _get_rels(self, therapies: dict, brand_names: dict, conditions: dict) -> dic label = conditions[row[1]] except KeyError: # concept is deprecated or otherwise unavailable - _logger.error( + _logger.exception( "Unable to process relation with indication %s -- deprecated?", row[0], ) diff --git a/src/therapy/etl/merge.py b/src/therapy/etl/merge.py index af79681f..834743e7 100644 --- a/src/therapy/etl/merge.py +++ b/src/therapy/etl/merge.py @@ -75,15 +75,17 @@ def create_merged_concepts(self, record_ids: set[str]) -> None: merge_ref = merged_record["concept_id"] try: self.database.update_merge_ref(concept_id, merge_ref) - except DatabaseWriteError as dw: - if str(dw).startswith("No such record exists"): - logger.error( + except DatabaseWriteError as e: + if str(e).startswith("No such record exists"): + logger.exception( "Updating nonexistent record: %s for merge ref to %s", concept_id, merge_ref, ) else: - logger.error(str(dw)) + logger.exception( + "Unrecognized database write error encountered" + ) uploaded_ids |= group self.database.complete_write_transaction() logger.info("Merged concept generation successful.") diff --git a/src/therapy/etl/rules.py b/src/therapy/etl/rules.py index 28d068ef..d486bf5d 100644 --- a/src/therapy/etl/rules.py +++ b/src/therapy/etl/rules.py @@ -23,6 +23,7 @@ class Rules: def __init__(self, source_name: SourceName) -> None: """Initialize rules class. + :param source_name: name of source to use, for filtering unneeded rules """ rules_path = APP_ROOT / "etl" / "rules.csv" @@ -40,6 +41,7 @@ def __init__(self, source_name: SourceName) -> None: def apply_rules_to_therapy(self, therapy: dict) -> dict: """Apply all rules to therapy. First find relevant rules, then call the apply method. + :param therapy: therapy object from ETL base :return: processed therapy object """ @@ -52,14 +54,16 @@ def _apply_rule_to_field( self, therapy: dict, field: str, value: str | list | dict | int | float ) -> dict: """Given a (field, value) rule, apply it to the given therapy object. + :param therapy: therapy object ready to load to DB :param field: name of object property field to check :param value: value to remove from field, if possible :return: therapy object with rule applied + :raise NotImplementedError: if unsupported field attempted """ if field not in {"aliases", "trade_names", "xrefs", "associated_with"}: msg = "Non-scalar fields currently not implemented" - raise Exception(msg) + raise NotImplementedError(msg) field_data = set(therapy.get(field, [])) if value in field_data: field_data.remove(value) diff --git a/tests/conftest.py b/tests/conftest.py index 46cb7bd5..9db93b1d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -216,13 +216,13 @@ def _compare_response( """ if fixture and fixture_list: msg = "Args provided for both `fixture` and `fixture_list`" - raise Exception(msg) + pytest.fail(msg) if not fixture and not fixture_list: msg = "Must pass 1 of {fixture, fixture_list}" - raise Exception(msg) + pytest.fail(msg) if fixture and num_records: msg = "`num_records` should only be given with " "`fixture_list`." - raise Exception(msg) + pytest.fail(msg) assert response.match_type == match_type if fixture: