Skip to content

Commit

Permalink
Merge branch 'main' into ruff-update
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 4, 2025
2 parents 0ecbe06 + 3783333 commit 3e798d0
Show file tree
Hide file tree
Showing 9 changed files with 1,019 additions and 790 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:

- name: Install dependencies
run: |
python3 -m pip install ".[etl,test]"
python3 -m pip install ".[etl,tests]"
- name: Build local DynamoDB
run: |
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

## Installation

Install from [PyPI](https://pypi.org/projects/thera-py):
Install from [PyPI](https://pypi.org/project/thera-py):

```shell
python3 -m pip install thera-py
Expand Down Expand Up @@ -158,7 +158,7 @@ source venv/bin/activate
Install development dependencies and `pre-commit`:

```shell
python3 -m pip install -e '.[dev,test]'
python3 -m pip install -e '.[dev,tests]'
pre-commit install
```

Expand Down
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,23 @@ dependencies = [
"uvicorn",
"click",
"boto3",
"ga4gh.vrs~=2.0.0a8",
"ga4gh.vrs==2.0.0a13",
"disease-normalizer~=0.7.0",
]
dynamic = ["version"]

[project.optional-dependencies]
etl = [
"disease-normalizer[etl]~=0.5.0",
"disease-normalizer[etl]~=0.7.0",
"owlready2",
"rdflib",
"wikibaseintegrator>=0.12.0",
"wags-tails~=0.2.0",
"wags-tails~=0.2.2",
"tqdm",
"rich",
"pyyaml"
]
test = ["pytest", "pytest-cov", "pytest-mock", "isodate"]
tests = ["pytest", "pytest-cov", "pytest-mock", "isodate"]
dev = ["pre-commit>=3.7.1", "ruff==0.8.4", "lxml", "xmlformatter", "types-pyyaml"]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion src/therapy/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _normalize_disease(self, query: str) -> str | None:
if term in self._disease_cache:
return self._disease_cache[term]
response = self.disease_normalizer.normalize(term)
normalized_id = response.normalized_id
normalized_id = response.disease.primaryCode.root if response.disease else None
self._disease_cache[term] = normalized_id
if normalized_id is None:
_logger.warning("Failed to normalize disease term: %s", query)
Expand Down
4 changes: 2 additions & 2 deletions src/therapy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def custom_openapi() -> dict:
"Return merged strongest-match concept for query string " "provided by user."
)
merged_matches_summary = (
"Given query, provide merged normalized record as a " "Therapeutic Agent."
"Given query, provide merged normalized record as a Therapy Mappable Concept."
)
merged_response_descr = "A response to a validly-formed query."
normalize_q_descr = "Therapy to normalize."
Expand Down Expand Up @@ -148,7 +148,7 @@ def normalize(
:param q: therapy search term
:param bool infer_namespace: if True, try to infer namespace from query term.
:returns: JSON response with matching normalized record provided as a
Therapeutic Agent, and source metadata
Therapy Mappable Concept, and source metadata
"""
try:
response = query_handler.normalize(html.unescape(q), infer_namespace)
Expand Down
145 changes: 97 additions & 48 deletions src/therapy/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
from typing import Any, TypeVar

from botocore.exceptions import ClientError
from ga4gh.core import domain_models, entity_models
from disease.schemas import NAMESPACE_TO_SYSTEM_URI as DISEASE_NAMESPACE_TO_SYSTEM_URI
from disease.schemas import NamespacePrefix as DiseaseNamespacePrefix
from ga4gh.core.models import (
Coding,
ConceptMapping,
Extension,
MappableConcept,
Relation,
code,
)
from uvicorn.config import logger

from therapy import NAMESPACE_LUIS, PREFIX_LOOKUP, SOURCES
from therapy.database import AbstractDatabase
from therapy.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
BaseNormalizationService,
HasIndication,
MatchesNormalized,
Expand Down Expand Up @@ -350,20 +361,17 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
:return: completed response object.
"""
sources_meta = {}
therapeutic_agent = response.therapeutic_agent
sources = [response.normalized_id.split(":")[0]] # type: ignore[union-attr]
if therapeutic_agent.mappings: # type: ignore[union-attr]
sources += [m.coding.system for m in therapeutic_agent.mappings] # type: ignore[union-attr]
therapy = response.therapy

sources = []
for m in therapy.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = SourceName(PREFIX_LOOKUP[src])
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response.source_meta_ = sources_meta # type: ignore[assignment]
return response

Expand All @@ -377,42 +385,88 @@ def _record_order(self, record: dict) -> tuple[int, str]:
source_rank = SourcePriority[src]
return source_rank, record["concept_id"]

def _add_therapeutic_agent(
def _add_therapy(
self,
response: NormalizationService,
record: dict,
match_type: MatchType,
) -> NormalizationService:
"""Format received DB record as therapeutic agent and update response object.
"""Format received DB record as Mappable Concept and update response object.
:param NormalizationService response: in-progress response object
:param Dict record: record as stored in DB
:param str query: query string from user request
:param MatchType match_type: type of match achieved
:return: completed response object ready to return to user
"""
therapeutic_agent_obj = domain_models.TherapeuticAgent(
id=f"normalize.therapy.{record['concept_id']}", label=record.get("label")

def _create_concept_mapping(
    concept_id: str,
    relation: Relation,
    ns_to_system_uri: dict[str, str],
    ns_prefix: type[NamespacePrefix] | type[DiseaseNamespacePrefix],
) -> ConceptMapping:
    """Create concept mapping for therapy or disease identifier

    The namespace (the text before the first ``:``) is validated against
    ``ns_prefix``, first as given and then upper-cased. ``system`` is the
    value registered for that namespace in ``ns_to_system_uri``; when the
    namespace has no entry, the namespace prefix itself is used instead.

    :param concept_id: Concept identifier represented as a curie
    :param relation: SKOS mapping relationship (required; there is no default)
    :param ns_to_system_uri: Dictionary containing mapping from namespace to
        system URI
    :param ns_prefix: Namespace prefix enum class; it is called with the
        namespace string to validate it
    :return: Concept mapping for therapy or disease identifier
    :raise ValueError: if the namespace is not recognized by ``ns_prefix`` in
        either its original or upper-cased form
    """
    prefix = concept_id.split(":")[0]

    try:
        namespace = ns_prefix(prefix)
    except ValueError:
        # Some sources store lower-case prefixes; retry upper-cased before
        # rejecting the identifier.
        try:
            namespace = ns_prefix(prefix.upper())
        except ValueError as e:
            err_msg = f"Namespace prefix not supported: {prefix}"
            raise ValueError(err_msg) from e

    # Fall back to the namespace prefix when no system URI is registered.
    system = ns_to_system_uri.get(namespace, namespace)

    return ConceptMapping(
        coding=Coding(code=code(concept_id), system=system), relation=relation
    )

therapy_obj = MappableConcept(
id=f"normalize.therapy.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Therapy",
label=record.get("label"),
)

# mappings
mappings = [
_create_concept_mapping(
concept_id=record["concept_id"],
relation=Relation.EXACT_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, code = source_id.split(":")
mappings.append(
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
)
mappings.extend(
_create_concept_mapping(
concept_id=source_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
for source_id in source_ids
)
if mappings:
therapeutic_agent_obj.mappings = mappings
therapy_obj.mappings = mappings

extensions = []
if "aliases" in record:
therapeutic_agent_obj.alternativeLabels = record["aliases"]
extensions.append(Extension(name="aliases", value=record["aliases"]))

extensions = []
if any(
filter(
lambda f: f in record,
Expand All @@ -435,49 +489,44 @@ def _add_therapeutic_agent(
indication = self._get_indication(ind_db)

if indication.normalized_disease_id:
system, code = indication.normalized_disease_id.split(":")
mappings = [
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
_create_concept_mapping(
concept_id=indication.normalized_disease_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=DISEASE_NAMESPACE_TO_SYSTEM_URI,
ns_prefix=DiseaseNamespacePrefix,
)
]
else:
mappings = []
ind_disease_obj = domain_models.Disease(
ind_disease_obj = MappableConcept(
id=indication.disease_id,
conceptType="Disease",
label=indication.disease_label,
mappings=mappings or None,
)

if indication.supplemental_info:
ind_disease_obj.extensions = [
entity_models.Extension(name=k, value=v)
Extension(name=k, value=v)
for k, v in indication.supplemental_info.items()
]
inds_list.append(ind_disease_obj.model_dump(exclude_none=True))
if inds_list:
approv_value["has_indication"] = inds_list

approv = entity_models.Extension(
name="regulatory_approval", value=approv_value
)
approv = Extension(name="regulatory_approval", value=approv_value)
extensions.append(approv)

trade_names = record.get("trade_names")
if trade_names:
extensions.append(
entity_models.Extension(name="trade_names", value=trade_names)
)
extensions.append(Extension(name="trade_names", value=trade_names))

if extensions:
therapeutic_agent_obj.extensions = extensions
therapy_obj.extensions = extensions

response.match_type = match_type
response.normalized_id = record["concept_id"]
response.therapeutic_agent = therapeutic_agent_obj
response.therapy = therapy_obj
return self._add_merged_meta(response)

def _resolve_merge(
Expand Down Expand Up @@ -537,7 +586,7 @@ def normalize(self, query: str, infer: bool = True) -> NormalizationService:
response = NormalizationService(**self._prepare_normalized_response(query))

return self._perform_normalized_lookup(
response, query, infer, self._add_therapeutic_agent
response, query, infer, self._add_therapy
)

def _construct_drug_match(self, record: dict) -> Therapy:
Expand Down
Loading

0 comments on commit 3e798d0

Please sign in to comment.