Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: remove normalized_id and leverage MappableConcept.primaryCode #216

Merged
merged 13 commits into from
Dec 31, 2024
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ $ curl -s 'https://normalize.cancervariants.org/disease/normalize?q=liver%20canc
"query": "liver cancer",
"warnings": null,
"match_type": 80,
"normalized_id": "ncit:C34803",
"disease": {
"type": "Disease",
"conceptType": "Disease",
"primaryCode": "ncit:C34803",
"id": "normalize.disease.ncit:C34803",
"label": "Primary Malignant Liver Neoplasm",
# ...
Expand All @@ -55,7 +55,7 @@ Or utilize the [Python API](https://disease-normalizer.readthedocs.io/latest/ref
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("NSCLC")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'
```

Expand Down
8 changes: 4 additions & 4 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ A `public REST instance of the service <https://normalize.cancervariants.org/dis

>>> import requests
>>> result = requests.get("https://normalize.cancervariants.org/disease/normalize?q=nsclc").json()
>>> result["normalized_id"]
>>> result["disease"]["primaryCode"]
'ncit:C2926'
>>> result["disease"]["aliases"][:5]
>>> next(ext for ext in result["disease"]["extensions"] if ext["name"] == "aliases")["value"][:5]
['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung']

The Disease Normalizer can also be installed locally as a Python package for fast access:
Expand All @@ -37,9 +37,9 @@ The Disease Normalizer can also be installed locally as a Python package for fas
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("nsclc")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'
>>> result.disease.aliases[:5]
>>> next(ext for ext in result.disease.extensions if ext.name == "aliases").value[:5]
['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung']

The Disease Normalizer was created to support the `Knowledgebase Integration Project <https://cancervariants.org/projects/integration/>`_ of the `Variant Interpretation for Cancer Consortium (VICC) <https://cancervariants.org/>`_. It is developed primarily by the `Wagner Lab <https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab>`_. Full source code is available on `GitHub <https://github.com/cancervariants/disease-normalization>`_.
Expand Down
2 changes: 1 addition & 1 deletion docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ The beginning of the response to a GET request to http://localhost:5000/disease/
{
"query": "nsclc",
"match_type": 60,
"normalized_id": "ncit:C2926",
"disease": {
"id": "normalize.disease.ncit:C2926",
"primaryCode": "ncit:C2926",
"label": "Lung Non-Small Cell Carcinoma",

...
Expand Down
14 changes: 7 additions & 7 deletions src/disease/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,6 @@ def _add_merged_meta(self, response: dict) -> dict:
disease = response["disease"]

sources = []
concept_id_source = response["normalized_id"].split(":")[0]
if concept_id_source in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[concept_id_source])

for m in disease.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system, "").lower()
if ns in PREFIX_LOOKUP:
Expand Down Expand Up @@ -361,13 +357,18 @@ def _create_concept_mapping(

disease_obj = MappableConcept(
id=f"normalize.disease.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Disease",
label=record["label"],
extensions=[],
)

# mappings
mappings = [
_create_concept_mapping(record["concept_id"], relation=Relation.EXACT_MATCH)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = [_create_concept_mapping(source_id) for source_id in source_ids]
mappings.extend(_create_concept_mapping(source_id) for source_id in source_ids)
if mappings:
disease_obj.mappings = mappings

Expand All @@ -378,7 +379,6 @@ def _create_concept_mapping(

response["match_type"] = match_type
response["disease"] = disease_obj
response["normalized_id"] = record["concept_id"]
response = self._add_merged_meta(response)
return NormalizationService(**response)

Expand Down Expand Up @@ -432,7 +432,7 @@ def normalize(self, query: str) -> NormalizationService:
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("NSCLC")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'

:param query: String to find normalized concept for
Expand Down
10 changes: 8 additions & 2 deletions src/disease/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ class NormalizationService(BaseModel):
query: StrictStr
warnings: dict | None = None
match_type: MatchType
normalized_id: str | None = None
disease: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] | None = None
service_meta_: ServiceMeta
Expand All @@ -307,12 +306,19 @@ class NormalizationService(BaseModel):
"query": "childhood leukemia",
"warnings": None,
"match_type": 80,
"normalized_id": "ncit:C4989",
"disease": {
"id": "normalize.disease.ncit:C4989",
"primaryCode": "ncit:C4989",
"conceptType": "Disease",
"label": "Childhood Leukemia",
"mappings": [
{
"coding": {
"code": "ncit:C4989",
"system": "https://www.ebi.ac.uk/ols4/ontologies/ncit/classes?short_form=NCIT_",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "mondo:0004355",
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_normalize(api_client):
"""Test /normalize endpoint."""
response = api_client.get("/disease/normalize?q=neuroblastoma")
assert response.status_code == 200
assert response.json()["normalized_id"] == "ncit:C3270"
assert response.json()["disease"]["primaryCode"] == "ncit:C3270"

response = api_client.get("/disease/normalize")
assert response.status_code == 422
30 changes: 28 additions & 2 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime

import pytest
from ga4gh.core.models import Extension, MappableConcept
from ga4gh.core.models import Extension, MappableConcept, code

from disease.query import InvalidParameterException, QueryHandler
from disease.schemas import MatchType, SourceName
Expand All @@ -21,8 +21,16 @@ def neuroblastoma():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.ncit:C3270",
primaryCode=code(root="ncit:C3270"),
label="Neuroblastoma",
mappings=[
{
"coding": {
"code": "ncit:C3270",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "mondo:0005072",
Expand Down Expand Up @@ -129,7 +137,17 @@ def skin_myo():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.ncit:C167370",
primaryCode=code(root="ncit:C167370"),
label="Skin Myoepithelioma",
mappings=[
{
"coding": {
"code": "ncit:C167370",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "exactMatch",
},
],
extensions=[Extension(name="aliases", value=["Cutaneous Myoepithelioma"])],
)

Expand All @@ -142,8 +160,16 @@ def mafd2():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.mondo:0010648",
primaryCode=code(root="mondo:0010648"),
label="major affective disorder 2",
mappings=[
{
"coding": {
"code": "mondo:0010648",
"system": "http://purl.obolibrary.org/obo/mondo.owl",
},
"relation": "exactMatch",
},
{
"coding": {"code": "MIM:309200", "system": "https://www.omim.org"},
"relation": "relatedMatch",
Expand Down Expand Up @@ -196,7 +222,7 @@ def mafd2():

def compare_disease(actual, fixture):
"""Verify correctness of returned Disease core object against test fixture."""
assert actual.normalized_id == fixture.id.split("normalize.disease.")[-1]
assert actual.disease.primaryCode.root == fixture.id.split("normalize.disease.")[-1]
actual = actual.disease
actual_keys = actual.model_dump(exclude_none=True).keys()
fixture_keys = fixture.model_dump(exclude_none=True).keys()
Expand Down
Loading