diff --git a/README.md b/README.md index ba5b6f4..8b7538a 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ $ curl -s 'https://normalize.cancervariants.org/disease/normalize?q=liver%20canc "query": "liver cancer", "warnings": null, "match_type": 80, - "normalized_id": "ncit:C34803", "disease": { - "type": "Disease", + "conceptType": "Disease", + "primaryCode": "ncit:C34803", "id": "normalize.disease:liver%20cancer", "label": "Primary Malignant Liver Neoplasm", # ... @@ -55,7 +55,7 @@ Or utilize the [Python API](https://disease-normalizer.readthedocs.io/latest/ref >>> from disease.database import create_db >>> q = QueryHandler(create_db()) >>> result = q.normalize("NSCLC") ->>> result.normalized_id +>>> result.disease.primaryCode.root 'ncit:C2926' ``` diff --git a/docs/source/index.rst b/docs/source/index.rst index 8e06707..11fad84 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -24,9 +24,9 @@ A `public REST instance of the service >> import requests >>> result = requests.get("https://normalize.cancervariants.org/disease/normalize?q=nsclc").json() - >>> result["normalized_id"] + >>> result["disease"]["primaryCode"] 'ncit:C2926' - >>> result["disease"]["aliases"][:5] + >>> next(ext for ext in result["disease"]["extensions"] if ext["name"] == "aliases")["value"][:5] ['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung'] The Disease Normalizer can also be installed locally as a Python package for fast access: @@ -37,9 +37,9 @@ The Disease Normalizer can also be installed locally as a Python package for fas >>> from disease.database import create_db >>> q = QueryHandler(create_db()) >>> result = q.normalize("nsclc") - >>> result.normalized_id + >>> result.disease.primaryCode.root 'ncit:C2926' - >>> result.disease.aliases[:5] + >>> next(ext for ext in result.disease.extensions if ext.name == "aliases").value[:5] ['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung'] The Disease Normalizer was created to support the `Knowledgebase Integration Project `_ of the `Variant Interpretation for Cancer Consortium (VICC) `_. It is developed primarily by the `Wagner Lab `_. Full source code is available on `GitHub `_. diff --git a/docs/source/install.rst b/docs/source/install.rst index aebe232..f63cc2b 100644 --- a/docs/source/install.rst +++ b/docs/source/install.rst @@ -75,9 +75,9 @@ The beginning of the response to a GET request to http://localhost:5000/disease/ { "query": "nsclc", "match_type": 60, - "normalized_id": "ncit:C2926", "disease": { "id": "normalize.disease.ncit:C2926", + "primaryCode": "ncit:C2926", "label": "Lung Non-Small Cell Carcinoma", ... diff --git a/src/disease/query.py b/src/disease/query.py index 3402fca..60d444d 100644 --- a/src/disease/query.py +++ b/src/disease/query.py @@ -304,10 +304,6 @@ def _add_merged_meta(self, response: dict) -> dict: disease = response["disease"] sources = [] - concept_id_source = response["normalized_id"].split(":")[0] - if concept_id_source in PREFIX_LOOKUP: - sources.append(PREFIX_LOOKUP[concept_id_source]) - for m in disease.mappings or []: ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system, "").lower() if ns in PREFIX_LOOKUP: @@ -361,13 +357,18 @@ def _create_concept_mapping( disease_obj = MappableConcept( id=f"normalize.disease.{record['concept_id']}", + primaryCode=code(root=record["concept_id"]), conceptType="Disease", label=record["label"], extensions=[], ) + # mappings + mappings = [ + _create_concept_mapping(record["concept_id"], relation=Relation.EXACT_MATCH) + ] source_ids = record.get("xrefs", []) + record.get("associated_with", []) - mappings = [_create_concept_mapping(source_id) for source_id in source_ids] + mappings.extend(_create_concept_mapping(source_id) for source_id in source_ids) if mappings: disease_obj.mappings = mappings @@ -378,7 +379,6 @@ def _create_concept_mapping( response["match_type"] = match_type response["disease"] = disease_obj - response["normalized_id"] = record["concept_id"] response = self._add_merged_meta(response) return NormalizationService(**response) @@ -432,7 +432,7 @@ def normalize(self, query: str) -> NormalizationService: >>> from disease.database import create_db >>> q = QueryHandler(create_db()) >>> result = q.normalize("NSCLC") - >>> result.normalized_id + >>> result.disease.primaryCode.root 'ncit:C2926' :param query: String to find normalized concept for diff --git a/src/disease/schemas.py b/src/disease/schemas.py index 7125caa..92267f5 100644 --- a/src/disease/schemas.py +++ b/src/disease/schemas.py @@ -296,7 +296,6 @@ class NormalizationService(BaseModel): query: StrictStr warnings: dict | None = None match_type: MatchType - normalized_id: str | None = None disease: MappableConcept | None = None source_meta_: dict[SourceName, SourceMeta] | None = None service_meta_: ServiceMeta @@ -307,12 +306,19 @@ class NormalizationService(BaseModel): "query": "childhood leukemia", "warnings": None, "match_type": 80, - "normalized_id": "ncit:C4989", "disease": { "id": "normalize.disease.ncit:C4989", + "primaryCode": "ncit:C4989", "conceptType": "Disease", "label": "Childhood Leukemia", "mappings": [ + { + "coding": { + "code": "ncit:C4989", + "system": "https://www.ebi.ac.uk/ols4/ontologies/ncit/classes?short_form=NCIT_", + }, + "relation": "exactMatch", + }, { "coding": { "code": "mondo:0004355", diff --git a/tests/unit/test_endpoints.py b/tests/unit/test_endpoints.py index d2b751c..5b53baf 100644 --- a/tests/unit/test_endpoints.py +++ b/tests/unit/test_endpoints.py @@ -36,7 +36,7 @@ def test_normalize(api_client): """Test /normalize endpoint.""" response = api_client.get("/disease/normalize?q=neuroblastoma") assert response.status_code == 200 - assert response.json()["normalized_id"] == "ncit:C3270" + assert response.json()["disease"]["primaryCode"] == "ncit:C3270" response = api_client.get("/disease/normalize") assert response.status_code == 422 diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index af2282f..a84cdbc 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -3,7 +3,7 @@ from datetime import datetime import pytest -from ga4gh.core.models import Extension, MappableConcept +from ga4gh.core.models import Extension, MappableConcept, code from disease.query import InvalidParameterException, QueryHandler from disease.schemas import MatchType, SourceName @@ -21,8 +21,16 @@ def neuroblastoma(): return MappableConcept( conceptType="Disease", id="normalize.disease.ncit:C3270", + primaryCode=code(root="ncit:C3270"), label="Neuroblastoma", mappings=[ + { + "coding": { + "code": "ncit:C3270", + "system": "http://purl.obolibrary.org/obo/ncit.owl", + }, + "relation": "exactMatch", + }, { "coding": { "code": "mondo:0005072", @@ -129,7 +137,17 @@ def skin_myo(): return MappableConcept( conceptType="Disease", id="normalize.disease.ncit:C167370", + primaryCode=code(root="ncit:C167370"), label="Skin Myoepithelioma", + mappings=[ + { + "coding": { + "code": "ncit:C167370", + "system": "http://purl.obolibrary.org/obo/ncit.owl", + }, + "relation": "exactMatch", + }, + ], extensions=[Extension(name="aliases", value=["Cutaneous Myoepithelioma"])], ) @@ -142,8 +160,16 @@ def mafd2(): return MappableConcept( conceptType="Disease", id="normalize.disease.mondo:0010648", + primaryCode=code(root="mondo:0010648"), label="major affective disorder 2", mappings=[ + { + "coding": { + "code": "mondo:0010648", + "system": "http://purl.obolibrary.org/obo/mondo.owl", + }, + "relation": "exactMatch", + }, { "coding": {"code": "MIM:309200", "system": "https://www.omim.org"}, "relation": "relatedMatch", @@ -196,7 +222,7 @@ def mafd2(): def compare_disease(actual, fixture): """Verify correctness of returned Disease core object against test fixture.""" - assert actual.normalized_id == fixture.id.split("normalize.disease.")[-1] + assert actual.disease.primaryCode.root == fixture.id.split("normalize.disease.")[-1] actual = actual.disease actual_keys = actual.model_dump(exclude_none=True).keys() fixture_keys = fixture.model_dump(exclude_none=True).keys()