Skip to content

Commit

Permalink
feat!: use preferred formats for MappableConcept.mappings
Browse files Browse the repository at this point in the history
close #212

* use preferred format (URI) for `system`, where possible
  • Loading branch information
korikuzma committed Dec 27, 2024
1 parent c666955 commit 6d4531b
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 33 deletions.
63 changes: 43 additions & 20 deletions src/disease/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
from disease import NAMESPACE_LOOKUP, PREFIX_LOOKUP, SOURCES_LOWER_LOOKUP, __version__
from disease.database.database import AbstractDatabase
from disease.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
Disease,
MatchType,
NamespacePrefix,
NormalizationService,
RefType,
SearchService,
Expand Down Expand Up @@ -299,19 +302,20 @@ def _add_merged_meta(self, response: dict) -> dict:
"""
sources_meta = {}
disease = response["disease"]
sources = [response["normalized_id"].split(":")[0]]
if disease.mappings:
sources += [m.coding.system for m in disease.mappings]

sources = []
concept_id_source = response["normalized_id"].split(":")[0]
if concept_id_source in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[concept_id_source])

for m in disease.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system, "").lower()
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = PREFIX_LOOKUP[src]
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response["source_meta_"] = sources_meta
return response

Expand All @@ -325,6 +329,33 @@ def _add_disease(
:param match_type: type of match achieved
:return: completed normalized response object ready to return to user
"""

def _create_concept_mapping(
    concept_id: str, relation: Relation = Relation.RELATED_MATCH
) -> ConceptMapping:
    """Create concept mapping for identifier

    :param concept_id: Concept identifier represented as a curie
        (``<namespace prefix>:<local id>``)
    :param relation: SKOS mapping relationship, default is relatedMatch
    :return: Concept mapping for identifier
    :raise ValueError: if ``concept_id`` has no ``:`` separator, or if its
        namespace prefix is not a ``NamespacePrefix`` value (as given or
        upper-cased)
    """
    # Split on the first colon only: a local ID that itself contains ":" is
    # kept intact instead of breaking a two-name unpack.
    prefix, separator, source_id = concept_id.partition(":")
    if not separator:
        err_msg = f"Concept identifier is not a CURIE: {concept_id}"
        raise ValueError(err_msg)

    # Look the prefix up as given first, then retry upper-cased.
    try:
        source = NamespacePrefix(prefix)
    except ValueError:
        try:
            source = NamespacePrefix(prefix.upper())
        except ValueError as e:
            err_msg = f"Namespace prefix not supported: {prefix}"
            raise ValueError(err_msg) from e

    # Use the system URI when one is registered for the namespace.
    # NOTE(review): the fallback is the NamespacePrefix member itself, not its
    # string value -- confirm Coding.system handles an enum member as intended.
    system = NAMESPACE_TO_SYSTEM_URI.get(source, source)

    return ConceptMapping(
        coding=Coding(code=code(source_id), system=system), relation=relation
    )

disease_obj = MappableConcept(
id=f"normalize.disease.{record['concept_id']}",
conceptType="Disease",
Expand All @@ -333,15 +364,7 @@ def _add_disease(
)

source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, source_code = source_id.split(":")
mappings.append(
ConceptMapping(
coding=Coding(code=code(source_code), system=system.lower()),
relation=Relation.RELATED_MATCH,
)
)
mappings = [_create_concept_mapping(source_id) for source_id in source_ids]
if mappings:
disease_obj.mappings = mappings

Expand Down
30 changes: 30 additions & 0 deletions src/disease/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,36 @@ class NamespacePrefix(Enum):
WIKIDATA = "wikidata"


# Source to URI (from identifiers.org, if found)
# Keys are NamespacePrefix members; values are the URI strings stored for them.
# Each value looks like a URI prefix to which a local identifier would be
# appended -- NOTE(review): confirm against how consumers build full URIs.
# Namespaces without an entry here have no URI registered.
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
NamespacePrefix.NCIT: "https://www.ebi.ac.uk/ols4/ontologies/ncit/classes?short_form=NCIT_",
NamespacePrefix.MONDO: "https://monarchinitiative.org/MONDO:",
NamespacePrefix.DO: "https://www.ebi.ac.uk/ols4/ontologies/doid/terms?obo_id=DOID:",
NamespacePrefix.DOID: "https://www.ebi.ac.uk/ols4/ontologies/doid/terms?obo_id=DOID:",
NamespacePrefix.OMIM: "https://www.omim.org/entry/",
NamespacePrefix.ONCOTREE: "https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=NAME&search=",
NamespacePrefix.EFO: "https://www.ebi.ac.uk/efo/EFO_",
NamespacePrefix.HP: "https://hpo.jax.org/app/browse/term/HP:",
NamespacePrefix.HPO: "https://hpo.jax.org/app/browse/term/HP:",
NamespacePrefix.KEGG: "https://www.kegg.jp/entry/",
NamespacePrefix.MEDDRA: "https://purl.bioontology.org/ontology/MEDDRA/",
NamespacePrefix.MEDGEN: "https://www.ncbi.nlm.nih.gov/medgen/",
NamespacePrefix.MESH: "https://id.nlm.nih.gov/mesh/",
NamespacePrefix.MP: "https://www.ebi.ac.uk/ols4/ontologies/mp/terms?obo_id=MP:",
NamespacePrefix.OBI: "https://purl.obolibrary.org/obo/",
NamespacePrefix.ORPHANET: "https://www.orpha.net/consor/cgi-bin/OC_Exp.php?Lng=EN&Expert=",
NamespacePrefix.PATO: "https://www.ebi.ac.uk/ols4/ontologies/pato/terms?obo_id=PATO:",
NamespacePrefix.UMLS: "https://linkedlifedata.com/resource/umls/id/",
NamespacePrefix.WIKIPEDIA: "https://en.wikipedia.org/wiki/",
NamespacePrefix.WIKIDATA: "https://www.wikidata.org/entity/",
}

# URI to source
# Reverse lookup built from NAMESPACE_TO_SYSTEM_URI: each URI string maps to the
# raw ``.value`` of its NamespacePrefix member (not the member itself).
# The DO/DOID and HP/HPO entries above share a URI, so each shared URI yields a
# single entry here; when two keys carry the same URI, the later one wins.
SYSTEM_URI_TO_NAMESPACE = {
system_uri: ns.value for ns, system_uri in NAMESPACE_TO_SYSTEM_URI.items()
}


class SourcePriority(IntEnum):
"""Define priorities for sources in building merged concepts."""

Expand Down
56 changes: 43 additions & 13 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,47 +24,71 @@ def neuroblastoma():
label="Neuroblastoma",
mappings=[
{
"coding": {"code": "0005072", "system": "mondo"},
"coding": {
"code": "0005072",
"system": "https://monarchinitiative.org/MONDO:",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "NBL", "system": "oncotree"},
"coding": {
"code": "NBL",
"system": "https://oncotree.mskcc.org/?version=oncotree_latest_stable&field=NAME&search=",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "769", "system": "doid"},
"coding": {
"code": "769",
"system": "https://www.ebi.ac.uk/ols4/ontologies/doid/terms?obo_id=DOID:",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C0027819", "system": "umls"},
"coding": {
"code": "C0027819",
"system": "https://linkedlifedata.com/resource/umls/id/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "9500/3", "system": "icdo"},
"relation": "relatedMatch",
},
{
"coding": {"code": "0000621", "system": "efo"},
"coding": {
"code": "0000621",
"system": "https://www.ebi.ac.uk/efo/EFO_",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "7185", "system": "gard"},
"relation": "relatedMatch",
},
{
"coding": {"code": "D009447", "system": "mesh"},
"coding": {"code": "D009447", "system": "https://id.nlm.nih.gov/mesh/"},
"relation": "relatedMatch",
},
{
"coding": {"code": "635", "system": "orphanet"},
"coding": {
"code": "635",
"system": "https://www.orpha.net/consor/cgi-bin/OC_Exp.php?Lng=EN&Expert=",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C2751421", "system": "umls"},
"coding": {
"code": "C2751421",
"system": "https://linkedlifedata.com/resource/umls/id/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "18012", "system": "medgen"},
"coding": {
"code": "18012",
"system": "https://www.ncbi.nlm.nih.gov/medgen/",
},
"relation": "relatedMatch",
},
],
Expand Down Expand Up @@ -112,19 +136,25 @@ def mafd2():
label="major affective disorder 2",
mappings=[
{
"coding": {"code": "309200", "system": "mim"},
"coding": {"code": "309200", "system": "https://www.omim.org/entry/"},
"relation": "relatedMatch",
},
{
"coding": {"code": "C564108", "system": "mesh"},
"coding": {"code": "C564108", "system": "https://id.nlm.nih.gov/mesh/"},
"relation": "relatedMatch",
},
{
"coding": {"code": "326975", "system": "medgen"},
"coding": {
"code": "326975",
"system": "https://www.ncbi.nlm.nih.gov/medgen/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C1839839", "system": "umls"},
"coding": {
"code": "C1839839",
"system": "https://linkedlifedata.com/resource/umls/id/",
},
"relation": "relatedMatch",
},
],
Expand Down

0 comments on commit 6d4531b

Please sign in to comment.