Skip to content

Commit

Permalink
Merge pull request #145 from cancervariants/issue-89
Browse files Browse the repository at this point in the history
Issue 89
  • Loading branch information
jsstevenson authored Apr 13, 2021
2 parents b9e023b + 4632d9c commit 0bca173
Show file tree
Hide file tree
Showing 15 changed files with 201 additions and 35 deletions.
70 changes: 70 additions & 0 deletions dynamodb_revisions/add_item_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Add item_type attribute to all items."""
import sys
from pathlib import Path
from timeit import default_timer as timer
import click
from botocore.exceptions import ClientError
import logging

PROJECT_ROOT = Path(__file__).resolve().parents[1]
sys.path.append(f"{PROJECT_ROOT}")

from therapy.database import Database # noqa: E402


# Log through the 'therapy' logger, with its threshold set to DEBUG.
logger = logging.getLogger('therapy')
logger.setLevel(logging.DEBUG)

# Database handle created at import time; the functions below use
# `db.therapies` for their scan and update calls.
db = Database()


def add_item_type(label_and_type: str, concept_id: str, item_type: str):
    """Set the item_type attribute on an individual db item.

    A boto3 ``ClientError`` raised by the update is logged and not
    re-raised, so the caller continues with the next item.

    :param str label_and_type: the item's ``label_and_type`` key value
    :param str concept_id: the item's ``concept_id`` key value
    :param str item_type: value to store in the ``item_type`` attribute
    """
    key = {
        'label_and_type': label_and_type,
        'concept_id': concept_id
    }
    update_expression = "set item_type=:r"
    update_values = {':r': item_type}
    try:
        db.therapies.update_item(Key=key,
                                 UpdateExpression=update_expression,
                                 ExpressionAttributeValues=update_values)
    except ClientError as e:
        # Name this function and the affected key so a failed update can
        # be traced back to the item that needs to be retried.
        logger.error(f"boto3 client error in `add_item_type()` for "
                     f"{key}: {e.response['Error']['Message']}")


# Suffixes (the text after the last '##' in `label_and_type`) that
# add_item_types() accepts as an item_type value.
VALID_TYPES = {'identity', 'label', 'trade_name', 'rx_brand', 'alias',
               'other_id', 'xref', 'merger'}


def add_item_types():
    """Scan the therapies table and tag every item with an item_type.

    The type is the text after the last '##' in each item's
    ``label_and_type``. Items whose suffix is not in ``VALID_TYPES`` are
    logged as errors and left untouched.
    """
    # The first page is requested with no arguments; later pages resume
    # from the key returned with the previous page.
    scan_kwargs = {}
    while True:
        response = db.therapies.scan(**scan_kwargs)

        for record in response['Items']:
            label_and_type = record['label_and_type']
            suffix = label_and_type.rsplit('##', 1)[-1]
            if suffix not in VALID_TYPES:
                logger.error(f"Couldn't parse item type for record: {record}")
                continue
            add_item_type(label_and_type, record['concept_id'], suffix)

        resume_key = response.get('LastEvaluatedKey')
        if not resume_key:
            break
        scan_kwargs['ExclusiveStartKey'] = resume_key


if __name__ == '__main__':
    # Run the migration once and report the elapsed time on the console.
    click.echo("Adding item_types...")
    start = timer()
    add_item_types()
    end = timer()
    summary = f"finished adding item_types in {end - start:.5f}s."
    click.echo(summary)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Defines how metakb is packaged and distributed."""
from setuptools import setup

setup(version="0.2.14")
setup(version="0.2.15")
26 changes: 18 additions & 8 deletions tests/unit/data/therapies.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[
{
"label_and_type": "chembl:chembl11359##identity",
"src_name": "ChEMBL",
"item_type": "identity",
"concept_id": "chembl:CHEMBL11359",
"label": "CISPLATIN",
"aliases": [
Expand All @@ -16,8 +18,7 @@
"Platinol-Aq"
],
"approval_status": "approved",
"trade_names": ["PLATINOL", "PLATINOL-AQ", "CISPLATIN"],
"src_name": "ChEMBL"
"trade_names": ["PLATINOL", "PLATINOL-AQ", "CISPLATIN"]
},
{
"label_and_type": "cisplatin##label",
Expand Down Expand Up @@ -172,7 +173,8 @@
{
"label_and_type": "pms-cisplatin##trade_name",
"concept_id": "drugbank:db00515",
"src_name": "DrugBank"
"src_name": "DrugBank",
"item_type": "trade_name"
},
{
"label_and_type": "drugbank:db00522##identity",
Expand Down Expand Up @@ -350,7 +352,8 @@
{
"label_and_type": "interferon alfacon-1##label",
"concept_id": "wikidata:q15353101",
"src_name": "Wikidata"
"src_name": "Wikidata",
"item_type": "label"
},
{
"label_and_type": "wikidata:q191924##identity",
Expand Down Expand Up @@ -487,7 +490,8 @@
{
"label_and_type": "fda:5x5hb3vz3z##xref",
"concept_id": "ncit:c74021",
"src_name": "NCIt"
"src_name": "NCIt",
"item_type": "xref"
},
{
"label_and_type": "chemidplus:197904-84-0##other_id",
Expand Down Expand Up @@ -608,6 +612,7 @@
},
{
"label_and_type": "chemidplus:15663-27-1##identity",
"item_type": "identity",
"concept_id": "chemidplus:15663-27-1",
"label": "Cisplatin",
"aliases": [
Expand All @@ -623,7 +628,8 @@
{
"label_and_type": "drugbank:db00515##other_id",
"concept_id": "chemidplus:15663-27-1",
"src_name": "ChemIDplus"
"src_name": "ChemIDplus",
"item_type": "other_id"
},
{
"label_and_type": "cisplatin##label",
Expand All @@ -632,6 +638,7 @@
},
{
"label_and_type": "cis-diaminedichloroplatinum##alias",
"item_type": "alias",
"concept_id": "chemidplus:15663-27-1",
"src_name": "ChemIDplus"
},
Expand Down Expand Up @@ -735,7 +742,8 @@
{
"label_and_type": "dichlorodiammineplatinum##alias",
"concept_id": "rxcui:2555",
"src_name": "RxNorm"
"src_name": "RxNorm",
"item_type": "alias"
},
{
"label_and_type": "cis platinum##alias",
Expand Down Expand Up @@ -1416,6 +1424,7 @@
},
{
"label_and_type": "ncit:c839##merger",
"item_type": "merger",
"concept_id": "ncit:C839",
"other_ids": ["chemidplus:8025-81-8"],
"label": "Spiramycin",
Expand Down Expand Up @@ -1467,7 +1476,8 @@
{
"label_and_type": "rxcui:1041527##rx_brand",
"concept_id": "rxcui:161",
"src_name": "RxNorm"
"src_name": "RxNorm",
"item_type": "rx_brand"
},
{
"label_and_type": "rxcui:218330##rx_brand",
Expand Down
5 changes: 4 additions & 1 deletion tests/unit/test_chemidplus_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from therapy.schemas import Drug, MatchType
import pytest
from tests.conftest import compare_records
import datetime


@pytest.fixture(scope='module')
Expand Down Expand Up @@ -191,7 +192,9 @@ def test_meta(chemidplus):
response = chemidplus.search('incoherent-string-of-text')
assert response['source_meta_'].data_license == 'custom'
assert response['source_meta_'].data_license_url == 'https://www.nlm.nih.gov/databases/download/terms_and_conditions.html' # noqa: E501
assert response['source_meta_'].version == '20210204'
version = response['source_meta_'].version
assert len(version) == 8
assert datetime.datetime.strptime(version, '%Y%m%d')
assert response['source_meta_'].data_url == 'ftp://ftp.nlm.nih.gov/nlmdata/.chemidlease/' # noqa: E501
assert response['source_meta_'].rdp_url is None
assert response['source_meta_'].data_license_attributes == {
Expand Down
44 changes: 44 additions & 0 deletions tests/unit/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from tests.conftest import TEST_ROOT
import json
import os
from boto3.dynamodb.conditions import Key


@pytest.fixture(scope='module')
Expand Down Expand Up @@ -41,3 +42,46 @@ def test_tables_created(db):
existing_tables = db.dynamodb_client.list_tables()['TableNames']
assert 'therapy_concepts' in existing_tables
assert 'therapy_metadata' in existing_tables


def test_item_type(db):
    """Check that objects are tagged with item_type attribute."""
    # (label_and_type key, expected item_type) - one case per item type.
    cases = [
        ('chembl:chembl11359##identity', 'identity'),
        ('interferon alfacon-1##label', 'label'),
        ('fda:5x5hb3vz3z##xref', 'xref'),
        ('drugbank:db00515##other_id', 'other_id'),
        ('dichlorodiammineplatinum##alias', 'alias'),
        ('rxcui:1041527##rx_brand', 'rx_brand'),
        ('pms-cisplatin##trade_name', 'trade_name'),
        ('ncit:c839##merger', 'merger'),
    ]
    for label_and_type, expected in cases:
        filter_exp = Key('label_and_type').eq(label_and_type)
        response = db.therapies.query(KeyConditionExpression=filter_exp)
        item = response['Items'][0]
        assert 'item_type' in item
        assert item['item_type'] == expected
2 changes: 1 addition & 1 deletion therapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
logger.setLevel(logging.DEBUG)

# TODO: Fix so that we don't have to change in setup.cfg
__version__ = "0.2.14"
__version__ = "0.2.15"


class DownloadException(Exception):
Expand Down
25 changes: 22 additions & 3 deletions therapy/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ def create_therapies_table(self, existing_tables: List):
{
'AttributeName': 'src_name',
'AttributeType': 'S'
},
{
'AttributeName': 'item_type',
'AttributeType': 'S'
}

],
GlobalSecondaryIndexes=[
{
Expand All @@ -115,6 +118,22 @@ def create_therapies_table(self, existing_tables: List):
'ReadCapacityUnits': 10,
'WriteCapacityUnits': 10
}
},
{
'IndexName': 'item_type_index',
'KeySchema': [
{
'AttributeName': 'item_type',
'KeyType': 'HASH'
}
],
'Projection': {
'ProjectionType': 'KEYS_ONLY'
},
'ProvisionedThroughput': {
'ReadCapacityUnits': 10,
'WriteCapacityUnits': 10
}
}
],
ProvisionedThroughput={
Expand Down Expand Up @@ -227,7 +246,7 @@ def add_record(self, record: Dict, record_type="identity"):
f"{e.response['Error']['Message']}")

def add_ref_record(self, term: str, concept_id: str, ref_type: str):
"""Add auxilliary/reference record to database.
"""Add auxiliary/reference record to database.
:param str term: referent term
:param str concept_id: concept ID to refer to
Expand All @@ -253,7 +272,7 @@ def update_record(self, concept_id: str, field: str, new_value: Any):
:param str concept_id: record to update
:param str field: name of field to update
:parm str new_value: new value
:param Any new_value: new value
"""
key = {
'label_and_type': f'{concept_id.lower()}##identity',
Expand Down
10 changes: 7 additions & 3 deletions therapy/etl/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ def _load_therapy(self, record, batch):
"""Load therapy record into DynamoDB."""
record['label_and_type'] = \
f"{record['concept_id'].lower()}##identity"
record['item_type'] = 'identity'
batch.put_item(Item=record)
self._added_ids.append(record['concept_id'])

Expand All @@ -246,7 +247,8 @@ def _load_label(self, record, batch):
'label_and_type':
f"{record['label'].lower()}##label",
'concept_id': f"{record['concept_id'].lower()}",
'src_name': SourceName.CHEMBL.value
'src_name': SourceName.CHEMBL.value,
'item_type': 'label',
}
batch.put_item(Item=label)

Expand All @@ -264,7 +266,8 @@ def _load_alias(self, record, batch):
alias = {
'label_and_type': f"{alias}##alias",
'concept_id': f"{record['concept_id'].lower()}",
'src_name': SourceName.CHEMBL.value
'src_name': SourceName.CHEMBL.value,
'item_type': 'alias',
}
batch.put_item(Item=alias)

Expand All @@ -287,7 +290,8 @@ def _load_trade_name(self, record, batch):
'label_and_type':
f"{trade_name}##trade_name",
'concept_id': f"{record['concept_id'].lower()}",
'src_name': SourceName.CHEMBL.value
'src_name': SourceName.CHEMBL.value,
'item_type': 'trade_name'
}
batch.put_item(Item=trade_name)

Expand Down
5 changes: 4 additions & 1 deletion therapy/etl/chemidplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def _load_record(self, batch: BatchWriter, record: Dict):
'label_and_type': f'{record["label"].lower()}##label',
'concept_id': concept_id_ref,
'src_name': SourceName.CHEMIDPLUS.value,
'item_type': 'label',
})
else:
del record['label']
Expand All @@ -204,13 +205,15 @@ def _load_record(self, batch: BatchWriter, record: Dict):
batch.put_item(Item={
'label_and_type': pk,
'concept_id': concept_id_ref,
'src_name': SourceName.CHEMIDPLUS.value
'src_name': SourceName.CHEMIDPLUS.value,
'item_type': field_type,
})
else:
del record[field]

record['src_name'] = SourceName.CHEMIDPLUS.value
record['label_and_type'] = f'{concept_id_ref}##identity'
record['item_type'] = 'identity'
batch.put_item(Item=record)
self._added_ids.append(record['concept_id'])

Expand Down
Loading

0 comments on commit 0bca173

Please sign in to comment.