Skip to content

Commit

Permalink
parse script for rna localization
Browse files Browse the repository at this point in the history
issue #58: Table S3 of Atlas of Subcellular RNA Localization Revealed by APEX-Seq
  • Loading branch information
JustKong13 authored Aug 20, 2020
1 parent ed5f217 commit f2def4f
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions rna_localization/justin_10.1016j.cell.2019.05.027.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pandas as pd
from datanator_query_python.util import mongo_util
from datanator_query_python.config import config


class RNALocate(mongo_util.MongoUtil):
    """Load RNA localization records from an Excel sheet into MongoDB.

    Every spreadsheet row is upserted once into the ``observation``
    collection and once into the ``identifier`` collection. The records are
    attributed to DOI 10.1016/j.cell.2019.05.027.
    """

    def __init__(self, MongoDB, db, username, password):
        """Connect via ``MongoUtil`` and bind the two target collections.

        Args:
            MongoDB: Server address, forwarded unchanged to ``MongoUtil``.
            db: Database name, forwarded unchanged to ``MongoUtil``.
            username: Login name, forwarded unchanged to ``MongoUtil``.
            password: Login password, forwarded unchanged to ``MongoUtil``.
        """
        super().__init__(MongoDB=MongoDB,
                         db=db,
                         username=username,
                         password=password)
        # NOTE(review): ``db_obj`` is presumably set by MongoUtil.__init__;
        # it is not defined anywhere in this file.
        self.identifier_collection = self.db_obj['identifier']
        self.observation_collection = self.db_obj['observation']

    @staticmethod
    def _to_native(value):
        """Return *value* as a plain Python scalar if it is a NumPy scalar.

        pandas hands back NumPy scalars (``numpy.int64``, ``numpy.bool_``,
        ...) for numeric and boolean cells, and the BSON encoder rejects
        several of those types. NumPy scalars expose ``item()``, which yields
        the equivalent built-in value; anything without ``item`` (``str``,
        ``int``, ``float``, ...) is returned untouched.
        """
        convert = getattr(value, 'item', None)
        return convert() if callable(convert) else value

    def parse_rna_location(self, file_path='mmc3.xlsx',
                           sheet_name='Gene lists and orphans'):
        """Read the workbook and upsert one record per row.

        Args:
            file_path: Path of the Excel workbook to read. Defaults to the
                file name the script was originally hard-wired to.
            sheet_name: Name of the sheet to read. Defaults to the sheet the
                script was originally hard-wired to.

        The sheet must provide ``Common_Gene`` and ``Ensembl_Gene`` columns;
        the columns at positions 3 through 10 are stored as the observation's
        ``values`` list, keyed by their column header.
        """
        data = pd.read_excel(file_path, sheet_name=sheet_name)
        value_columns = list(data.columns[3:11])

        for row_number, label in enumerate(data.index):
            gene_name = self._to_native(data.at[label, 'Common_Gene'])
            ensembl_id = self._to_native(data.at[label, 'Ensembl_Gene'])

            record = {
                'entity': {'type': 'RNA',
                           'name': gene_name,
                           'identifiers': [{'namespace': 'ensembl',
                                            'value': ensembl_id}]},
                'identifier': {'namespace': 'ensembl',
                               'value': ensembl_id,
                               'description': gene_name},
                'values': [{'type': column,
                            'value': self._to_native(data.at[label, column])}
                           for column in value_columns],
                'source': [{'namespace': 'doi',
                            'value': '10.1016/j.cell.2019.05.027'}],
                'schema_version': '2.0',
            }

            # The filter is deliberately kept exactly as it was: on an upsert
            # MongoDB copies the filter's equality fields into the inserted
            # document, so these top-level keys are what a re-run matches on.
            self.observation_collection.update_one(
                {'type': 'RNA',
                 'name': gene_name,
                 'identifiers': [{'namespace': 'ensembl',
                                  'value': ensembl_id}]},
                {'$set': record},
                upsert=True)

            self.identifier_collection.update_one(
                {'namespace': 'ensembl', 'value': ensembl_id},
                {'$set': {'description': gene_name}},
                upsert=True)

            print("Row {} has been added".format(row_number))



def main():
    """Import the RNA localization sheet into the 'datanator-demo' database.

    Credentials and the server address are read from ``config.Justin``.
    """
    settings = config.Justin()
    # Download location of the workbook, kept for reference:
    # url = 'https://www.cell.com/cms/10.1016/j.cell.2019.05.027/attachment/618723b6-c0fb-4138-846e-fb09eb6b2f2f/mmc3'
    loader = RNALocate(username=settings.USERNAME,
                       password=settings.PASSWORD,
                       MongoDB=settings.SERVER,
                       db='datanator-demo')
    loader.parse_rna_location()

# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit f2def4f

Please sign in to comment.