Skip to content

Commit

Permalink
revert pathway merge
Browse files: browse the repository at this point in the history
  • Loading branch information
Maiykol committed Oct 9, 2024
1 parent 6bd4d7b commit 3fe6770
Show file tree
Hide file tree
Showing 30 changed files with 12 additions and 16,265 deletions.
320 changes: 0 additions & 320 deletions data/gene_sets/KEGG_2021_Human.txt

This file was deleted.

1,818 changes: 0 additions & 1,818 deletions data/gene_sets/Reactome_2022.txt

This file was deleted.

801 changes: 0 additions & 801 deletions data/gene_sets/WikiPathway_2023_Human.txt

This file was deleted.

320 changes: 0 additions & 320 deletions data/gene_sets/kegg_ensg.txt

This file was deleted.

320 changes: 0 additions & 320 deletions data/gene_sets/kegg_entrez.txt

This file was deleted.

320 changes: 0 additions & 320 deletions data/gene_sets/kegg_symbol.txt

This file was deleted.

320 changes: 0 additions & 320 deletions data/gene_sets/kegg_uniprot.txt

This file was deleted.

1,818 changes: 0 additions & 1,818 deletions data/gene_sets/reactome_ensg.txt

This file was deleted.

1,818 changes: 0 additions & 1,818 deletions data/gene_sets/reactome_entrez.txt

This file was deleted.

1,818 changes: 0 additions & 1,818 deletions data/gene_sets/reactome_symbol.txt

This file was deleted.

1,818 changes: 0 additions & 1,818 deletions data/gene_sets/reactome_uniprot.txt

This file was deleted.

801 changes: 0 additions & 801 deletions data/gene_sets/wiki_ensg.txt

This file was deleted.

801 changes: 0 additions & 801 deletions data/gene_sets/wiki_entrez.txt

This file was deleted.

801 changes: 0 additions & 801 deletions data/gene_sets/wiki_symbol.txt

This file was deleted.

801 changes: 0 additions & 801 deletions data/gene_sets/wiki_uniprot.txt

This file was deleted.

10 changes: 0 additions & 10 deletions drugstone/backend_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,6 @@ def set_result(results):
elif algorithm in ['quick', 'super']:
from tasks.quick_task import quick_task
quick_task(task_hook)
elif algorithm == 'pathway-enrichment':
from tasks.pathway_enrichment import pathway_enrichment
pathway_enrichment(task_hook)
elif algorithm == 'louvain-clustering':
from tasks.louvain_clustering import louvain_clustering
louvain_clustering(task_hook)
elif algorithm == 'leiden-clustering':
from tasks.leiden_clustering import leiden_clustering
leiden_clustering(task_hook)

except Exception as ex:
r.set(f'{token}_status', f'{ex}')
r.set(f'{token}_failed', '1')
Expand Down
113 changes: 2 additions & 111 deletions drugstone/management/commands/import_from_nedrex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import defaultdict

import nedrex
from nedrex.core import get_nodes, get_edges, get_api_key, iter_edges
from nedrex.core import get_nodes, get_edges, get_api_key

from drugstone import models
from drugstone.management.includes.NodeCache import NodeCache
Expand All @@ -20,21 +20,7 @@ def iter_node_collection(coll_name, eval):
for node in result:
eval(node)
offset += limit

def get_node_dict(ids, batch_size, node_type):
    """Fetch the nodes for ``ids`` in batches and return them as one list.

    The id sequence is cut into consecutive slices of at most ``batch_size``
    entries. Each slice is passed to ``get_nodes`` and the returned nodes are
    concatenated in request order.

    :param ids: sequence of node ids (must support ``len`` and slicing)
    :param batch_size: maximum number of ids sent per ``get_nodes`` call
    :param node_type: value forwarded to ``get_nodes`` as ``node_type``
    :return: list containing the nodes of every batch
    """
    collected = []
    for start in range(0, len(ids), batch_size):
        chunk = ids[start:start + batch_size]
        collected += get_nodes(node_type=node_type, node_ids=chunk)
    return collected

def iter_node_collection_with_ids(coll_name, eval, ids, mapping):
    """Load the nodes with the given ids and pass each one to a callback.

    The nodes are fetched through ``get_node_dict`` in batches of 300 ids.

    :param coll_name: node type forwarded to ``get_node_dict``
    :param eval: callable invoked as ``eval(node, mapping)`` for every node
    :param ids: sequence of node ids to load
    :param mapping: object handed unchanged to every ``eval`` call
    """
    nodes = get_node_dict(ids, 300, coll_name)
    for entry in nodes:
        eval(entry, mapping)


def iter_edge_collection(coll_name, eval):
offset = 0
Expand Down Expand Up @@ -102,102 +88,7 @@ def set_licenced(self, on):
nedrex.config.set_api_key(self.get_api_key())

self.licenced_on = on

def import_cellularComponent(self, update: bool):
    """Import GO cellular components and protein ``is_active_in`` relations.

    Steps:

    1. Download ``go-basic.obo`` and build the GO-term -> parents mapping
       with goatools (``part_of`` relationship enabled).
    2. Collect every ``protein_has_go_annotation`` edge whose qualifiers
       contain ``is_active_in``.
    3. For each GO term referenced by those edges, determine which of the
       five layer terms occur among the term itself and its ancestors.
    4. Create the missing ``CellularComponent`` rows, then the missing
       ``ActiveIn`` rows for proteins known to the cache.

    :param update: when true, rows that already exist in the database are
        not created again
    :return: number of ``ActiveIn`` rows created by this call
    """
    import os
    import subprocess
    import tempfile

    from goatools.godag.go_tasks import get_go2parents
    from goatools.obo_parser import GODag

    def find_parents_in_set(go_id, go2parents, ids_set):
        """Return the members of ``ids_set`` among ``go_id`` and its ancestors.

        Iterative walk with a visited set: every term is expanded once, and
        deep ontologies cannot hit the recursion limit. The result is sorted
        so the first entry (used as layer below) is deterministic.
        """
        found_ids = set()
        visited = set()
        stack = [go_id]
        while stack:
            current = stack.pop()
            if current in visited:
                continue
            visited.add(current)
            if current in ids_set:
                found_ids.add(current)
            if current in go2parents:
                stack.extend(go2parents[current])
        return sorted(found_ids)

    url = "http://current.geneontology.org/ontology/go-basic.obo"
    # Download into a private temporary directory: "-O" pins the output name
    # (plain wget would write "go-basic.obo.1" next to a stale copy), and
    # check=True aborts on a failed download instead of parsing nothing.
    # The directory is removed even when parsing raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        obo_path = os.path.join(tmp_dir, "go-basic.obo")
        subprocess.run(["wget", "-O", obo_path, url], check=True)
        godag = GODag(obo_path, optional_attrs={'relationship'})
        optional_relationships = {'part_of'}
        go2parents_isa = get_go2parents(godag, optional_relationships)

    cellularComponents = dict()

    # Hash -> object lookups of rows that must not be created again.
    existing_edges = dict()
    existing_nodes = dict()
    if update:
        for known_edge in models.ActiveIn.objects.all():
            existing_edges[known_edge.__hash__()] = known_edge
        for known_node in models.CellularComponent.objects.all():
            existing_nodes[known_node.__hash__()] = known_node

    def add_cellularComponent(node, mapping):
        """Register a new CellularComponent for ``node`` unless already known."""
        go_id = to_id(node['primaryDomainId'])
        display_name = node['displayName']
        GO_id = "GO:" + go_id
        if GO_id in mapping:
            cellular_component = models.CellularComponent(
                go_code=go_id, display_name=display_name, layer=mapping[GO_id][0])
        else:
            # no layer term among the ancestors: keep the model default
            cellular_component = models.CellularComponent(
                go_code=go_id, display_name=display_name)
        node_key = cellular_component.__hash__()
        if node_key not in existing_nodes:
            cellularComponents[go_id] = cellular_component
            existing_nodes[node_key] = cellular_component

    # Stream the edges; only the "is_active_in" ones are kept in memory.
    edges_relevant = []
    go_ids = set()
    for e in iter_edges("protein_has_go_annotation"):
        if "is_active_in" in e["qualifiers"]:
            go_ids.add(e["targetDomainId"])
            edges_relevant.append(e)

    # Only the keys are used for the ancestor lookup; the names document
    # which layer each GO term stands for.
    layer_ids = {'GO:0005737': "Cytoplasm", 'GO:0005634': "Nucleus", 'GO:0005576': "Extracellular", 'GO:0009986': "Cell surface", 'GO:0005886': "Plasma membrane"}
    ids_set = set(layer_ids)
    map_gos = {}
    # create mapping of go terms
    for go_id in go_ids:
        GO_id = go_id.replace("go.", "GO:")
        if GO_id not in map_gos:
            found_parents = find_parents_in_set(GO_id, go2parents_isa, ids_set)
            if found_parents:
                map_gos[GO_id] = found_parents

    # save cellular component objects
    iter_node_collection_with_ids('go', add_cellularComponent, list(go_ids), map_gos)

    if cellularComponents:
        models.CellularComponent.objects.bulk_create(list(cellularComponents.values()))
    # NOTE(review): SOURCE lost its indentation, so it is unclear whether this
    # call was guarded by the "if" above. It is unguarded here so the cache is
    # also filled when an update run creates no new components.
    self.cache.create_cellularComponent()

    bulk = []
    for edge in edges_relevant:
        protein_id = to_id(edge['sourceDomainId'])
        if not self.cache.has_protein(protein_id):
            continue
        protein = self.cache.get_protein_by_uniprot(protein_id)
        go_id = to_id(edge['targetDomainId'])
        go = self.cache.get_cellularComponent_by_go(go_id)
        active_in = models.ActiveIn(cellularComponent=go, protein=protein)
        edge_key = active_in.__hash__()
        # Checked in both modes: existing_edges starts empty without update,
        # so this only drops duplicates that would violate unique_together.
        if edge_key not in existing_edges:
            bulk.append(active_in)
            existing_edges[edge_key] = active_in
    if bulk:
        models.ActiveIn.objects.bulk_create(bulk)

    return len(bulk)

def import_proteins(self, update: bool):
self.set_licenced(False)
proteins = dict()
Expand Down
3 changes: 1 addition & 2 deletions drugstone/management/commands/make_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ def create_gt(params: List[str]) -> None:
v_drug_id = g.new_vertex_property("string")
v_internal_id = g.new_vertex_property("string")


g.edge_properties["type"] = e_type
# g.edge_properties["drugstone_id"] = e_type

Expand Down Expand Up @@ -156,7 +155,7 @@ def create_gt(params: List[str]) -> None:
elif is_ensg:
for id in ensembl_set[node.id]:
node_id_map[id].add(node.id)
drugstone_ids_to_node_ids[node.id].add(id)
drugstone_ids_to_node_ids[node.id].add(id)

for id, nodes in node_id_map.items():
v = g.add_vertex()
Expand Down
18 changes: 3 additions & 15 deletions drugstone/management/commands/populate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from django.db import OperationalError

from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, PDIDataset, Disorder, PDisDataset, \
DrDiDataset, EnsemblGene, CellularComponent
DrDiDataset, EnsemblGene
from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction, ProteinDisorderAssociation, \
DrugDisorderIndication, ActiveIn
DrugDisorderIndication

from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import NedrexImporter
Expand All @@ -27,7 +27,7 @@ def delete_model(self, model):
cursor.execute('DELETE FROM "{0}"'.format(model._meta.db_table))

def delete_all(self):
models = ['PPI', 'PDI', 'DrDi', 'Protein', 'Drug', 'Disorder', 'PDi', 'Expression', 'Tissue', 'CellularComponent']
models = ['PPI', 'PDI', 'DrDi', 'Protein', 'Drug', 'Disorder', 'PDi', 'Expression', 'Tissue']
self.delete_models(models)

def delete_models(self, model_list):
Expand Down Expand Up @@ -57,9 +57,6 @@ def delete_models(self, model_list):
self.delete_model(ExpressionLevel)
elif model_name == 'Tissue':
self.delete_model(Tissue)
elif model_name == 'CellularComponent':
self.delete_model(CellularComponent)
self.delete_model(ActiveIn)


class Command(BaseCommand):
Expand All @@ -74,8 +71,6 @@ def add_arguments(self, parser):
parser.add_argument('-p', '--proteins', action='store_true', help='Populate Proteins')
parser.add_argument('-di', '--disorders', action='store_true', help='Populate Disorders')
parser.add_argument('-dr', '--drugs', action='store_true', help='Drug file name')

parser.add_argument('-cc', '--cellular_components', action='store_true', help='Populate cellular components')

parser.add_argument('-exp', '--exp', action='store_true',
help='Tissue expression file (.gct without first 2 lines)')
Expand Down Expand Up @@ -133,7 +128,6 @@ def populate(kwargs):
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
kwargs['cellular_components'] = True

if kwargs['drugs']:
print('Populating Drugs...')
Expand Down Expand Up @@ -242,12 +236,6 @@ def populate(kwargs):
total_n += n
print(f'Populated {n} DrDi associations from DrugBank.')

if kwargs['cellular_components']:
print('Importing cellular components...')
n = NedrexImporter.import_cellularComponent(populator, update)
print(f'Populated {n} Cellular components relations.')


if kwargs['protein_protein']:
print('Importing PPIs from unlicensed NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
Expand Down
14 changes: 0 additions & 14 deletions drugstone/management/includes/NodeCache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

class NodeCache:
proteins = dict()
cellularComponent = dict()
entrez_to_uniprot = defaultdict(lambda: set())
gene_name_to_uniprot = defaultdict(lambda: set())
disorders = dict()
Expand All @@ -16,7 +15,6 @@ class NodeCache:

def clear(self):
self.proteins = dict()
self.cellularComponent = dict()
self.entrez_to_uniprot = defaultdict(lambda: set())
self.gene_name_to_uniprot = defaultdict(lambda: set())
self.disorders = dict()
Expand All @@ -41,12 +39,6 @@ def init_proteins(self):
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()

def create_cellularComponent(self):
if len(self.cellularComponent) == 0:
print("Generating cellular component...")
for cellularComponent in models.CellularComponent.objects.all():
self.cellularComponent[cellularComponent.go_code] = cellularComponent

def init_drugs(self):
if len(self.drugs) == 0:
Expand All @@ -62,9 +54,6 @@ def init_disorders(self):

def is_new_protein(self, protein: models.Protein):
return protein.uniprot_code in self.protein_updates

def has_protein(self, uniprot_id):
return uniprot_id in self.proteins

def is_new_drug(self, drug: models.Drug):
return drug.drug_id in self.drug_updates
Expand All @@ -74,9 +63,6 @@ def is_new_disease(self, disease: models.Disorder):

def get_protein_by_uniprot(self, uniprot_id):
return self.proteins[uniprot_id]

def get_cellularComponent_by_go(self, go_id):
return self.cellularComponent[go_id]

def get_proteins_by_entrez(self, entrez_id):
out = list()
Expand Down
40 changes: 0 additions & 40 deletions drugstone/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,35 +66,6 @@ class EnsemblGene(models.Model):
protein = models.ForeignKey(
"Protein", on_delete=models.CASCADE, related_name="ensg"
)

class CellularComponent(models.Model):
    """A cellular-component term identified by its GO code.

    Equality and hashing consider ``go_code`` only, so two instances with the
    same code but different display names compare equal.
    """
    id = models.AutoField(primary_key=True)
    go_code = models.CharField(max_length=10)
    display_name = models.CharField(max_length=255, default="")
    layer = models.CharField(max_length=255, default="other")

    class Meta:
        unique_together = ("go_code", "display_name")

    def __str__(self):
        return self.display_name

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on a missing ``go_code``.
        if not isinstance(other, CellularComponent):
            return NotImplemented
        return self.go_code == other.go_code

    def __ne__(self, other):
        result = self.__eq__(other)
        # NotImplemented must be passed through, not negated.
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Hash of the plain go_code string (consistent with __eq__).
        return hash(self.go_code)

    def update(self, other):
        """Copy all data fields of ``other`` onto this instance.

        Only fields declared on the model are copied; the model has no
        ``description`` field, so none is read from ``other``.
        """
        self.go_code = other.go_code
        self.display_name = other.display_name
        self.layer = other.layer


class Protein(models.Model):
Expand All @@ -108,9 +79,6 @@ class Protein(models.Model):
drugs = models.ManyToManyField(
"Drug", through="ProteinDrugInteraction", related_name="interacting_drugs"
)
cellular_components = models.ManyToManyField(
"CellularComponent", through="ActiveIn", related_name="active_in"
)
tissue_expression = models.ManyToManyField(
"Tissue", through="ExpressionLevel", related_name="interacting_drugs"
)
Expand Down Expand Up @@ -154,14 +122,6 @@ class Meta:
def __hash__(self):
return hash(f"{self.tissue_id}_{self.protein_id}")

class ActiveIn(models.Model):
    """Relation stating that a protein is active in a cellular component."""
    id = models.AutoField(primary_key=True)
    cellularComponent = models.ForeignKey("CellularComponent", on_delete=models.CASCADE)
    protein = models.ForeignKey("Protein", on_delete=models.CASCADE)

    class Meta:
        # each (component, protein) pair may be stored only once
        unique_together = ("cellularComponent", "protein")

    def __hash__(self):
        # hash over the two foreign-key id values
        pair = (self.cellularComponent_id, self.protein_id)
        return hash(pair)

class Tissue(models.Model):
id = models.AutoField(primary_key=True)
Expand Down
12 changes: 0 additions & 12 deletions drugstone/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
"""
from django.contrib import admin
from django.urls import path
from django.urls import re_path

from drugstone.views import (
FileUploadView,
map_nodes,
tasks_view,
result_view,
Expand All @@ -41,17 +39,12 @@
save_selection,
get_view,
get_view_infos,
calculate_result_for_pathway,
create_genesets,
add_edges,
apply_layout,
)

# cache time is 6 hours
urlpatterns = [
path("get_datasets/", get_datasets),
path("map_nodes/", map_nodes),
path("apply_layout/", apply_layout),
path("convert_compact_node_list/", convert_compact_ids),
path("fetch_edges/", fetch_edges),
path("task/", TaskView.as_view()),
Expand All @@ -73,9 +66,4 @@
path("save_selection", save_selection),
path("view/", get_view),
path("view_infos", get_view_infos),
path("calculate_result_for_pathway/", calculate_result_for_pathway),
path("create_genesets/", create_genesets),
path("add_edges/", add_edges),
re_path(r'^upload/(?P<filename>[^/]+)$', FileUploadView.as_view())

]
Loading

0 comments on commit 3fe6770

Please sign in to comment.