Skip to content

Commit

Permalink
Merge pull request #132 from xchem/FragalysisAPILuigiTasks
Browse files Browse the repository at this point in the history
Fragalysis api luigi tasks
  • Loading branch information
reskyner authored Jan 15, 2021
2 parents c791711 + 8fc3234 commit 220ebc3
Show file tree
Hide file tree
Showing 10 changed files with 789 additions and 369 deletions.
3 changes: 1 addition & 2 deletions api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from xchem_db.views import TargetView, CompoundsView, ReferenceView, SoakdbFilesView, CrystalView, DataProcessingView, \
DimpleView, LabView, RefinementView, PanddaAnalysisView, PanddaRunView, PanddaSiteView, PanddaEventView, \
ProasisOutView, FragspectCrystalView
FragspectCrystalView

# from rest_framework_swagger.views import get_swagger_view

Expand All @@ -24,7 +24,6 @@
router.register(r'pandda_run', PanddaRunView)
router.register(r'pandda_site', PanddaSiteView)
router.register(r'pandda_event', PanddaEventView)
router.register(r'proasis_out', ProasisOutView)
router.register(r'fragspect', FragspectCrystalView)

# schema_view = get_swagger_view(title='Pipeline API')
Expand Down
18 changes: 16 additions & 2 deletions functions/db_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,22 @@ def transfer_table(translate_dict, filename, model):
# find relevant entries for foreign keys and set as value - crystal names and proteins

if key == 'crystal_name' and model != models.Crystal:
d[key] = models.Crystal.objects.get(crystal_name=d[key], visit=models.SoakdbFiles.objects.get(
filename=filename), compound=models.Compounds.objects.get_or_create(smiles=compound_smiles)[0])
# d[key] = models.Crystal.objects.get(crystal_name=d[key], visit=models.SoakdbFiles.objects.get(
# filename=filename), compound=models.Compounds.objects.get_or_create(smiles=compound_smiles)[0])
compound_obj, is_new = models.Compounds.objects.get_or_create(smiles=compound_smiles)
filter_set = models.Crystal.objects.filter(crystal_name=d[key],
visit=models.SoakdbFiles.objects.get(filename=filename),
compound=compound_obj)
if len(filter_set) == 0:
d[key] = models.Crystal.objects.get(crystal_name=d[key], visit=models.SoakdbFiles.objects.get(filename=filename))
d[key].compound = compound_obj
d[key].save()
elif len(filter_set) == 1:
d[key] = filter_set[0]
else:
print('Not sure how we got here, but more than two crystals with the same name for the same crystal')
raise Exception(f'More than 1 crystal in same visit! {d[key]} - {filename}')


if key == 'target':
d[key] = models.Target.objects.get_or_create(target_name=d[key])[0]
Expand Down
10 changes: 10 additions & 0 deletions functions/misc_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,13 @@ def antechamber_mol2(rd_mol, input, output):
out = out.decode('ascii')

return out


def get_filepath_of_potential_symlink(file):
    """Return the link target of *file* if it is a symlink, else *file* itself.

    The value returned for a symlink is exactly what ``os.readlink`` reports
    (it is not normalised or made absolute here). Any ``OSError`` from
    ``os.readlink`` -- e.g. the path is not a symlink or does not exist --
    results in the input being handed back unchanged.
    """
    try:
        return os.readlink(file)
    except OSError:
        # Not a readable symlink: fall back to the path we were given.
        return file

398 changes: 237 additions & 161 deletions luigi_classes/prepare_fragalysis.py

Large diffs are not rendered by default.

197 changes: 197 additions & 0 deletions luigi_classes/transfer_fragalysis_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import glob

from functions import misc_functions
from setup_django import setup_django

setup_django()

import datetime
import luigi
import re
import os

from xchem_db import models
from django.core.exceptions import ObjectDoesNotExist
from .config_classes import SoakDBConfig, DirectoriesConfig


class BatchTranslateFragalysisAPIOutput(luigi.Task):
    '''
    Batch wrapper: requires one TranslateFragalysisAPIOutput task for every
    'aligned' folder under the staging directory that contains .mol files and
    whose first .mol file passes compare_mod_date. Once the requirements are
    met, an empty '.done' marker named after self.date is written.
    '''
    resources = {'django': 1}
    # NOTE: parameter defaults below are evaluated once, when the class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(default=datetime.datetime.now().strftime("%Y%m%d%H"))
    log_directory = luigi.Parameter(default=DirectoriesConfig().log_directory)
    staging_directory = luigi.Parameter(default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(default=DirectoriesConfig().input_directory)

    def requires(self):
        '''
        Walk the staging directory and build the list of per-folder translation tasks.
        '''
        tasks = []
        for dirpath, _dirnames, _filenames in os.walk(self.staging_directory):
            # Only folders whose path mentions 'aligned' are candidates.
            if 'aligned' not in dirpath:
                continue
            mol_files = glob.glob(os.path.join(dirpath, '*.mol'))
            if len(mol_files) == 0:
                continue
            # The modification date of the first .mol file decides whether to fire.
            if compare_mod_date(mol_files[0]):
                tasks.append(TranslateFragalysisAPIOutput(target=dirpath))
        return tasks

    def output(self):
        '''
        Marker file under <log_directory>/Translation/, named after self.date.
        '''
        marker_name = 'Translation/BatchTranslate_' + str(self.date) + '.done'
        return luigi.LocalTarget(os.path.join(DirectoriesConfig().log_directory, marker_name))

    def run(self):
        '''
        Write the (empty) marker file.
        '''
        with self.output().open('w') as done_marker:
            done_marker.write('')


class TranslateFragalysisAPIOutput(luigi.Task):
    '''
    Translate a single aligned ligand folder (self.target) into database
    entries via Translate_Files, then write an empty '.done' marker named
    after the folder's basename.
    '''
    resources = {'django': 1}
    # NOTE: parameter defaults below are evaluated once, when the class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(default=datetime.datetime.now().strftime("%Y%m%d%H"))
    log_directory = luigi.Parameter(default=DirectoriesConfig().log_directory)
    staging_directory = luigi.Parameter(default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(default=DirectoriesConfig().input_directory)
    target = luigi.Parameter()

    def requires(self):
        '''
        This task has no upstream requirements.
        '''
        return None

    def output(self):
        '''
        Marker file under <log_directory>/Translation/, named after the target folder.
        '''
        marker_name = 'Translation/Translate_' + str(os.path.basename(self.target)) + '.done'
        return luigi.LocalTarget(os.path.join(DirectoriesConfig().log_directory, marker_name))

    def run(self):
        '''
        Derive the target name from the path and hand the folder to Translate_Files.
        '''
        # The target name is the path component directly before 'aligned'.
        path_parts = self.target.split('/')
        aligned_index = path_parts.index('aligned')
        target_name = path_parts[aligned_index - 1]

        target_staging = os.path.join(self.staging_directory, target_name)
        target_input = os.path.join(self.input_directory, target_name)
        Translate_Files(
            fragment_abs_dirname=self.target,
            target_name=target_name,
            staging_directory=target_staging,
            input_directory=target_input,
        )

        with self.output().open('w') as done_marker:
            done_marker.write('')


def compare_mod_date(molfile):
    '''
    Decide whether the ligand behind a .mol file needs (re)processing.

    molfile = path to a ligand .mol file; its basename minus '.mol' is used as the ligand name

    Returns True when no matching FragalysisTarget or FragalysisLigand row
    exists yet, or when the file's modification date is newer than the one
    stored on the ligand row. Returns False when the modification date cannot
    be resolved or the stored date is at least as recent.
    '''
    new_date = misc_functions.get_mod_date(molfile)
    # The failure sentinel is the *string* 'None'; compare by value, since
    # identity comparison against a str literal is not guaranteed to match.
    if new_date == 'None':
        # Cannot resolve mod date, do not process!
        return False

    ligand_name = os.path.basename(molfile).replace('.mol', '')
    # NOTE(review): this is the ligand name minus its final '_suffix', whereas
    # Translate_Files stores FragalysisTarget.target as the directory name that
    # precedes 'aligned' -- confirm the two are meant to coincide.
    target = ligand_name.rsplit('_', 1)[0]
    try:
        frag_target = models.FragalysisTarget.objects.get(target=target)
    except models.FragalysisTarget.DoesNotExist:
        print(f'{target} is a new Fragalysis Target')
        return True

    try:
        # Look up by 'ligand_name', the field name Translate_Files uses when it
        # reads and creates FragalysisLigand rows.
        frag_ligand = models.FragalysisLigand.objects.get(ligand_name=ligand_name,
                                                          fragalysis_target=frag_target)
    except models.FragalysisLigand.DoesNotExist:
        print(f'{ligand_name} is a new Ligand for {target}')
        return True

    old_date = frag_ligand.modification_date
    return int(new_date) > int(old_date)

def Translate_Files(fragment_abs_dirname, target_name, staging_directory, input_directory):
    '''
    Record one aligned Fragalysis ligand folder in the database.

    fragment_abs_dirname = folder path for particular fragalysis Entry
    target_name = Name of the Crystal system: e.g. 70X or Mpro
    staging_directory = stored as staging_root when the FragalysisTarget is created
    input_directory = stored as input_root when the FragalysisTarget is created;
                      also where the '<crystal_name>.pdb' symlink is looked up

    Steps:
      1. get or create the FragalysisTarget for target_name
      2. update or create the FragalysisLigand with the expected file paths
      3. if '<input_directory>/<crystal_name>.pdb' is a symlink, use the visit
         found in its target path to locate the Crystal and get or create the
         linking Ligand row

    Always returns None. Raises IndexError if the symlink target contains no
    visit-like component matching the regex below.
    '''
    # Should be target_name_[0-9]{1}[A-Z]{1}
    ligand_name = os.path.basename(fragment_abs_dirname)

    # Should be prefix of ligand_name e.g. 70x-x0001_0A would be 70x-x0001
    crystal_name = ligand_name.rsplit('_', 1)[0]

    def _ligand_file(suffix):
        # Path of '<ligand_name><suffix>' inside the ligand folder.
        return os.path.join(fragment_abs_dirname, f'{ligand_name}{suffix}')

    # Get or Create FragTarget
    try:
        frag_target = models.FragalysisTarget.objects.get(target=target_name)
    except models.FragalysisTarget.DoesNotExist:
        frag_target = models.FragalysisTarget.objects.create(
            open=True,
            target=target_name,
            staging_root=staging_directory,
            input_root=input_directory
        )
        frag_target.save()

    mod_date = misc_functions.get_mod_date(_ligand_file('.mol'))
    # The failure sentinel is the *string* 'None'; compare by value, since
    # identity comparison against a str literal is not guaranteed to match.
    if mod_date == 'None':
        mod_date = 0

    # Frag Target information is edited post-pipeline?
    # Should test all paths to makesure they exist otherwise set to None?
    ligand_props = {
        'ligand_name': ligand_name,
        'fragalysis_target': frag_target,
        'crystallographic_bound': _ligand_file('_bound.pdb'),
        'lig_mol_file': _ligand_file('.mol'),
        'apo_pdb': _ligand_file('_apo.pdb'),
        'bound_pdb': _ligand_file('.pdb'),
        'smiles_file': _ligand_file('_smiles.txt'),
        'desolvated_pdb': _ligand_file('_apo-desolv.pdb'),
        'solvated_pdb': _ligand_file('_apo-solv.pdb'),
        'pandda_event': _ligand_file('_event_0.ccp4'),
        'two_fofc': _ligand_file('_2fofc.map'),
        'fofc': _ligand_file('_fofc.map'),
        'modification_date': int(mod_date)
    }

    # Update the existing ligand row, or create it if this is the first sighting.
    try:
        frag_ligand = models.FragalysisLigand.objects.get(ligand_name=ligand_name,
                                                          fragalysis_target=frag_target)
    except models.FragalysisLigand.DoesNotExist:
        print('Creating Fragalysis Ligand')
        print(ligand_props)
        frag_ligand = models.FragalysisLigand.objects.create(**ligand_props)
        frag_ligand.save()
    else:
        for key, value in ligand_props.items():
            print(key)
            print(value)
            setattr(frag_ligand, key, value)
        frag_ligand.save()

    # Bonza, now link frag_ligand to ligand table for internal stuff.
    symlink = os.path.join(input_directory, f'{crystal_name}.pdb')
    try:
        path = os.readlink(symlink)
    except OSError:
        # Not a symlink (or missing): nothing to trace back to a visit.
        print('Ligand is directly deposited into input directory, no known reference crystal')
        return None

    # First visit-like component of the link target (two letters, five digits, '-', digits).
    visit = re.findall('[a-z]{2}[0-9]{5}-[0-9]*', path)[0]
    # This should only return one thing...
    crys = models.Crystal.objects.filter(crystal_name=crystal_name).filter(visit__visit=visit)
    n_crystals = len(crys)

    if n_crystals > 1:
        # Ambiguous match: report the details and leave the ligand unlinked.
        print(ligand_name)
        print(symlink)
        print(crystal_name)
        print(visit)
        print(crys)
        print(crys.values())
    elif n_crystals == 1:
        crystal = crys[0]
        ligand_entry, created = models.Ligand.objects.get_or_create(
            fragalysis_ligand=frag_ligand,
            crystal=crystal,
            target=crystal.target,
            compound=crystal.compound
        )
        if created:
            print('Created New Ligand Entry!')
    else:
        print(f'No base Crystal entry for {ligand_name}, will not be linked')

2 changes: 1 addition & 1 deletion overview/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from functions.db_functions import check_file_status
from functions.misc_functions import get_mod_date
from xchem_db.models import Target, Crystal, Refinement, SoakdbFiles, PanddaEvent, ProasisHits, ProasisOut
from xchem_db.models import Target, Crystal, Refinement, SoakdbFiles, PanddaEvent


def targets(request):
Expand Down
24 changes: 14 additions & 10 deletions start_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
# from luigi_classes.transfer_proasis import InitDBEntries, UploadLeads, WriteBlackLists, UploadHits, AddProjects
# from luigi_classes.pull_proasis import GetOutFiles
from luigi_classes.transfer_soakdb import StartTransfers
from luigi_classes.prepare_fragalysis import BatchCreateSymbolicLinks, BatchAlignTargets
from luigi_classes.prepare_fragalysis import BatchCreateSymbolicLinks, BatchAlignTargets, BatchCutMaps
# from luigi_classes.transfer_verne import UpdateVerne
from luigi_classes.config_classes import SentryConfig, SoakDBConfig, DirectoriesConfig

from luigi_classes.transfer_fragalysis_api import BatchTranslateFragalysisAPIOutput
import os
import datetime
import glob
Expand Down Expand Up @@ -54,6 +54,8 @@ def requires(self):
yield StartTransfers()
yield BatchCreateSymbolicLinks()
yield BatchAlignTargets()
yield BatchCutMaps()
yield BatchTranslateFragalysisAPIOutput()
# yield fragalysis Stuff?
# yield AddProjects()
# yield TransferPandda(date_time=self.date_time, soak_db_filepath=self.soak_db_filepath)
Expand Down Expand Up @@ -91,15 +93,17 @@ def output(self):
f'pipe_run_{datetime.datetime.now().strftime("%Y%m%d%H%M")}.done'))

def run(self):
paths = [# TransferPandda(date_time=self.date_time, soak_db_filepath=self.soak_db_filepath).output().path,
# AnnotateAllEvents(date_time=self.date_time, soak_db_filepath=self.soak_db_filepath).output().path,
# InitDBEntries(date=self.date, hit_directory=self.hit_directory).output().path,
# UploadLeads(date=self.date, hit_directory=self.hit_directory).output().path,
# UploadHits(date=self.date, hit_directory=self.hit_directory).output().path,
# WriteBlackLists(date=self.date, hit_directory=self.hit_directory).output().path,
os.path.join(self.log_directory, 'pipe.done')]

# paths = [# TransferPandda(date_time=self.date_time, soak_db_filepath=self.soak_db_filepath).output().path,
# AnnotateAllEvents(date_time=self.date_time, soak_db_filepath=self.soak_db_filepath).output().path,
# InitDBEntries(date=self.date, hit_directory=self.hit_directory).output().path,
# UploadLeads(date=self.date, hit_directory=self.hit_directory).output().path,
# UploadHits(date=self.date, hit_directory=self.hit_directory).output().path,
# WriteBlackLists(date=self.date, hit_directory=self.hit_directory).output().path,
# os.path.join(self.log_directory, 'pipe.done')]
paths = [x for x in glob.glob(os.path.join(self.log_directory, '*', '*')) if 'done' in x]
paths.extend(os.path.join(self.log_directory, 'pipe.done'))
paths.extend(glob.glob(str(self.log_directory + '*pipe_run_*.done')))
paths = [x for x in paths if 'cut' not in x] # I don't think I want to constantly try to cut the maps... May delete later.

for path in paths:
if os.path.isfile(path):
Expand Down
Loading

0 comments on commit 220ebc3

Please sign in to comment.