Skip to content

Commit

Permalink
handle incompatible CIF + better reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
tdudgeon committed Nov 24, 2023
1 parent 791726b commit 5807064
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DEV-GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Make sure you create the venv using Python 3.10 (or later).
From your clean virtual environment you can now install the run-time and development
dependencies like this: -

pip install .[dev]
pip install -e .[dev]

The project also relies on CI that is run in GitHub using the actions defined
in the files you'll find in the `.github/workflows` directory.
Expand Down
28 changes: 19 additions & 9 deletions src/xchemalign/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import argparse
import os
import logging
import traceback
import shutil
from pathlib import Path

Expand Down Expand Up @@ -101,6 +101,7 @@ def _get_xmap_path_or_none(output_path, binding_event):
else:
return None


def get_datasets_from_crystals(crystals, output_path):
# dataset_ids = [DatasetID(dtag=dtag) for dtag in crystals]
# paths to files will be defined like this: upload_1/crystallographic_files/8dz1/8dz1.pdb
Expand Down Expand Up @@ -724,14 +725,23 @@ def _extract_components(self, crystals, aligner_meta):
pdbxtal.apo_desolv_file.relative_to(self.base_dir)
)
if cif_file:
pdbxtal.create_ligands(k2, k3, str(self.base_dir / cif_file))
v4[Constants.META_LIGAND_MOL] = (
str(pdbxtal.ligand_base_file.relative_to(self.base_dir)) + '.mol'
)
v4[Constants.META_LIGAND_PDB] = (
str(pdbxtal.ligand_base_file.relative_to(self.base_dir)) + '.pdb'
)
v4[Constants.META_LIGAND_SMILES] = pdbxtal.smiles
try:
pdbxtal.create_ligands(k2, k3, str(self.base_dir / cif_file))
v4[Constants.META_LIGAND_MOL] = (
str(pdbxtal.ligand_base_file.relative_to(self.base_dir)) + '.mol'
)
v4[Constants.META_LIGAND_PDB] = (
str(pdbxtal.ligand_base_file.relative_to(self.base_dir)) + '.pdb'
)
v4[Constants.META_LIGAND_SMILES] = pdbxtal.smiles
except:
num_errors += 1
self.logger.warn(
"failed to create ligand for",
k1,
"Check that ligand in PDB and CIF files are compatible ",
)
traceback.print_exc()

return num_errors

Expand Down
20 changes: 6 additions & 14 deletions src/xchemalign/collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,6 @@ def _copy_files(self, meta):
elif type == Constants.CONFIG_TYPE_MODEL_BUILDING:
self.logger.warn("CIF entry missing for {}".format(xtal_name))


# Handle historical ligand binding events (in particular pull up their event map SHA256s for comparing)
hist_event_maps = {}
for ligand_binding_data in historical_xtal_data.get(Constants.META_BINDING_EVENT, []):
Expand All @@ -637,11 +636,9 @@ def _copy_files(self, meta):
if path:
digest = utils.gen_sha256(path)
ccp4_output = (
cryst_path / xtal_name / "{}_{}_{}_{}.ccp4".format(
xtal_name,
ligand_key[0],
ligand_key[1],
ligand_key[2])
cryst_path
/ xtal_name
/ "{}_{}_{}_{}.ccp4".format(xtal_name, ligand_key[0], ligand_key[1], ligand_key[2])
)
attested_ligand_events[ligand_key] = (
path,
Expand Down Expand Up @@ -680,7 +677,6 @@ def _copy_files(self, meta):
)
forbidden_unattested_ligand_events[xtal_name] = ligand_key


else:
self.logger.error("PDB entry missing for {}".format(xtal_name))
return meta
Expand Down Expand Up @@ -749,7 +745,6 @@ def _copy_files(self, meta):
# Mark that copying failed
unsucessfully_copied_event_maps[ligand_key] = True


# Create ligand binding events for the dataset
ligand_binding_events = []
for ligand_key in dataset_ligands:
Expand All @@ -767,7 +762,7 @@ def _copy_files(self, meta):
Constants.META_PROT_RES: ligand_key[2],
Constants.META_PROT_INDEX: attested_ligand_event_data[4],
Constants.META_PROT_BDC: attested_ligand_event_data[5],
}
}
# Add binding events for permitted ligands without an event map
elif ligand_key in unattested_ligand_events:
data = {
Expand All @@ -787,7 +782,6 @@ def _copy_files(self, meta):
# Add data on the ligand binding events to the new dataset to add
data_to_add[Constants.META_BINDING_EVENT] = ligand_binding_events


new_xtal_data = {}
for k, v in historical_xtal_data.items():
new_xtal_data[k] = v
Expand All @@ -803,7 +797,7 @@ def _copy_files(self, meta):
)
for dtag, ligand_key in forbidden_unattested_ligand_events.items():
lk = ligand_key
exception = exception + f"{dtag} : Model: {lk[0]}; Chain: {lk[1]}; Residue: {lk[2]}"
exception = exception + f"{dtag} : Model: {lk[0]}; Chain: {lk[1]}; Residue: {lk[2]}\n"
raise Exception(exception)

return meta
Expand Down Expand Up @@ -1002,9 +996,8 @@ def get_closest_event_map(
else:
return None, None


def get_dataset_event_maps(
self, xtal_name: str, ligand_coords: dict[(str,str,int), np.array], event_tables: dict[Path, pd.DataFrame]
self, xtal_name: str, ligand_coords: dict[(str, str, int), np.array], event_tables: dict[Path, pd.DataFrame]
) -> dict[tuple[str, str, str], Path]:
# Get the relevant structure

Expand All @@ -1018,7 +1011,6 @@ def get_dataset_event_maps(
if closest_event_map:
closest_event_maps[ligand_key] = (closest_event_map, data[0], data[1])


return closest_event_maps


Expand Down

0 comments on commit 5807064

Please sign in to comment.