Skip to content

Commit

Permalink
general fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tdudgeon committed Nov 16, 2023
1 parent 6459ec4 commit f84f389
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 34 deletions.
6 changes: 4 additions & 2 deletions src/xchemalign/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import argparse
import os
import logging
import shutil
from pathlib import Path

Expand Down Expand Up @@ -217,7 +218,7 @@ def run(self):
def _write_output(self, collator_dict, aligner_dict):
# keep a copy of the xtalforms and assemblies configs
self._copy_file_to_version_dir(self.xtalforms_file)
# self._copy_file_to_version_dir(self.assemblies_file)
self._copy_file_to_version_dir(self.assemblies_file)

collator_dict[Constants.META_XTALFORMS] = aligner_dict[Constants.META_XTALFORMS]
collator_dict[Constants.META_CONFORMER_SITES] = aligner_dict[Constants.META_CONFORMER_SITES]
Expand Down Expand Up @@ -544,8 +545,10 @@ def _perform_alignments(self, meta):

new_meta[Constants.META_XTALS] = {}
for dtag, crystal in crystals.items():
self.logger.info('looking at', dtag)
# Skip if no output for this dataset
if dtag not in fs_model.alignments:
self.logger.warn('skipping {} as not in alignments'.format(dtag))
continue

new_meta[Constants.META_XTALS][dtag] = {}
Expand Down Expand Up @@ -574,7 +577,6 @@ def _perform_alignments(self, meta):
Constants.META_AIGNED_EVENT_MAP: aligned_event_map_path,
Constants.META_AIGNED_X_MAP: aligned_xmap_path,
Constants.META_AIGNED_DIFF_MAP: aligned_diff_map_path,

}

## Add the reference alignments
Expand Down
18 changes: 11 additions & 7 deletions src/xchemalign/collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,14 @@ def __init__(self, config_file, logger=None):
self.inputs = []
inputs = utils.find_property(config, Constants.CONFIG_INPUTS)
self.logger.info("found {} inputs".format(len(inputs)))

if inputs:
for input in inputs:
# Determine which datasets to exclude
excluded_datasets = utils.find_property(input, Constants.CONFIG_EXCLUDE)
if not excluded_datasets:
excluded_datasets = []

input_path = utils.find_path(input, Constants.CONFIG_DIR)
type = utils.find_property(input, Constants.CONFIG_TYPE)
if type == Constants.CONFIG_TYPE_MODEL_BUILDING:
Expand All @@ -165,11 +171,6 @@ def __init__(self, config_file, logger=None):
else:
panddas_paths = []

# Determine which datasets to exclude
excluded_datasets = utils.find_property(input, Constants.CONFIG_EXCLUDE)
if not excluded_datasets:
excluded_datasets = []

self.logger.info("adding input", input_path)
self.inputs.append(
Input(
Expand Down Expand Up @@ -342,7 +343,10 @@ def _validate_soakdb_input(self, input, crystals):
else:
expanded_files.append(None)
missing_files += 1
self._log_warning("PDB file for {} not found: {}".format(xtal_name, full_inputpath))
self._log_warning(
"PDB file for {} not found: {}. Skipping entry".format(xtal_name, full_inputpath)
)
continue

# if we have a PDB file then continue to look for the others
colname = Constants.SOAKDB_COL_MTZ
Expand Down Expand Up @@ -852,7 +856,7 @@ def _write_metadata(self, meta, all_xtals, new_xtals):
def _copy_config(self):
f = shutil.copy2(self.config_file, self.output_path / self.version_dir / 'config.yaml')
if not f:
print("Failed to copy config file to {}".format((self.output_path / self.version_dir)))
self.logger.warn("Failed to copy config file to {}".format((self.output_path / self.version_dir)))
return False
return True

Expand Down
15 changes: 15 additions & 0 deletions src/xchemalign/copier.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ def validate(self):

return len(self.errors), len(self.warnings)

def check_path(self, path, expected_path):
    """Report whether ``path`` is located under ``expected_path``.

    The check uses ``path.relative_to(expected_path)``, which compares the
    path components only. A warning is logged when ``path`` is not under
    ``expected_path``.

    :param path: pathlib path of the file to check
    :param expected_path: pathlib path of the directory expected to contain it
    :return: True if ``path`` is relative to ``expected_path``, else False
    """
    try:
        # Only success or failure matters; the relative path is not needed.
        path.relative_to(expected_path)
    except ValueError:
        self.logger.warn('unexpected path for file:', path)
        return False
    return True

def copy_files(self):
if self.base_path and self.input_path.is_absolute():
self.logger.warn("INFO: making input_path relative as a base_path is specified")
Expand All @@ -160,12 +168,19 @@ def copy_files(self):
for index, row in df.iterrows():
count += 1
xtal_name = row["CrystalName"]
status_str = str(row[Constants.SOAKDB_COL_REFINEMENT_OUTCOME])
if status_str.startswith("7"):
self.logger.info("ignoring {} as status is 7".format(xtal_name))
continue

xtal_dir_path = collator.generate_xtal_dir(self.input_path, xtal_name)
self.logger.info("processing {} {}".format(count, xtal_name))
expected_path = self.base_path / self.input_path / Constants.DEFAULT_MODEL_BUILDING_DIR

file = row["RefinementPDB_latest"]
if file:
path = Path(file)
self.check_path(path, expected_path)
ok = self.copy_file(path, xtal_dir_path)
if ok:
num_files += 1
Expand Down
51 changes: 26 additions & 25 deletions src/xchemalign/pdb_xtal.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,31 +226,32 @@ def create_ligands(self, chain: str, res_id, cif_file: str):

return mol

def extract_sequences(self):
    """Extract one residue sequence per chain from the apo-desolv PDB file.

    If ``self.apo_desolv_file`` is not set, ``create_apo_solv_desolv()`` is
    called first. The file ``<filebase>_apo-desolv.pdb`` in
    ``self.output_dir`` is then read and every line starting with ``ATOM``
    is parsed using fixed column positions.

    A new ``ProteinSeq(chain, [], start=resno)`` is started whenever the
    chain ID differs from that of the previous ATOM line. Consecutive atoms
    with the same residue number are treated as one residue. Gaps in the
    residue numbering within a chain are filled with ``'UNK'``.

    The residue-number tracking is reset for every chain. Previously the
    last residue number of one chain leaked into the next, which dropped the
    first residue of a chain when the numbers coincided and could prepend
    spurious ``'UNK'`` entries.
    NOTE(review): no ``'UNK'`` padding is added before the first residue of
    a chain, on the assumption that ``start`` already records its number.
    Confirm against ``ProteinSeq``.

    :return: list of the ``ProteinSeq`` objects created, in file order
    """
    if not self.apo_desolv_file:
        self.create_apo_solv_desolv()

    seqs = []
    curr_chain = None
    curr_seq = None
    prev_resno = None  # last residue number seen in the current chain
    # The context manager closes the file handle, which was previously leaked.
    with open(self.output_dir / (self.filebase + "_apo-desolv.pdb"), "rt") as pdb_file:
        for line in pdb_file:
            if not line.startswith('ATOM'):
                continue
            code = line[17:20].strip()
            chain = line[21].strip()
            resno = int(line[22:26].strip())
            if chain != curr_chain:
                curr_chain = chain
                curr_seq = ProteinSeq(chain, [], start=resno)
                seqs.append(curr_seq)
                prev_resno = None
            if resno == prev_resno:
                # another atom of the residue already recorded
                continue
            if prev_resno is not None:
                # a non-positive gap (numbering going backwards) adds nothing
                curr_seq.seq.extend(['UNK'] * (resno - prev_resno - 1))
            curr_seq.seq.append(code)
            prev_resno = resno
    return seqs
def extract_sequences(self, pdb_file=None):
    """Extract one residue sequence per chain from the ATOM lines of a PDB file.

    Every line starting with ``ATOM`` is parsed using fixed column
    positions: residue name from ``[17:20]``, chain ID from ``[21]`` and
    residue number from ``[22:26]``.

    A new ``ProteinSeq(chain, [], start=resno)`` is started whenever the
    chain ID differs from that of the previous ATOM line. Consecutive atoms
    with the same residue number are treated as one residue. Gaps in the
    residue numbering within a chain are filled with ``'UNK'``.

    The residue-number tracking is reset for every chain. Previously the
    last residue number of one chain leaked into the next, which dropped the
    first residue of a chain when the numbers coincided and could prepend
    spurious ``'UNK'`` entries.
    NOTE(review): no ``'UNK'`` padding is added before the first residue of
    a chain, on the assumption that ``start`` already records its number.
    Confirm against ``ProteinSeq``.

    :param pdb_file: path of the PDB file to read; ``self.pdbfile`` is used
        when this is not given (or is falsy)
    :return: list of the ``ProteinSeq`` objects created, in file order
    :raises ValueError: if the residue number columns of an ATOM line do not
        hold an integer
    """
    if not pdb_file:
        pdb_file = self.pdbfile

    seqs = []
    curr_chain = None
    curr_seq = None
    prev_resno = None  # last residue number seen in the current chain
    with open(pdb_file, "rt") as pdb:
        for line in pdb:
            if not line.startswith('ATOM'):
                continue
            code = line[17:20].strip()
            chain = line[21].strip()
            resno = int(line[22:26].strip())
            if chain != curr_chain:
                curr_chain = chain
                curr_seq = ProteinSeq(chain, [], start=resno)
                seqs.append(curr_seq)
                prev_resno = None
            if resno == prev_resno:
                # another atom of the residue already recorded
                continue
            if prev_resno is not None:
                # a non-positive gap (numbering going backwards) adds nothing
                curr_seq.seq.extend(['UNK'] * (resno - prev_resno - 1))
            curr_seq.seq.append(code)
            prev_resno = resno
    return seqs


class ProteinSeq:
Expand Down

0 comments on commit f84f389

Please sign in to comment.