diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b84feb7..409b61b 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.3 +current_version = 0.3.0 commit = True tag = True diff --git a/.gitignore b/.gitignore index 71e6852..b2e00f4 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,20 @@ target/ # pyenv python configuration file .python-version + +# Jupyter Notebook +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# swap +.sw[a-p] +.*.sw[a-p] +# session +Session.vim +# temporary +.netrwhist +*~ +tags + +local/ + diff --git a/privileged_residues/__init__.py b/privileged_residues/__init__.py index 0a24a38..f2ea943 100644 --- a/privileged_residues/__init__.py +++ b/privileged_residues/__init__.py @@ -2,7 +2,7 @@ """Top-level package for Privileged Residues.""" from .privileged_residues import * -from . import hbond_ray_pairs, position_residue, process_networks +from . import bidentify, hbond_ray_pairs, position_residue, process_bidentates, process_networks __author__ = """Brian D. Weitzner""" __email__ = 'weitzner@uw.edu' diff --git a/privileged_residues/bidentify.py b/privileged_residues/bidentify.py new file mode 100644 index 0000000..c7efb44 --- /dev/null +++ b/privileged_residues/bidentify.py @@ -0,0 +1,204 @@ +import os +import numpy as np +import itertools + +# The following packages are not pip-installable +# The import calls are wrapped in a try/except block +try: + import pyrosetta + from pyrosetta.rosetta.core.id import AtomID +except ImportError: + print('Module "pyrosetta" not found in the current environment! ' + 'Go to http://www.pyrosetta.org to download it.') + pass + + +from . import hbond_ray_pairs +from . import position_residue as pr +from . import process_networks as pn + +# the object of this submodule is to scan over all surface residues and +# identify pairs of rays to look up in each hash table +def look_up_interactions(pairs_of_rays, ht, cart_resl, ori_resl, cart_bound): + """ + """ + hits = [] + for r1, r2 in pairs_of_rays: + hashed_rays = int(hbond_ray_pairs.hash_rays(r1, r2)) + try: + positioning_info = ht[hashed_rays] + print(positioning_info) + except KeyError: + continue + + ray_frame = hbond_ray_pairs.get_frame_for_rays(r1, r2) + # now positioning_info is a set of possible values + fxnl_grps = list(pn.fxnl_groups.keys()) + + for pos_info in positioning_info: + xform = None + try: + xform = pr.get_ht_from_table(pos_info[1], + cart_resl, + ori_resl, + cart_bound) + except: + continue + + rsd = fxnl_grps[pos_info[0]] + pos_grp = pn.fxnl_groups[rsd] + + p = pyrosetta.Pose() + pyrosetta.make_pose_from_sequence(p, 'Z[{}]'.format(rsd), 'fa_standard') + coords = [np.array([*p.residues[1].xyz(atom)]) for atom in pos_grp.atoms] + c = np.stack(coords) + + pos_frame = hbond_ray_pairs.get_frame_for_coords(c) + xf = np.dot(np.dot(ray_frame, xform), np.linalg.inv(pos_frame)) + pr.transform_pose(p, xf) + hits.append(p) + return hits + + +def look_for_sc_bb_bidentates(p): + """ + """ + + pairs_of_rays = [] + for i in range(1, len(p.residues) + 1): + # look at the amino and carboxyl rays, swap the order and look ahead + # and back to see if the previous residue's carboxyl or the next + # residue's amino ray can be used to place a bidentate interaction + + + rsd = p.residues[i] + + # ray reference atoms in backbone + H = rsd.attached_H_begin(rsd.atom_index("N")) + N = rsd.atom_base(H) + O = rsd.atom_index("O") + C = rsd.atom_base(O) + + n_ray = hbond_ray_pairs.create_ray(rsd.xyz(H), rsd.xyz(N)) + c_ray = hbond_ray_pairs.create_ray(rsd.xyz(O), rsd.xyz(C)) + + pairs_of_rays += [(n_ray, c_ray), (c_ray, n_ray)] + + if i != 1: + prev_rsd = p.residues[i - 1] + + O = prev_rsd.atom_index("O") + C = prev_rsd.first_adjacent_heavy_atom(O) + + prev_c = hbond_ray_pairs.create_ray(prev_rsd.xyz(O), + prev_rsd.xyz(C)) + pairs_of_rays += [(n_ray, prev_c), (prev_c, n_ray)] + + if i != len(p.residues): + next_rsd = p.residues[i + 1] + H = next_rsd.attached_H_begin(rsd.atom_index("N")) + N = next_rsd.first_adjacent_heavy_atom(H) + next_n = hbond_ray_pairs.create_ray(next_rsd.xyz(H), + next_rsd.xyz(N)) + pairs_of_rays += [(next_n, c_ray), (c_ray, next_n)] + return pairs_of_rays + +def look_for_sc_scbb_bidentates(p): + """ + """ + + pairs_of_rays = [] + for rsd in p.residues: + H = rsd.attached_H_begin(rsd.atom_index("N")) + N = rsd.atom_base(H) + O = rsd.atom_index("O") + C = rsd.atom_base(O) + + n_ray = hbond_ray_pairs.create_ray(rsd.xyz(H), rsd.xyz(N)) + c_ray = hbond_ray_pairs.create_ray(rsd.xyz(O), rsd.xyz(C)) + + for i in rsd.Hpos_polar_sc(): + ray = hbond_ray_pairs.create_ray(rsd.xyz(i), rsd.xyz(rsd.atom_base(i))) + + pairs_of_rays += [(n_ray, ray), (ray, n_ray)] + pairs_of_rays += [(c_ray, ray), (ray, c_ray)] + + for i in rsd.accpt_pos_sc(): + ray = hbond_ray_pairs.create_ray(rsd.xyz(i), rsd.xyz(rsd.atom_base(i))) + + pairs_of_rays += [(n_ray, ray), (ray, n_ray)] + pairs_of_rays += [(c_ray, ray), (ray, c_ray)] + + return pairs_of_rays + +def look_for_sc_sc_bidentates(p): + """ + """ + + pairs_of_rays = [] + for rsd in p.residues: + for (i, j) in itertools.permutations(list(rsd.Hpos_polar_sc()) + list(rsd.accpt_pos_sc()), 2): + if i == j: + continue + + first = hbond_ray_pairs.create_ray(rsd.xyz(i), rsd.xyz(rsd.atom_base(i))) + second = hbond_ray_pairs.create_ray(rsd.xyz(j), rsd.xyz(rsd.atom_base(j))) + + pairs_of_rays += [(first, second)] + + return pairs_of_rays + +def look_up_connected_network(p): + """ + """ + + pairs_of_rays = [] + + hbondset = hbond_ray_pairs.identify_bonded_pairs(p, hbond_ray_pairs.find_hbonds(p)) + + for hbond in hbondset.hbonds(): + don_res = p.residue(hbond.don_res()) + acc_res = p.residue(hbond.acc_res()) + + don_list = list(don_res.Hpos_polar_sc()) + list(don_res.accpt_pos_sc()) + acc_list = list(acc_res.Hpos_polar_sc()) + list(acc_res.accpt_pos_sc()) + + for (j, k) in itertools.product(don_list, acc_list): + if (j == hbond.don_hatm() or k == hbond.acc_atm()): + continue + + print(*((y, x.atom_name(x.atom_base(y))) for (x, y) in zip([don_res, acc_res], [j, k]))) + + first = hbond_ray_pairs.create_ray(don_res.xyz(j), don_res.xyz(don_res.atom_base(j))) + second = hbond_ray_pairs.create_ray(acc_res.xyz(k), acc_res.xyz(acc_res.atom_base(k))) + + pairs_of_rays += [(first, second)] + pairs_of_rays += [(second, first)] + + return pairs_of_rays + +def find_hashable_positions(p, ht): + # look up each type of privileged interaction + # + hits = [] + pairs_of_rays = [] + + # networks: + # iterate over pairs of residues with side chains that are + # already hydrogen bonded to one another + # Sc_Sc bidentates: + # iteratate over every residue, throw out non-bidentate capable + # residues + pairs_of_rays += look_for_sc_sc_bidentates(p) + # Sc_ScBb bidentates: + # iterate over pairs of neighboring residues on the surface + pairs_of_rays += look_for_sc_scbb_bidentates(p) + # Sc_Bb bidentates: + # iterate over pairs of neighboring residues on the surface + pairs_of_rays += look_for_sc_bb_bidentates(p) + # hits += look_up_interactions(pairs_of_rays, ht) + pairs_of_rays += look_up_connected_network(p) + + # tmp + return pairs_of_rays + diff --git a/privileged_residues/hbond_ray_pairs.py b/privileged_residues/hbond_ray_pairs.py index 67e466f..08c28a9 100644 --- a/privileged_residues/hbond_ray_pairs.py +++ b/privileged_residues/hbond_ray_pairs.py @@ -135,9 +135,9 @@ def create_ray(center, base): 2. The point at which the ray is centered is at `center`. Args: - center (numpy.array): A (2, 3) array representing the + center (numpy.array): A (1, 3) array representing the coordinate at which to center the resulting ray. - base (numpy.array): A (2, 3) array representing the base used + base (numpy.array): A (1, 3) array representing the base used to determine the direction of the resulting ray. Returns: @@ -147,12 +147,12 @@ def create_ray(center, base): direction = center - base direction /= np.linalg.norm(direction) - ray = np.empty((2, 4)) - ray[0][:-1] = center - ray[0][-1] = 1. # point! + ray = np.empty((4, 2)) + ray[:-1, 0] = center + ray[-1, 0] = 1. # point! - ray[1][:-1] = direction - ray[1][-1] = 0. # direction! + ray[:-1, 1] = direction + ray[-1, 1] = 0. # point! return ray @@ -256,9 +256,9 @@ def hash_rays(r1, r2, resl=2, lever=10): `r1` and `r2` must be the same shape. Args: - r1 (numpy.array): An (N, 2, 4) array representing N rays in + r1 (numpy.array): An (N, 4, 2) array representing N rays in space each with a point and a unit direction. - r2 (numpy.array): An (N, 2, 4) array representing N rays in + r2 (numpy.array): An (N, 4, 2) array representing N rays in space each with a point and a unit direction. Returns: @@ -298,7 +298,7 @@ def get_frame_for_coords(coords): assert(coords.shape == (3, 3)) - frame = np.empty((4, 4)) + frame = np.zeros((4, 4)) frame[:3, 3] = coords[2] frame[3, 3] = 1. @@ -309,7 +309,7 @@ def get_frame_for_coords(coords): y_hat = coords[0] - coords[1] y_hat -= np.dot(y_hat, z_hat) * z_hat y_hat /= np.linalg.norm(y_hat) - assert_allclose(np.dot(y_hat, z_hat), 0.) + assert_allclose(np.dot(y_hat, z_hat), 0., atol=1E-10) frame[:3, 1] = y_hat @@ -332,20 +332,20 @@ def get_frame_for_rays(r1, r2): the point of r2 to lie in the xy-plane. Args: - r1 (numpy.array): A (2, 4) array representing a ray in space + r1 (numpy.array): A (4, 2) array representing a ray in space with a point and a unit direction. - r2 (numpy.array): A (2, 4) array representing a ray in space + r2 (numpy.array): A (4, 2) array representing a ray in space with a point and a unit direction. Returns: numpy.array: A (4, 4) array representing the coordinate frame as a homogenous transform. """ - trans = r1[0] - x_hat = r1[1] + trans = r1[:, 0] + x_hat = r1[:, 1] assert_allclose(np.linalg.norm(x_hat), 1.) # make sure this is unity - xy_pojnt = r2[0] - trans + xy_pojnt = r2[:, 0] - trans y_component = xy_pojnt - (np.dot(xy_pojnt, x_hat) * x_hat) y_hat = y_component / np.linalg.norm(y_component) diff --git a/privileged_residues/position_residue.py b/privileged_residues/position_residue.py index c7749c0..37e63e5 100644 --- a/privileged_residues/position_residue.py +++ b/privileged_residues/position_residue.py @@ -84,8 +84,8 @@ def get_ht_from_table(bin, cart_resl, ori_resl, cart_bound): return xh.get_center([bin])['raw'].squeeze() -def tranform_pose(p, xform): - """Tranform the atomic coordinates of Pose by a specified +def transform_pose(p, xform): + """Transform the atomic coordinates of Pose by a specified homogenous tranform. Args: @@ -96,8 +96,7 @@ def tranform_pose(p, xform): coords = [] for i in range(1, p.size() + 1): for j in range(1, p.residues[i].natoms() + 1): - c = np.ones(4) - c[:3] = np.array([*p.residues[i].xyz(j)]) + c = np.array([*p.residues[i].xyz(j)] + [1]) coords.append(c) new_coords = np.dot(xform, np.stack(coords).T) @@ -109,5 +108,6 @@ def tranform_pose(p, xform): p.residues[i].atom(j).xyz(xyzVec(x, y, z)) tot_atoms += 1 + if __name__ == '__main__': print('Don\'t execute me, bruh.') diff --git a/privileged_residues/privileged_residues.py b/privileged_residues/privileged_residues.py index 22c1be2..93c2d57 100644 --- a/privileged_residues/privileged_residues.py +++ b/privileged_residues/privileged_residues.py @@ -19,6 +19,25 @@ logging.basicConfig(level=logging.WARN) _logging_handler = 'interactive' +_PYROSETTA_INIT = False + + +def _init_pyrosetta(): + # initialize pyrosetta + assert(HAVE_PYROSETTA) + global _PYROSETTA_INIT + if _PYROSETTA_INIT: + return + + from . import process_networks as pn + params_files_list = [path.join(_dir, t.resName) + '.params' for _, t in + pn.fxnl_groups.items()] + opts = ['-ignore_waters false', '-mute core', + '-extra_res_fa {}'.format(' '.join(params_files_list))] + pyrosetta.init(extra_options=' '.join(opts), + set_logging_handler=_logging_handler) + _PYROSETTA_INIT = True + def hash_ray_pairs_from_pdb_file(argv): """ @@ -45,56 +64,122 @@ def hash_ray_pairs_from_pdb_file(argv): hrp.hash_rays(np.stack(donors), np.stack(acceptors)) -def hash_networks_and_write_to_file(fname, out_dir, cart_resl=.1, ori_resl=2., - cart_bound=16.): + +def hash_all_network_files_in_directory(root_dir, out_dir='hash_tables', + fname='functional_stubs.pdb', + cart_resl=.1, ori_resl=2., + cart_bound=16.): """ Args: fname (str): out_dir (str): + fname (str): A pattern for network files in the subdirectories + of root_dir. cart_resl (float): The cartesian resolution of the BCC lattice. Defaults to 0.1. ori_resl (float): The orientational resolution of the BCC lattice. In general, ori_resl should be roughly 20 * cart_resl. Defaults to 2.0. cart_bound (float): The bounds of the lattice. Defaults to 16. + """ - import numpy as np - import pickle - from os import path, makedirs - from . import process_networks as pn - # TODO: use argparse to set these variables - # TODO: adapt this script so all arrangements are searched in a single - # execution. This will ensure that the groupings are appropriate - fname = 'arrangements/imidazole_carboxylate_guanidinium/functional_stubs.pdb' - out_dir = 'hash_tables' + from os import path, makedirs, walk + + _init_pyrosetta() + + # get a list of the files that need to be processed + arrangement_files = [] + output_file_base_names = [] + for directory, sub_dirs, files in walk(path.abspath(root_dir)): + if fname in files: + _, arr = path.split(directory) + arrangement_files.append(path.join(directory, fname)) + output_file_base_names.append(path.join(out_dir), + path.basename(path.normpath(directory))) - assert(path.isfile(fname)) if not path.exists(out_dir): makedirs(out_dir) - params_files_list = [path.join(_dir, t.resName) + '.params' for _, t in - pn.fxnl_groups.items()] + # iterate over all of the files and make a giant ndarry containing all + # of the information. This thing is going to take up some serious memory. + for ntwrk_file in arrangement_files: + hash_networks_and_write_to_file(fname, out_dir, cart_resl, ori_resl, + cart_bound) - opts = ['-ignore_waters false', '-mute core', - '-extra_res_fa {}'.format(' '.join(params_files_list))] - pyrosetta.init(extra_options=' '.join(opts), - set_logging_handler=_logging_handler) +def _hash_from_file(fname, out_name_base, cart_resl, ori_resl, cart_bound, mod): + import numpy as np + import pickle + from . import process_networks as pn + + _init_pyrosetta() hash_types = [] - for pose in pn.poses_for_all_models(fname): - hash_types.extend(pn.find_all_relevant_hbonds_for_pose(pose)) + print(fname) + for i, pose in enumerate(pn.poses_for_all_models(fname)): + if not i % 100: + print('Pose ' + str(i)) + hash_types.extend(mod.find_all_relevant_hbonds_for_pose(pose)) + if hash_types == []: + return + # hash all of the processed infromation ht = pn.hash_full(np.stack(hash_types), cart_resl, ori_resl, cart_bound) - for i, interaction_type in enumerate(pn.interaction_types): + # split the table into smaller tables based on interaction types + # and write to disk as a pickled dictionary. Keys are the hashed ray pairs, + # values are a list of (id, bin)s + for i, interaction_type in enumerate(mod.interaction_types): t = ht[np.isin(ht['it'], [i])] - out_fname = '_'.join([interaction_type, str(cart_resl), str(ori_resl), - str(cart_bound)]) + '.pkl' + out_fname = out_name_base + '_' + '_'.join([interaction_type, + str(cart_resl), + str(ori_resl), + str(cart_bound)]) + '.pkl' - with open(path.join(out_dir, out_fname), 'wb') as f: - pickle.dump({k: v for k, v in zip(t['key'], - t[['id', 'hashed_ht']])}, f) + with open(out_fname, 'wb') as f: + fdata = {} + for k, v in zip(t['key'], t[['id', 'hashed_ht']]): + try: + fdata[k].add(tuple(v)) + except KeyError: + fdata[k] = {tuple(v)} + pickle.dump(fdata, f) + + +def hash_networks_and_write_to_file(fname, out_name_base, cart_resl=.1, + ori_resl=2., cart_bound=16.): + """ + + Args: + fname (str): + out_dir (str): + cart_resl (float): The cartesian resolution of the BCC lattice. + Defaults to 0.1. + ori_resl (float): The orientational resolution of the BCC + lattice. In general, ori_resl should be roughly + 20 * cart_resl. Defaults to 2.0. + cart_bound (float): The bounds of the lattice. Defaults to 16. + """ + from . import process_networks as pn + _hash_from_file(fname, out_name_base, cart_resl, ori_resl, cart_bound, pn) + + +def hash_bidentates_and_write_to_file(fname, out_name_base, cart_resl=.1, + ori_resl=2., cart_bound=16.): + """ + + Args: + fname (str): + out_dir (str): + cart_resl (float): The cartesian resolution of the BCC lattice. + Defaults to 0.1. + ori_resl (float): The orientational resolution of the BCC + lattice. In general, ori_resl should be roughly + 20 * cart_resl. Defaults to 2.0. + cart_bound (float): The bounds of the lattice. Defaults to 16. + """ + from . import process_bidentates as pb + _hash_from_file(fname, out_name_base, cart_resl, ori_resl, cart_bound, pb) def laod_hash_tables_from_disk(fname=None): @@ -116,14 +201,7 @@ def laod_hash_tables_from_disk(fname=None): from . import process_networks from . import position_residue as pr - # setup per-use variables with argparse - params_files_list = [path.join(_dir, t.resName) + '.params' for _, t in - process_networks.fxnl_groups.items()] - - opts = ['-ignore_waters false', '-mute core', - '-extra_res_fa {}'.format(' '.join(params_files_list))] - pyrosetta.init(extra_options=' '.join(opts), - set_logging_handler=_logging_handler) + _init_pyrosetta() # for now I'll just focus on a single file hash_tables = {} @@ -141,8 +219,9 @@ def laod_hash_tables_from_disk(fname=None): hashed_rays = next(iter(hash_tables[table].keys())) positioning_info = hash_tables[table][hashed_rays] + # now positioning_info is a set of possible values fxnl_grps = list(process_networks.fxnl_groups.keys()) - xform = pr.get_ht_from_table(positioning_info[1], + xform = pr.get_ht_from_table(positioning_info[0][1], hash_info[table].cart_resl, hash_info[table].ori_resl, hash_info[table].cart_bound) @@ -156,5 +235,26 @@ def laod_hash_tables_from_disk(fname=None): pos_frame = np.linalg.inv(hbond_ray_pairs.get_frame_for_coords(c)) xf = np.dot(pos_frame, xform) - tranform_pose(p, xf) + pr.transform_pose(p, xf) # convert pose to ATOM records append all together and write them to a file + + +def find_privileged_interactions_in_pose(p): + """ + """ + from os import path + import pickle + from . import bidentify as bd + from . import position_residue as pr + + ht_name = 'Sc_Bb_02_0002_0000_Sc_Bb_0.1_2.0_16.0.pkl' + ht_path = path.expanduser('~weitzner/bidentate_hbond_pdbs_2/hash_tables/Sc_Bb/02') + ht_name_full = path.join(ht_path, ht_name) + table_data = pr._fname_to_HTD('Sc_Bb_0.1_2.0_16.0.pkl') + + with open(ht_name_full, 'rb') as f: + ht = pickle.load(f) + pairs_of_rays = bd.find_hashable_positions(p, ht) + + return pairs_of_rays, ht + diff --git a/privileged_residues/process_bidentates.py b/privileged_residues/process_bidentates.py new file mode 100644 index 0000000..d784e2d --- /dev/null +++ b/privileged_residues/process_bidentates.py @@ -0,0 +1,165 @@ +import numpy as np + +from collections import defaultdict, namedtuple, OrderedDict +from itertools import permutations +from more_itertools import chunked +from numpy.testing import assert_allclose + +# The following packages are not pip-installable +# The import calls are wrapped in a try/except block +try: + import pyrosetta + from pyrosetta.rosetta.core.id import AtomID +except ImportError: + print('Module "pyrosetta" not found in the current environment! ' + 'Go to http://www.pyrosetta.org to download it.') + pass + +try: + import rif + from rif.hash import * +except ImportError: + print('Module "rif" not found in the current environment! ' + 'Go to https://github.com/willsheffler/rif for more information.') + pass + +from . import hbond_ray_pairs +from . import rotation_matrix +from . import process_networks as pn + +ResInfo = namedtuple('ResName', ['grp', 'atoms']) +rsd_to_fxnl_grp = {'ASN': ResInfo('CA_', ['CG', 'OD1', 'ND2']), + 'GLN': ResInfo('CA_', ['CD', 'OE1', 'NE2']), + 'ASP': ResInfo('C__', ['CG', 'OD1', 'OD2']), + 'GLU': ResInfo('C__', ['CD', 'OE1', 'OE2']), + # Use HD1/HE2 to determine which tautomer to use + 'HIS': ResInfo('I__', ['ND1', 'CD2', 'NE2']), + 'ARG': ResInfo('G__', ['CZ', 'NH1', 'NH2']), + 'SER': ResInfo('OH_', ['CB', 'OG', 'HG']), + 'THR': ResInfo('OH_', ['CB', 'OG1', 'HG1']), + 'TYR': ResInfo('OH_', ['CZ', 'OH', 'HH']), + 'LYS': ResInfo('A__', ['NZ', '1HZ', '2HZ']), + } + +interaction_types = ['Sc_Sc', 'Sc_ScBb', 'Sc_Bb'] + + +def find_all_relevant_hbonds_for_pose(p): + """Enumerate all possible arrangements of residues in a Pose of a + closed hydrogen-bonded network of residues, pack the arrangment + into a numpy.array with enough information to reconstruct the + arrangement and return the arrays as a list. + + Notes: + The resulting numpy.arrays have a custom dtype with the + following fields: + + 1. it: The interation type. + 2. r1: Ray 1 + 3. r2: Ray 2 + 4. id: The identifier of the functional group positioned by + this interaction. + 5. ht: The homogenous transform that describes the orientation + of the positioned residue relative to the stationary residues. + + Args: + p (pyrosetta.Pose): The Pose to examine. + + Returns: + list: A list of numpy.arrays that each represent a network + configuration. + """ + # ah, at last. this is where it gets a little tricky. + # everything should be connected to everything else... + # , ray 1, ray 2, positioned rsd id, homogeneous transform to position rsd + entry_type = np.dtype([('it', 'u8'), ('r1', 'f8', (4, 2)), + ('r2', 'f8', (4, 2)), ('id', 'u8'), + ('ht', 'f8', (4, 4))]) + + # we are only interested in the two best-scoring hyrogen bonds + hbs = sorted(list(hbond_ray_pairs.find_hbonds(p, exclude_bsc=False, + exclude_scb=False).hbonds()), key=lambda hb: hb.energy())[:2] + + # figure out which residue is being postioned and which is stationary + rsds = defaultdict(list) + for hb in hbs: + r = (hb.don_res(), hb.don_hatm_is_backbone()), \ + (hb.acc_res(), hb.acc_atm_is_backbone()) + for k, v in r: + rsds[k].append(v) + + res_nums = [] + n_bb = 0 + for k, v in rsds.items(): + # if the residue is being postioned, it should have exactly two hbonds + # and neither should involve the backbone + for e in v: + n_bb += int(e) + if len(v) != 2: + continue + if True not in v: + res_nums.append(k) + + first = [] + second = [] + hash_types = [] + + table = '' + if n_bb == 0: + table = 'Sc_Sc' + elif n_bb == 1: + table = 'Sc_ScBb' + elif n_bb == 2: + table = 'Sc_Bb' + else: + print('wtf, mate') + sys.exit() + + fxnl_grps = list(pn.fxnl_groups.keys()) + + for positioned in res_nums: + # it's probably smart to do the reverse, too + for i, hb in enumerate(hbs): + ''' + par = hb.don_res() if positioned != hb.don_res() else hb.acc_res() + interaction = pn.Interaction(positioned, par) + target, pos = pn.rays_for_interaction(p.residues[hb.don_res()], + p.residues[hb.acc_res()], + interaction) + ''' + pos_rsd = p.residues[positioned] + pos_fxnl_grp = rsd_to_fxnl_grp[pos_rsd.name3()] + d, a = hbond_ray_pairs.find_ray_pair_for_hbond(p, hb) + target, pos = (d, a) if positioned != hb.don_res() else (a, d) + first.append(target) if i == 0 else second.append(target) + + + ## NOTE: from here to the end of this function, the code is nearly identical to + ## pn.find_all_relevant_hbonds_for_pose(p) -- factor that bit out? + + # now we just need the xform from the ray-frame to the positioned rsd + ray_frame = hbond_ray_pairs.get_frame_for_rays(first[-1], second[-1]) + + + c = [np.array([*pos_rsd.xyz(atom)]) for atom in pos_fxnl_grp.atoms] + pos_frame = hbond_ray_pairs.get_frame_for_coords(np.stack(c)) + frame_to_store = np.dot(np.linalg.inv(ray_frame), pos_frame) + assert_allclose(pos_frame, np.dot(ray_frame, frame_to_store), + atol=1E-10) + + array_size = 1 + # store a tuple of the Rays and the positioning information. + # pop first and second upon constructing entry to prepare for the + # next iteration + res = np.empty(array_size, dtype=entry_type) + res['it'] = interaction_types.index(table) + res['r1'] = first.pop() + res['r2'] = second.pop() + res['id'] = fxnl_grps.index(rsd_to_fxnl_grp[pos_rsd.name3()].grp) + res['ht'] = frame_to_store + hash_types.extend(res) + + return hash_types + + + diff --git a/privileged_residues/process_networks.py b/privileged_residues/process_networks.py index eb748b4..bfa24a9 100644 --- a/privileged_residues/process_networks.py +++ b/privileged_residues/process_networks.py @@ -9,6 +9,7 @@ # The import calls are wrapped in a try/except block try: import pyrosetta + from pyrosetta.rosetta.core.id import AtomID except ImportError: print('Module "pyrosetta" not found in the current environment! ' 'Go to http://www.pyrosetta.org to download it.') @@ -23,6 +24,8 @@ pass from . import hbond_ray_pairs +from . import rotation_matrix + # the order of the keys of this dictionary FxnlGrp = namedtuple('FxnlGrp', ['resName', 'donor', 'acceptor', 'atoms']) @@ -88,6 +91,25 @@ """ +# helper functions for identifying hbond partners +def _get_atom_id_pair(rsd, atmno): + base_atmno = rsd.atom_base(atmno) + if rsd.name() == 'OH_' and atmno == 2: + base_atmno = 1 + return AtomIDPair(AtomID(atmno, rsd.seqpos()), + AtomID(base_atmno, rsd.seqpos())) + + +def _vector_from_id(atom_id, rsd): + return np.array([*rsd.xyz(atom_id.atomno())]) + + +def _positioning_ray(rsd, atmno): + id_pair = _get_atom_id_pair(rsd, atmno) + return hbond_ray_pairs.create_ray(_vector_from_id(id_pair.center, rsd), + _vector_from_id(id_pair.base, rsd)) + + def get_models_from_file(fname): """Read a PDB-formatted file and return a list of ATOM records grouped by model. @@ -107,21 +129,20 @@ def get_models_from_file(fname): models. """ with open(fname, 'r') as f: - atom_records = [l.rstrip() for l in f.readlines()] - - models = [] - current_model = [] - for record in atom_records: - current_model.append(record) - if record == 'ENDMDL': - models.append(current_model) - current_model = [] - - # if the last line in the file is not 'ENDMDL' it needs to be added to the - # list here. - if len(current_model) != 0: - models.append(current_model) - return models + model = [] + for l in f: + if (l.startswith("#")): + continue + + line = l.rstrip() + model.append(line) + + if (line.startswith("ENDMDL")): + yield model + model = [] + + if (len(model) > 0): + yield model def pose_from_atom_records(atom_recs): @@ -199,21 +220,6 @@ def rays_for_interaction(donor, acceptor, interaction): describing the stationary and positioned residues, respectively. """ - from pyrosetta.rosetta.core.id import AtomID - - def _get_atom_id_pair(rsd, atmno): - base_atmno = rsd.atom_base(atmno) - return AtomIDPair(AtomID(atmno, rsd.seqpos()), - AtomID(base_atmno, rsd.seqpos())) - - def _vector_from_id(atom_id, rsd): - return np.array([*rsd.xyz(atom_id.atomno())]) - - def _positioning_ray(rsd, atmno): - id_pair = _get_atom_id_pair(rsd, atmno) - return hbond_ray_pairs.create_ray(_vector_from_id(id_pair.center, rsd), - _vector_from_id(id_pair.base, rsd)) - don_rays = [] for i in range(1, donor.natoms() + 1): # in case we use full residues later on @@ -245,6 +251,51 @@ def _positioning_ray(rsd, atmno): return target, positioned_residue +def positioned_residue_is_donor(positioned, target): + """Determine whether the residue positioned by the hydrogen bond of + interest is the donor and return True if it is. + + Args: + positioned (pyrosetta.rosetta.core.conformation.Residue): The + residue that is positioned by the hydrogen bond. + target (pyrosetta.rosetta.core.conformation.Residue): The + residue that is having a hydrogen bonding ray extracted + from it. + Returns: + bool: True if positioned residue is the hydrogen bond donor, + False if it is the acceptor. + + If the function cannot determine which residue is the donor, it + returns None. + """ + def _get_all_rays(rsd): + rays = [] + atms = [] + for i in range(1, rsd.natoms() + 1): + # in case we use full residues later on + if rsd.atom_is_backbone(i): + continue + if rsd.atom_type(i).element() in ('N', 'O', 'H'): + rays.append(_positioning_ray(rsd, i)) + atms.append(i) + return rays, atms + + tgt_rays, tgt_atms = _get_all_rays(target) + pos_rays, pos_atms = _get_all_rays(positioned) + + tgt = np.stack(tgt_rays) + pos = np.stack(pos_rays) + dist = np.linalg.norm(tgt[:, np.newaxis, :, 0] - pos[np.newaxis, :, :, 0], + axis=-1) + tgt_idx, pos_idx = np.unravel_index(np.argmin(dist, axis=None), dist.shape) + + if positioned.atom_type(pos_atms[pos_idx]).element() == 'H': + return True + elif target.atom_type(tgt_atms[tgt_idx]).element() == 'H': + return False + return None + + def find_all_relevant_hbonds_for_pose(p): """Enumerate all possible arrangements of residues in a Pose of a closed hydrogen-bonded network of residues, pack the arrangment @@ -273,8 +324,8 @@ def find_all_relevant_hbonds_for_pose(p): # ah, at last. this is where it gets a little tricky. # everything should be connected to everything else... # , ray 1, ray 2, positioned rsd id, homogeneous transform to position rsd - entry_type = np.dtype([('it', 'u8'), ('r1', 'f8', (2, 4)), - ('r2', 'f8', (2, 4)), ('id', 'u8'), + entry_type = np.dtype([('it', 'u8'), ('r1', 'f8', (4, 2)), + ('r2', 'f8', (4, 2)), ('id', 'u8'), ('ht', 'f8', (4, 4))]) res_orderings = generate_combinations(len(p.residues)) @@ -283,6 +334,12 @@ def find_all_relevant_hbonds_for_pose(p): second = [] hash_types = [] + # it some poses have too many residues in them, presumably the result of + # some strange race condition in an upstream process. Since the data are + # somewhat nonsensical, I am just going to skip these poses entirely. + if len(p.residues) != 3: + return hash_types + fxnl_grps = list(fxnl_groups.keys()) for interactions in res_orderings: # look at the order, figure out which hydrogen bonds to use @@ -312,11 +369,15 @@ def find_all_relevant_hbonds_for_pose(p): donor, acceptor = (p.residues[i] for i in res_nos) ht.append('acceptor') else: - print('Ambiguous arrangement: both can donate & accept!') - print('Trying something a little more complicated...') - print('Oh shit, I should probably implement this!') - import sys - sys.exit(1) + par_rsd = p.residues[interaction.partner] + pos_don = positioned_residue_is_donor(pos_rsd, par_rsd) + assert(pos_don is not None) + if pos_don: + donor, acceptor = pos_rsd, par_rsd + ht.append('acceptor') + else: + acceptor, donor = pos_rsd, par_rsd + ht.append('donor') target, pos = rays_for_interaction(donor, acceptor, interaction) first.append(target) if i == 0 else second.append(target) @@ -329,16 +390,36 @@ def find_all_relevant_hbonds_for_pose(p): c = [np.array([*pos_rsd.xyz(atom)]) for atom in pos_fxnl_grp.atoms] pos_frame = hbond_ray_pairs.get_frame_for_coords(np.stack(c)) frame_to_store = np.dot(np.linalg.inv(ray_frame), pos_frame) - assert_allclose(pos_frame, np.dot(ray_frame, frame_to_store)) + assert_allclose(pos_frame, np.dot(ray_frame, frame_to_store), + atol=1E-10) + array_size = 1 + if pos_fxnl_grp.resName == 'hydroxide': + # hydroxide only has two clearly positioned atoms + # the positioned frame needs to be rotated about the OH--HH bond + # to fill out the relevant orientations. + # the rotation will be centered on the hydrogen. + resl = 5. # degrees + + rot_cntr = np.array([*pos_rsd.xyz('HH')]) + axis = np.array([*pos_rsd.xyz('OH')]) - rot_cntr + angles = np.arange(0., 360., resl) + r = rotation_matrix.rot_ein(axis, angles, degrees=True, + center=rot_cntr) + assert(r.shape == (int(360. / resl),) + (4, 4)) + + frame_to_store = r * frame_to_store + array_size = frame_to_store.shape[0] # store a tuple of the Rays and the positioning information. # pop first and second upon constructing entry to prepare for the # next iteration - hash_types.append(np.array((interaction_types.index(table), - first.pop(), second.pop(), - fxnl_grps.index(pos_rsd.name3()), - frame_to_store), - dtype=entry_type)) + res = np.empty(array_size, dtype=entry_type) + res['it'] = interaction_types.index(table) + res['r1'] = first.pop() + res['r2'] = second.pop() + res['id'] = fxnl_grps.index(pos_rsd.name3()) + res['ht'] = frame_to_store + hash_types.extend(res) return hash_types diff --git a/privileged_residues/rotation_matrix.py b/privileged_residues/rotation_matrix.py new file mode 100644 index 0000000..e358a65 --- /dev/null +++ b/privileged_residues/rotation_matrix.py @@ -0,0 +1,94 @@ +import numpy as np + + +def _is_broadcastable(shp1, shp2): + for a, b in zip(shp1[::-1], shp2[::-1]): + if a == 1 or b == 1 or a == b: + pass + else: + return False + return True + + +def rot_ein(axis, angle, degrees='auto', dtype='f8', shape=(4, 4), + center=None): + """Compute the rotation matrix that describes the rotation by an + angle about an axis and return it. + + Notes: + The matrix for a rotation by an angle of theta about an axis in + the direction of u can be written as + ------------------------------------------------------------ + R = cos(theta) I + sin(theta) u_x + (1 - cos(theta)) (u X u) + ------------------------------------------------------------ + 1. 'I' is the identity matrix, + 2. 'u_x' is the cross product matrix of the unit vector + representing the axis. See: https://en.wikipedia.org/wiki/\ + Cross_product#Conversion_to_matrix_multiplication; and + 3. '(u X u)' is the tensor product of u and itself. Since u can + be written as a vector, the tensor product is equal to the + Kronecker product. See: https://en.wikipedia.org/wiki/\ + Kronecker_product + + The function name comes from the fact that Einstein summation + is used to compute the cross product matrix of the unit vector + in the direction of the axis. + + Args: + axis (numpy.array): An axis or a vector of axes to rotate about. + angle (numpy.array OR float): The angle or a vector angles by + which to rotate. + degrees (str): Set to True to indicate that the value being + passed as `angle` is in degrees, False or None or indicates + the value is in radians. Setting degrees to 'auto' will + cause the script to make its best guess to determine + whether the value is in degrees or radians. Defaults to + 'auto'. + dtype (str): The dtype for the values in the matrix. Defaults + to 'f8'. + shape (tuple): Shape of the output rotation matrix. Defaults to + (3, 3). + + Returns: + numpy.array: Rotation matrix describing a rotation of `angle` + about `axis` in the specified shape. + """ + axis = np.array(axis, dtype=dtype) + angle = np.array(angle, dtype=dtype) + if degrees is 'auto': + degrees = guess_is_degrees(angle) + angle = angle * np.pi / 180.0 if degrees else angle + if axis.shape and angle.shape and not _is_broadcastable( + axis.shape[:-1], angle.shape): + raise ValueError('axis and angle not compatible: ' + + str(axis.shape) + ' ' + str(angle.shape)) + axis /= np.linalg.norm(axis, axis=-1)[..., np.newaxis] + + center = (np.array([0, 0, 0], dtype=dtype) if center is None + else np.array(center, dtype=dtype)) + + outshape = angle.shape if angle.shape else axis.shape[:-1] + r = np.zeros(outshape + shape, dtype=dtype) + + sin_vec = np.sin(angle)[..., np.newaxis, np.newaxis] + cos_vec = np.cos(angle)[..., np.newaxis, np.newaxis] + + # Define the Levi-Civita tensor in R3 + # https://en.wikipedia.org/wiki/Levi-Civita_symbol#Three_dimensions + eijk = np.zeros((3, 3, 3)) + eijk[0, 1, 2] = eijk[1, 2, 0] = eijk[2, 0, 1] = 1 + eijk[0, 2, 1] = eijk[2, 1, 0] = eijk[1, 0, 2] = -1 + axis_cross_prod_mat = np.einsum('ijk,...j->...ik', eijk, axis[..., :3]) + + kronecker_prod = axis[..., np.newaxis, :3] * axis[..., :3, np.newaxis] + + r[..., :3, :3] = cos_vec * np.identity(3) + \ + sin_vec * axis_cross_prod_mat + \ + (1 - cos_vec) * kronecker_prod + + x, y, z = center[..., 0], center[..., 1], center[..., 2] + r[..., 0, 3] = x - r[..., 0, 0] * x - r[..., 0, 1] * y - r[..., 0, 2] * z + r[..., 1, 3] = y - r[..., 1, 0] * x - r[..., 1, 1] * y - r[..., 1, 2] * z + r[..., 2, 3] = z - r[..., 2, 0] * x - r[..., 2, 1] * y - r[..., 2, 2] * z + r[..., 3, 3] = 1 + return r diff --git a/requirements_dev.txt b/requirements_dev.txt index 7192aa1..160209c 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ pip==9.0.3 -bumpversion==0.5.3 +# bumpversion==0.5.3 wheel==0.31.0 watchdog==0.8.3 flake8==3.5.0 @@ -12,4 +12,4 @@ pytest==3.5.0 pytest-runner==4.2 docutils==0.14 numpy==1.14.2 -more_itertools==4.1.0 \ No newline at end of file +more_itertools==4.1.0 diff --git a/sc_bb_example.pdb b/sc_bb_example.pdb new file mode 100644 index 0000000..e28fabc --- /dev/null +++ b/sc_bb_example.pdb @@ -0,0 +1,25 @@ +ATOM 33 N GLY B 4 -3.311 -0.898 -0.085 1.00 0.00 N +ATOM 34 CA GLY B 4 -4.381 -0.604 -1.030 1.00 0.00 C +ATOM 35 C GLY B 4 -4.483 -1.683 -2.101 1.00 0.00 C +ATOM 36 O GLY B 4 -5.568 -2.187 -2.387 1.00 0.00 O +ATOM 37 1H GLY B 4 -3.267 -0.175 0.605 1.00 0.00 H +ATOM 38 2H GLY B 4 -3.492 -1.774 0.362 1.00 0.00 H +ATOM 39 3H GLY B 4 -2.439 -0.946 -0.572 1.00 0.00 H +ATOM 40 1HA GLY B 4 -5.329 -0.526 -0.496 1.00 0.00 H +ATOM 41 2HA GLY B 4 -4.197 0.363 -1.498 1.00 0.00 H +ATOM 42 N GLY B 5 -3.345 -2.032 -2.691 1.00 0.00 N +ATOM 43 CA GLY B 5 -3.300 -3.066 -3.719 1.00 0.00 C +ATOM 44 C GLY B 5 -2.550 -4.297 -3.227 1.00 0.00 C +ATOM 45 O GLY B 5 -1.688 -4.203 -2.353 1.00 0.00 O +ATOM 46 H GLY B 5 -2.489 -1.569 -2.422 1.00 0.00 H +ATOM 47 1HA GLY B 5 -4.316 -3.343 -4.001 1.00 0.00 H +ATOM 48 2HA GLY B 5 -2.816 -2.672 -4.611 1.00 0.00 H +ATOM 49 N GLY B 6 -2.883 -5.452 -3.794 1.00 0.00 N +ATOM 50 CA GLY B 6 -2.180 -6.691 -3.481 1.00 0.00 C +ATOM 51 C GLY B 6 -1.566 -7.307 -4.732 1.00 0.00 C +ATOM 52 O GLY B 6 -0.529 -6.884 -5.162 1.00 0.00 O +ATOM 53 OXT GLY B 6 -2.120 -8.215 -5.287 1.00 0.00 O +ATOM 54 H GLY B 6 -3.643 -5.473 -4.460 1.00 0.00 H +ATOM 55 1HA GLY B 6 -1.398 -6.490 -2.749 1.00 0.00 H +ATOM 56 2HA GLY B 6 -2.873 -7.398 -3.027 1.00 0.00 H +TER \ No newline at end of file diff --git a/sc_bb_example_with_bidentate.pdb b/sc_bb_example_with_bidentate.pdb new file mode 100644 index 0000000..c4c5fb8 --- /dev/null +++ b/sc_bb_example_with_bidentate.pdb @@ -0,0 +1,57 @@ +ATOM 1 N GLY A 1 -1.705 3.999 -5.562 1.00 0.00 N +ATOM 2 CA GLY A 1 -0.823 2.844 -5.445 1.00 0.00 C +ATOM 3 C GLY A 1 -0.260 2.722 -4.035 1.00 0.00 C +ATOM 4 O GLY A 1 -0.324 3.665 -3.247 1.00 0.00 O +ATOM 5 1H GLY A 1 -2.060 4.055 -6.495 1.00 0.00 H +ATOM 6 2H GLY A 1 -2.467 3.903 -4.922 1.00 0.00 H +ATOM 7 3H GLY A 1 -1.194 4.832 -5.349 1.00 0.00 H +ATOM 8 1HA GLY A 1 -1.373 1.939 -5.701 1.00 0.00 H +ATOM 9 2HA GLY A 1 -0.006 2.936 -6.160 1.00 0.00 H +ATOM 10 N ASN A 2 0.292 1.554 -3.724 1.00 0.00 N +ATOM 11 CA ASN A 2 0.916 1.323 -2.426 1.00 0.00 C +ATOM 12 C ASN A 2 2.086 2.270 -2.200 1.00 0.00 C +ATOM 13 O ASN A 2 2.849 2.561 -3.123 1.00 0.00 O +ATOM 14 CB ASN A 2 1.366 -0.121 -2.300 1.00 0.00 C +ATOM 15 CG ASN A 2 0.211 -1.083 -2.249 1.00 0.00 C +ATOM 16 OD1 ASN A 2 -0.941 -0.681 -2.049 1.00 0.00 O +ATOM 17 ND2 ASN A 2 0.498 -2.348 -2.424 1.00 0.00 N +ATOM 18 H ASN A 2 0.280 0.807 -4.403 1.00 0.00 H +ATOM 19 HA ASN A 2 0.179 1.527 -1.647 1.00 0.00 H +ATOM 20 1HB ASN A 2 2.002 -0.379 -3.146 1.00 0.00 H +ATOM 21 2HB ASN A 2 1.962 -0.239 -1.394 1.00 0.00 H +ATOM 22 1HD2 ASN A 2 -0.230 -3.034 -2.401 1.00 0.00 H +ATOM 23 2HD2 ASN A 2 1.445 -2.629 -2.583 1.00 0.00 H +ATOM 24 N GLY A 3 2.224 2.750 -0.970 1.00 0.00 N +ATOM 25 CA GLY A 3 3.329 3.632 -0.610 1.00 0.00 C +ATOM 26 C GLY A 3 3.567 3.630 0.894 1.00 0.00 C +ATOM 27 O GLY A 3 2.672 3.956 1.675 1.00 0.00 O +ATOM 28 OXT GLY A 3 4.637 3.308 1.332 1.00 0.00 O +ATOM 29 H GLY A 3 1.546 2.500 -0.265 1.00 0.00 H +ATOM 30 1HA GLY A 3 4.234 3.310 -1.126 1.00 0.00 H +ATOM 31 2HA GLY A 3 3.111 4.645 -0.946 1.00 0.00 H +TER +ATOM 33 N GLY B 4 -3.311 -0.898 -0.085 1.00 0.00 N +ATOM 34 CA GLY B 4 -4.381 -0.604 -1.030 1.00 0.00 C +ATOM 35 C GLY B 4 -4.483 -1.683 -2.101 1.00 0.00 C +ATOM 36 O GLY B 4 -5.568 -2.187 -2.387 1.00 0.00 O +ATOM 37 1H GLY B 4 -3.267 -0.175 0.605 1.00 0.00 H +ATOM 38 2H GLY B 4 -3.492 -1.774 0.362 1.00 0.00 H +ATOM 39 3H GLY B 4 -2.439 -0.946 -0.572 1.00 0.00 H +ATOM 40 1HA GLY B 4 -5.329 -0.526 -0.496 1.00 0.00 H +ATOM 41 2HA GLY B 4 -4.197 0.363 -1.498 1.00 0.00 H +ATOM 42 N GLY B 5 -3.345 -2.032 -2.691 1.00 0.00 N +ATOM 43 CA GLY B 5 -3.300 -3.066 -3.719 1.00 0.00 C +ATOM 44 C GLY B 5 -2.550 -4.297 -3.227 1.00 0.00 C +ATOM 45 O GLY B 5 -1.688 -4.203 -2.353 1.00 0.00 O +ATOM 46 H GLY B 5 -2.489 -1.569 -2.422 1.00 0.00 H +ATOM 47 1HA GLY B 5 -4.316 -3.343 -4.001 1.00 0.00 H +ATOM 48 2HA GLY B 5 -2.816 -2.672 -4.611 1.00 0.00 H +ATOM 49 N GLY B 6 -2.883 -5.452 -3.794 1.00 0.00 N +ATOM 50 CA GLY B 6 -2.180 -6.691 -3.481 1.00 0.00 C +ATOM 51 C GLY B 6 -1.566 -7.307 -4.732 1.00 0.00 C +ATOM 52 O GLY B 6 -0.529 -6.884 -5.162 1.00 0.00 O +ATOM 53 OXT GLY B 6 -2.120 -8.215 -5.287 1.00 0.00 O +ATOM 54 H GLY B 6 -3.643 -5.473 -4.460 1.00 0.00 H +ATOM 55 1HA GLY B 6 -1.398 -6.490 -2.749 1.00 0.00 H +ATOM 56 2HA GLY B 6 -2.873 -7.398 -3.027 1.00 0.00 H +TER diff --git a/setup.py b/setup.py index 3720f55..62f10d0 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setup( name='privileged_residues', - version='0.2.3', + version='0.3.0', description="Privileged Residues contains methods for placing residues on the surface of a target protein that can be added to a RIF.", long_description=readme + '\n\n' + history, author="Brian D. Weitzner", diff --git a/tests/test_connected_networks.py b/tests/test_connected_networks.py new file mode 100644 index 0000000..c94207d --- /dev/null +++ b/tests/test_connected_networks.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pickle +import pytest + +from glob import glob +from os import path +from privileged_residues.privileged_residues import HAVE_PYROSETTA + + +pdb = path.expanduser("~weitzner/three_res_hbnet/arrangements/imidazole_carboxylate_amide/functional_stubs.pdb") + +# workaround because Rosetta SFX does not capture all hbonds +def _look_up_connected_network(p): + import itertools + import privileged_residues + + from privileged_residues import hbond_ray_pairs + + pairs_of_rays = [] + + resA = p.residue(2) + resB = p.residue(1) + + listA = list(resA.Hpos_polar_sc()) + list(resA.accpt_pos_sc()) + listB = list(resB.Hpos_polar_sc()) + list(resB.accpt_pos_sc()) + + for (j, k) in itertools.product(listA, listB): + first = hbond_ray_pairs.create_ray(resA.xyz(j), resA.xyz(resA.atom_base(j))) + second = hbond_ray_pairs.create_ray(resB.xyz(k), resB.xyz(resB.atom_base(k))) + + pairs_of_rays += [(first, second)] + pairs_of_rays += [(second, first)] + + return pairs_of_rays + +def _network_bidentate_placement(inpdb): + import pyrosetta + import privileged_residues + import numpy as np + import itertools + + from numpy.testing import assert_allclose + from privileged_residues import bidentify as bd + from privileged_residues import process_networks + from privileged_residues.privileged_residues import _init_pyrosetta as init + from pyrosetta.rosetta.core.kinematics import Stub + + init() + + params = (0.1, 2.0, 16.0) + + for pdb in glob(path.expanduser("~weitzner/three_res_hbnet/arrangements/*/functional_stubs.pdb")): + print(pdb) + + p = next(privileged_residues.process_networks.poses_for_all_models(pdb)) + target = p.residue(3) + atoms = privileged_residues.process_networks.fxnl_groups[target.name()].atoms + + hbond_types = ["acceptor", "donor"] + + interaction = path.basename(path.dirname((pdb))) + ht_name_base = path.expanduser("~onalant/source/privileged_residues/local/hash_tables/%s/%s*_%s_%s_0.1_2.0_16.0.pkl") + + rays = _look_up_connected_network(p) + + found = False + for (i, j) in itertools.product(hbond_types, hbond_types): + hits = [] + ht_glob = glob(ht_name_base % (interaction, interaction, i, j)) + + if (len(ht_glob) == 0): + print("Could not find table") + continue + + ht_name = ht_glob[0] + + with open(ht_name, "rb") as f: + table = pickle.load(f) + try: + hits += bd.look_up_interactions(rays, table, *params) + except: + print("RIF XformHash assertion") + continue + + # NOTE(onalant): this needs to be pyrosetta distance compatible + + ref_coords = Stub(*(target.xyz(atom) for atom in privileged_residues.process_networks.fxnl_groups[target.name()].atoms)) + + for k, hit in enumerate(hits): + coords = Stub(*(hit.residue(1).xyz(atom) for atom in process_networks.fxnl_groups[hit.residue(1).name()].atoms)) + dist = pyrosetta.rosetta.core.kinematics.distance(ref_coords, coords) + found = found or dist < 1.0 + + assert(found) +@pytest.mark.skipif('not HAVE_PYROSETTA') +def test_network_bidentate_placement(): + _network_bidentate_placement(pdb) + diff --git a/tests/test_privileged_residues.py b/tests/test_privileged_residues.py index 030f73b..175f85f 100644 --- a/tests/test_privileged_residues.py +++ b/tests/test_privileged_residues.py @@ -9,7 +9,7 @@ from privileged_residues import privileged_residues from privileged_residues import cli - +from privileged_residues.privileged_residues import HAVE_PYROSETTA @pytest.fixture def response(): @@ -36,3 +36,43 @@ def test_command_line_interface(): help_result = runner.invoke(cli.main, ['--help']) assert help_result.exit_code == 0 assert '--help Show this message and exit.' in help_result.output + + +def _test_sc_bb_bidentate_placement(fname): + import pyrosetta + import privileged_residues + from privileged_residues.privileged_residues import _init_pyrosetta as init + import numpy as np + from numpy.testing import assert_allclose + from privileged_residues import bidentify as bd + + init() + + atoms = privileged_residues.process_networks.fxnl_groups['CA_'].atoms + cart_resl, ori_resl, cart_bound = 0.1, 2.0, 16.0 + + ref_pose = pyrosetta.pose_from_file(fname) + ref_coords = np.stack([np.array([*ref_pose.residues[2].xyz(atom)]) for atom in atoms]) + + pairs, ht = privileged_residues.find_privileged_interactions_in_pose(ref_pose) + hits = bd.look_up_interactions(pairs, ht, cart_resl, ori_resl, cart_bound) + + for i, hit in enumerate(hits): + coords = np.stack([np.array([*hit.residues[1].xyz(atom)]) for atom in atoms]) + try: + assert_allclose(coords, ref_coords, atol=0.5) + return + except AssertionError: + continue + assert(False) + + +@pytest.mark.skipif('not HAVE_PYROSETTA') +def test_sc_bb_bidentate_placement(): + _test_sc_bb_bidentate_placement('sc_bb_example_with_bidentate.pdb') + + +@pytest.mark.skipif('not HAVE_PYROSETTA') +def test_sc_bb_bidentate_placement_transformed(): + _test_sc_bb_bidentate_placement('transformed_example.pdb') + diff --git a/transformed_example.pdb b/transformed_example.pdb new file mode 100644 index 0000000..31e2768 --- /dev/null +++ b/transformed_example.pdb @@ -0,0 +1,56 @@ +ATOM 1 N GLY A 1 -1.036 -1.411 -4.482 1.00 0.00 N +ATOM 2 CA GLY A 1 -0.041 -0.820 -3.596 1.00 0.00 C +ATOM 3 C GLY A 1 -0.595 0.410 -2.888 1.00 0.00 C +ATOM 4 O GLY A 1 -1.624 0.955 -3.286 1.00 0.00 O +ATOM 5 1H GLY A 1 -0.647 -2.214 -4.933 1.00 0.00 H +ATOM 6 2H GLY A 1 -1.837 -1.685 -3.949 1.00 0.00 H +ATOM 7 3H GLY A 1 -1.309 -0.741 -5.173 1.00 0.00 H +ATOM 8 1HA GLY A 1 0.274 -1.558 -2.860 1.00 0.00 H +ATOM 9 2HA GLY A 1 0.842 -0.545 -4.173 1.00 0.00 H +ATOM 10 N ASN A 2 0.094 0.840 -1.837 1.00 0.00 N +ATOM 11 CA ASN A 2 -0.298 2.038 -1.103 1.00 0.00 C +ATOM 12 C ASN A 2 -0.268 3.269 -1.998 1.00 0.00 C +ATOM 13 O ASN A 2 0.622 3.416 -2.838 1.00 0.00 O +ATOM 14 CB ASN A 2 0.595 2.237 0.108 1.00 0.00 C +ATOM 15 CG ASN A 2 0.398 1.172 1.152 1.00 0.00 C +ATOM 16 ND2 ASN A 2 1.308 1.099 2.089 1.00 0.00 N +ATOM 17 OD1 ASN A 2 -0.580 0.417 1.110 1.00 0.00 O +ATOM 18 H ASN A 2 0.910 0.326 -1.539 1.00 0.00 H +ATOM 19 HA ASN A 2 -1.327 1.912 -0.760 1.00 0.00 H +ATOM 20 1HB ASN A 2 1.638 2.235 -0.205 1.00 0.00 H +ATOM 21 2HB ASN A 2 0.390 3.211 0.556 1.00 0.00 H +ATOM 22 1HD2 ASN A 2 1.230 0.410 2.810 1.00 0.00 H +ATOM 23 2HD2 ASN A 2 2.084 1.731 2.084 1.00 0.00 H +ATOM 24 N GLY A 3 -1.243 4.151 -1.817 1.00 0.00 N +ATOM 25 CA GLY A 3 -1.305 5.394 -2.579 1.00 0.00 C +ATOM 26 C GLY A 3 -2.163 6.432 -1.870 1.00 0.00 C +ATOM 27 O GLY A 3 -3.343 6.199 -1.602 1.00 0.00 O +ATOM 28 OXT GLY A 3 -1.692 7.493 -1.564 1.00 0.00 O1- +ATOM 29 H GLY A 3 -1.962 3.957 -1.136 1.00 0.00 H +ATOM 30 1HA GLY A 3 -0.297 5.784 -2.720 1.00 0.00 H +ATOM 31 2HA GLY A 3 -1.713 5.194 -3.569 1.00 0.00 H +ATOM 32 N GLY B 4 -3.093 -0.435 2.685 1.00 0.00 N +ATOM 33 CA GLY B 4 -3.215 -1.786 2.151 1.00 0.00 C +ATOM 34 C GLY B 4 -2.024 -2.647 2.552 1.00 0.00 C +ATOM 35 O GLY B 4 -2.190 -3.770 3.026 1.00 0.00 O +ATOM 36 1H GLY B 4 -3.886 0.107 2.407 1.00 0.00 H +ATOM 37 2H GLY B 4 -3.050 -0.474 3.684 1.00 0.00 H +ATOM 38 3H GLY B 4 -2.260 -0.010 2.332 1.00 0.00 H +ATOM 39 1HA GLY B 4 -4.137 -2.241 2.516 1.00 0.00 H +ATOM 40 2HA GLY B 4 -3.289 -1.741 1.064 1.00 0.00 H +ATOM 41 N GLY B 5 -0.823 -2.114 2.356 1.00 0.00 N +ATOM 42 CA GLY B 5 0.399 -2.825 2.714 1.00 0.00 C +ATOM 43 C GLY B 5 1.111 -2.142 3.875 1.00 0.00 C +ATOM 44 O GLY B 5 0.969 -0.937 4.080 1.00 0.00 O +ATOM 45 H GLY B 5 -0.754 -1.193 1.948 1.00 0.00 H +ATOM 46 1HA GLY B 5 0.156 -3.852 2.986 1.00 0.00 H +ATOM 47 2HA GLY B 5 1.061 -2.870 1.851 1.00 0.00 H +ATOM 48 N GLY B 6 1.879 -2.920 4.631 1.00 0.00 N +ATOM 49 CA GLY B 6 2.687 -2.377 5.717 1.00 0.00 C +ATOM 50 C GLY B 6 4.165 -2.684 5.511 1.00 0.00 C +ATOM 51 O GLY B 6 4.816 -2.016 4.757 1.00 0.00 O +ATOM 52 OXT GLY B 6 4.677 -3.594 6.102 1.00 0.00 O1- +ATOM 53 H GLY B 6 1.904 -3.914 4.450 1.00 0.00 H +ATOM 54 1HA GLY B 6 2.540 -1.299 5.775 1.00 0.00 H +ATOM 55 2HA GLY B 6 2.354 -2.799 6.665 1.00 0.00 H +END