Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhuLvs authored Nov 27, 2024
1 parent d3692ca commit ccf0593
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions helper_scripts/Calculate Distance Map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import Bio.PDB
import pandas as pd
import os
from multiprocessing import Pool, cpu_count

def _representative_atom(residue):
    """Return the residue's 'CA' atom, else its first atom, else None if it has no atoms."""
    if 'CA' in residue:
        return residue['CA']
    return next(iter(residue), None)


def calculate_min_distance(residue1, residue2):
    """Return the distance between the representative atoms of two residues.

    Despite the function name, no minimum over atom pairs is taken: exactly
    one atom is chosen per residue (the 'CA' atom when the residue has one,
    otherwise the first atom obtained by iterating the residue) and the
    result is ``atom1 - atom2``. Bio.PDB atoms define subtraction as the
    distance between the two atoms.

    Args:
        residue1: First residue; must support ``in``, ``[]`` and iteration
            over its atoms.
        residue2: Second residue, same requirements.

    Returns:
        The value of ``atom1 - atom2``, or ``float('inf')`` when either
        residue yields no atom at all.
    """
    atom1 = _representative_atom(residue1)
    atom2 = _representative_atom(residue2)

    # A residue without any atom cannot be placed; report it as infinitely far.
    if atom1 is None or atom2 is None:
        return float('inf')
    return atom1 - atom2

# Residue names kept in addition to the standard amino acids. The original
# inline list named 'NLE' twice; a frozenset drops the duplicate and is built
# once instead of once per residue.
_EXTRA_RESIDUE_NAMES = frozenset({'HSD', 'HSE', 'HSP', 'NLE', 'HIP', 'HIE'})


def calculate_distance_matrix(structure):
    """Build the all-against-all residue distance matrix for a structure.

    Residues are kept when ``Bio.PDB.is_aa(residue, standard=True)`` is true
    or when their residue name is one of ``_EXTRA_RESIDUE_NAMES``. Each matrix
    entry is ``calculate_min_distance`` of the corresponding residue pair.

    Args:
        structure: Any object exposing ``get_residues()`` (the caller in this
            file passes a single model of a parsed PDB structure).

    Returns:
        A tuple ``(distance_matrix, residues)`` where ``distance_matrix`` is a
        square NumPy array with one row/column per kept residue and
        ``residues`` is the list of kept residues in matrix order.
    """
    residues = [
        residue for residue in structure.get_residues()
        if Bio.PDB.is_aa(residue, standard=True)
        or residue.get_resname() in _EXTRA_RESIDUE_NAMES
    ]

    count = len(residues)
    distance_matrix = np.zeros((count, count))

    # The pairwise distance is symmetric, so compute the upper triangle
    # (diagonal included) once and mirror it into the lower triangle.
    for i, residue1 in enumerate(residues):
        for j in range(i, count):
            distance = calculate_min_distance(residue1, residues[j])
            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance

    return distance_matrix, residues

def main(pdb_file_path, csv_file_path):
    """Write the residue distance matrix of one PDB file to a CSV file.

    Args:
        pdb_file_path: Path of the PDB file to parse.
        csv_file_path: Path of the CSV file to write. Each column is headed
            "<residue name> <residue number>"; no index column is written.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    parsed = parser.get_structure('protein_structure', pdb_file_path)

    # Only the model stored under key 0 is used (normally the first model).
    matrix, kept_residues = calculate_distance_matrix(parsed[0])

    column_names = []
    for res in kept_residues:
        column_names.append(f"{res.get_resname()} {res.get_id()[1]}")

    frame = pd.DataFrame(matrix, columns=column_names)
    frame.to_csv(csv_file_path, index=False)

def process_single_file(args):
    """Run ``main`` on one ``(pdb_file_path, csv_file_path)`` pair.

    ``Pool.map`` hands each worker a single argument, so the two paths travel
    together as one tuple and are unpacked here.
    """
    source_pdb, target_csv = args
    main(source_pdb, target_csv)

def process_pdb_files(input_folder, output_folder):
    """Convert every ``.pdb`` file in a folder into a distance-matrix CSV.

    For each file ``<name>.pdb`` directly inside ``input_folder`` a file
    ``<name>.csv`` is written to ``output_folder``. Files are processed in
    parallel by a ``multiprocessing.Pool``.

    Args:
        input_folder: Folder scanned (non-recursively) for ``.pdb`` files.
        output_folder: Folder receiving the CSV files; created if missing.

    Raises:
        OSError: If ``input_folder`` cannot be listed or ``output_folder``
            cannot be created.
    """
    # List first so a missing input folder fails before anything is created.
    # Sorting makes the processing order independent of the file system.
    files_to_process = [
        (
            os.path.join(input_folder, filename),
            os.path.join(output_folder, os.path.splitext(filename)[0] + '.csv'),
        )
        for filename in sorted(os.listdir(input_folder))
        if filename.endswith('.pdb')
    ]

    # Without this, every worker fails on its CSV write when the folder is absent.
    os.makedirs(output_folder, exist_ok=True)

    if not files_to_process:
        return

    # Never start more workers than there are files to hand out.
    worker_count = min(cpu_count(), len(files_to_process))
    with Pool(processes=worker_count) as pool:
        pool.map(process_single_file, files_to_process)

if __name__ == '__main__':
    # Usage: python "Calculate Distance Map.py" [input_folder [output_folder]]
    # With no arguments the original hard-coded folders are used.
    # The guard itself must stay: worker processes may re-import this module.
    import sys

    cli_args = sys.argv[1:]
    input_folder = cli_args[0] if len(cli_args) > 0 else 'F:/Desktop/PDB'
    output_folder = cli_args[1] if len(cli_args) > 1 else 'F:/Desktop/contact'
    process_pdb_files(input_folder, output_folder)

0 comments on commit ccf0593

Please sign in to comment.