Showing 62 changed files with 279,064 additions and 0 deletions.
@@ -0,0 +1,2 @@
>4LQW_B
MELSKETNPVVFFDVCADGEPLGRITMELFSNIVPRTAENFRALCTGEKGFGFKNSIFHRVIPDFVCQGGDITKHDGTGGQSIYGDKFEDENFDVKHTGPGLLSMANQGQNTNNSQFVITLKKAEHLDFKHVVFGFVKDGMDTVKKIESFGSPKGSVCRRITITECGQI
@@ -0,0 +1,2 @@
>4LQW_C
PIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRLHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTHNPPIPVGEIYKRWIILGLNKIVRMYS
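
Each of the two FASTA files added above holds a single two-line record: a ">PDBID_CHAIN" header followed by the chain's one-letter amino-acid sequence. A minimal sketch (not part of this commit; plain Python, with a hypothetical read_fasta helper and an assumed file name 4LQW_B.fasta) of how such records can be read back:

def read_fasta(path):
    # Map each '>' header (without the '>') to its concatenated sequence.
    records = {}
    name = None
    for raw in open(path):
        line = raw.strip()
        if not line:
            continue
        if line.startswith('>'):
            name = line[1:]
            records[name] = ''
        else:
            records[name] += line
    return records

# e.g. read_fasta('4LQW_B.fasta') would yield {'4LQW_B': 'MELSKETNPVVFFDV...'}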
@@ -0,0 +1,312 @@
#!/usr/bin/python

"written by Phil Bradley, Rhiju Das, Michael Tyka, TJ Brunette, and James Thompson from the Baker Lab. Edits done by Steven Combs, Sam Deluca and Jordan Willis from the Meiler Lab."

import sys
import os
from sys import argv, stderr, stdout
from os import popen, system
from os.path import exists, basename
from amino_acids import longer_names
from amino_acids import modres

# remote host for downloading pdbs
remote_host = ''

# flags recording problems encountered while cleaning
shit_stat_insres = False
shit_stat_altpos = False
shit_stat_modres = False
shit_stat_misdns = False  # missing density!

fastaseq = {}
pdbfile = ""


def download_pdb(pdb_id, dest_dir):
    # print "downloading %s" % ( pdb_id )
    url = 'http://www.rcsb.org/pdb/files/%s.pdb.gz' % (pdb_id.upper())
    dest = '%s/%s.pdb.gz' % (os.path.abspath(dest_dir), pdb_id.lower())
    wget_cmd = 'wget --quiet %s -O %s' % (url, dest)
    print wget_cmd
    if remote_host:
        wget_cmd = 'ssh %s %s' % (remote_host, wget_cmd)

    lines = popen(wget_cmd).readlines()
    if (exists(dest)):
        return dest
    else:
        print "Error: didn't download file!"


def check_and_print_pdb(count, residue_buffer, residue_letter):
    global pdbfile
    # Check that CA, N and C are present!
    hasCA = False
    hasN = False
    hasC = False
    for line in residue_buffer:
        atomname = line[12:16]
        # Only add bb atoms if they have occupancy!
        occupancy = float(line[55:60])
        if atomname == " CA " and occupancy > 0.0:
            hasCA = True
        if atomname == " N  " and occupancy > 0.0:
            hasN = True
        if atomname == " C  " and occupancy > 0.0:
            hasC = True

    # if all three backbone atoms are present with occupancy, proceed to print the residue
    if hasCA and hasN and hasC:
        for line in residue_buffer:
            # add linear residue count
            newnum = '%4d ' % count
            line_edit = line[0:22] + newnum + line[27:]
            # write the residue line
            pdbfile = pdbfile + line_edit

        # finally print residue letter into fasta stream
        chain = line[21]
        try:
            fastaseq[chain] += residue_letter
        except KeyError:
            fastaseq[chain] = residue_letter
        # count up residue number
        count = count + 1
        return True
    return False


def print_help():
    print "clean_pdb.py <pdb> <chain id>"

    print "pdb = file name of the file. Can be with or without the .pdb file handle"
    print "chain id = The chain id you are interested in. If more than one chain, "
    print "you can pass the chain id without spaces. For example \"AB\" gets you"
    print "chain A and B. \"A\" gets you chain A."
    print "\n",
    print "chain id = nochain. Removes chain identity from output"
    print "chain id = ignorechain. Gets all the chains for pdb"
    print "\n",
    print "written by Phil Bradley, Rhiju Das, Michael Tyka, TJ Brunette, and James Thompson from the Baker Lab. Edits done by Steven Combs, Sam Deluca and Jordan Willis from the Meiler Lab."
    sys.exit()


if argv.count('-h'):
    print_help()
files_to_unlink = []
try:
    assert(len(argv) > 2)
except AssertionError:
    print_help()

pdbname = argv[1].upper()

if argv[2].strip() != "ignorechain" and argv[2].strip() != "nochain":
    chainid = argv[2].upper()
else:
    chainid = argv[2]

if (pdbname[-4:] != '.pdb' and pdbname[-8:] != '.pdb1.gz'):
    pdbname += '.pdb'

# outfile = string.lower(pdbname[0:4]) + chainid + pdbname[4:]
outfile = pdbname[0:-4] + "_" + chainid + ".pdb"

nopdbout = 0
if argv.count('nopdbout'):
    nopdbout = 1

removechain = 0
if argv.count('nochain'):
    removechain = 1

ignorechain = 0
if argv.count('ignorechain'):
    ignorechain = 1

netpdbname = pdbname
if not exists(netpdbname):
    netpdbname = pdbname

fixed_pdb = pdbname
print "Looking for: ", fixed_pdb
if os.path.isfile(fixed_pdb):
    print "Found preoptimised or otherwise fixed PDB file. "
    netpdbname = fixed_pdb
else:
    print "File %s doesn't exist, downloading from internet." % (netpdbname)
    netpdbname = download_pdb(pdbname[0:4], '.')
    files_to_unlink.append(netpdbname)

if netpdbname[-3:] == '.gz':
    lines = popen('zcat '+netpdbname, 'r').readlines()
else:
    lines = open(netpdbname, 'r').readlines()


oldresnum = '   '
count = 1
modifiedres = ''


residue_buffer = []
residue_letter = ''
residue_invalid = False

if chainid == '_':
    chainid = ' '

# walk over every record, keeping only complete residues from the requested chains
for i in range(len(lines)):
    line = lines[i]

    if len(line) > 5 and line[:6] == 'ENDMDL':
        break  # It's an NMR model.
    chainid = [i for i in chainid]
    if (line[21] in chainid or ignorechain or removechain):
        line_edit = line
        if line[0:3] == 'TER':
            continue
        elif (line[0:6] == 'HETATM'):
            ok = False

            # Is it a modified residue ?
            if modres.has_key(line[17:20]):
                # if so replace it with its canonical equivalent !
                line_edit = 'ATOM  ' + line[6:17] + modres[line[17:20]] + line[20:]
                modifiedres = modifiedres + line[17:20] + ', '
                # don't count MSEs as modified residues (because they're so common and get_pdb dealt with them previously)
                if line[17:20] != "MSE":
                    shit_stat_modres = True
                ok = True

            # other substitution (of atoms mainly)
            if (line[17:20] == 'MSE'):  # Selenomethionine
                if (line_edit[12:14] == 'SE'):
                    line_edit = line_edit[0:12]+' S'+line_edit[14:]
                if len(line_edit) > 75:
                    if (line_edit[76:78] == 'SE'):
                        line_edit = line_edit[0:76]+' S'+line_edit[78:]

            if not ok:
                continue  # skip this atom if we haven't found a conversion

        if line_edit[0:4] == 'ATOM':  # or line_edit[0:6] == 'HETATM':

            # if line_edit[13:14]=='P': #Nucleic acid? Skip.
            #     resnum = line_edit[23:26]
            #     oldresnum = resnum
            #     while (resnum == oldresnum):
            #         print "HERE"
            #         i += 1
            #         line = lines[i]
            #         resnum = line_edit[23:26]

            resnum = line_edit[22:27]

            insres = line[26]
            if insres != ' ':
                shit_stat_insres = True

            altpos = line[16]
            if altpos != ' ':
                shit_stat_altpos = True

            # Is this a new residue?
            if not resnum == oldresnum:
                if residue_buffer != []:  # is there a residue in the buffer ?
                    if not residue_invalid:
                        if not check_and_print_pdb(count, residue_buffer, residue_letter):
                            # if unsuccessful
                            shit_stat_misdns = True
                        else:
                            count = count + 1

                residue_buffer = []
                residue_letter = ""
                residue_invalid = False

                longname = line_edit[17:20]
                if longer_names.has_key(longname):
                    residue_letter = longer_names[longname]
                else:
                    residue_letter = 'X'
                    residue_invalid = True

            oldresnum = resnum

            # What does this do ?
            if line_edit[16:17] == 'A':
                line_edit = line_edit[:16]+' '+line_edit[17:]

            if line_edit[16:17] != ' ':
                continue

            if removechain:
                line_edit = line_edit[0:21]+' '+line_edit[22:]

            residue_buffer.append(line_edit)


# flush the last residue left in the buffer
if not check_and_print_pdb(count, residue_buffer, residue_letter):
    # if unsuccessful
    shit_stat_misdns = True
else:
    count = count + 1


flag_altpos = "---"
if shit_stat_altpos:
    flag_altpos = "ALT"
flag_insres = "---"
if shit_stat_insres:
    flag_insres = "INS"
flag_modres = "---"
if shit_stat_modres:
    flag_modres = "MOD"
flag_misdns = "---"
if shit_stat_misdns:
    flag_misdns = "DNS"

nres = len("".join(fastaseq.values()))

flag_successful = "OK"
if nres <= 0:
    flag_successful = "BAD"


print netpdbname, pdbname, "".join(chainid), "%5d" % nres, flag_altpos, flag_insres, flag_modres, flag_misdns, flag_successful


if chainid == ' ':
    chainid = '_'
if nres > 0:
    if (nopdbout == 0):
        # outfile = string.lower( basename(outfile) )
        outfile = outfile.replace('.pdb1.gz', '.pdb')
        outid = open(outfile, 'w')
        outid.write(pdbfile)
        outid.write("TER\n")
        outid.close()

    fastaid = stdout
    if argv[2] != "ignorechain" and argv[2] != "nochain":
        for chain in fastaseq:
            fastaid.write('>'+pdbname[0:4]+"_"+chain+'\n')
            fastaid.write(fastaseq[chain])
            fastaid.write('\n')
            handle = open(pdbname[0:4]+"_"+"".join(chain) + ".fasta", 'w')
            handle.write('>'+pdbname[0:4]+"_"+"".join(chain)+'\n')
            handle.write(fastaseq[chain])
            handle.write('\n')
            handle.close()
    else:
        fastaseq = ["".join(fastaseq.values())]
        fastaid.write('>'+pdbname[0:4]+"_"+argv[2]+'\n')
        fastaid.writelines(fastaseq)
        fastaid.write('\n')
        handle = open(pdbname[0:4]+"_"+argv[2] + ".fasta", 'w')
        handle.write('>'+pdbname[0:4]+"_"+argv[2]+'\n')
        handle.writelines(fastaseq)
        handle.write('\n')
        handle.close()

if len(files_to_unlink) > 0:
    for file in files_to_unlink:
        os.unlink(file)
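
Per the usage text in print_help() above, a hypothetical driver (not part of this commit) that cleans chain B of 4LQW and then picks up the files the script writes could look like this:

import subprocess

# Equivalent to running:  python clean_pdb.py 4LQW B
# The chain argument may also be several letters ("AB"), "nochain", or "ignorechain",
# and passing "nopdbout" suppresses the cleaned PDB file.
subprocess.call(["python", "clean_pdb.py", "4LQW", "B"])

# On success the script writes 4LQW_B.pdb (renumbered ATOM records followed by TER)
# and 4LQW_B.fasta (the chain's one-letter sequence), and prints a one-line summary
# ending in OK or BAD.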