Skip to content

Commit

Permalink
draft finish
Browse files Browse the repository at this point in the history
  • Loading branch information
Tao Zhu committed Jul 19, 2019
1 parent 6ac1485 commit bfed0a5
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 72 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# pysam

# primer3-py
# flask
55 changes: 29 additions & 26 deletions src/make_primers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,40 @@ def calculate_GC(seq):
gc = len([x for x in seq.upper() if x=='G' or x=='C'])
return gc/len(seq)*100

def make_primers(query_file):
def make_primers(query):
'''
In case of user's input, make list of primer dicts similar as the result of design_primer module
Input:
query: a string in multi-lines
'''
primers = {}
with open(query_file) as f:
for line in f:
line_data = re.split(r'\s+', line.strip())
if len(line_data)==4:
(id, rank, seq_F, seq_R) = line_data
rank = int(rank)
else:
(id, seq_F, seq_R) = line_data
rank = 0
if id not in primers:
primers[id] = {}
primers[id]['PRIMER_PAIR_NUM_RETURNED'] = rank+1
primers[id][f'PRIMER_PAIR_{rank}_PENALTY'] = 0
primers[id][f'PRIMER_LEFT_{rank}_SEQUENCE'] = seq_F
primers[id][f'PRIMER_RIGHT_{rank}_SEQUENCE'] = seq_R
primers[id][f'PRIMER_LEFT_{rank}'] = [-1, len(seq_F)]
primers[id][f'PRIMER_RIGHT_{rank}'] = [-1, len(seq_R)]
primers[id][f'PRIMER_LEFT_{rank}_TM'] = primer3.calcTm(seq_F)
primers[id][f'PRIMER_RIGHT_{rank}_TM'] = primer3.calcTm(seq_R)
primers[id][f'PRIMER_LEFT_{rank}_GC_PERCENT'] = calculate_GC(seq_F)
primers[id][f'PRIMER_RIGHT_{rank}_GC_PERCENT'] = calculate_GC(seq_R)
primers[id][f'PRIMER_PAIR_{rank}_PRODUCT_SIZE'] = -1

for line in query.splitlines():
line_data = re.split(r'\s+', line.strip())
if len(line_data)==4:
(id, rank, seq_F, seq_R) = line_data
rank = int(rank)
else:
(id, seq_F, seq_R) = line_data
rank = 0

if id not in primers:
primers[id] = {}
primers[id]['PRIMER_PAIR_NUM_RETURNED'] = rank+1
primers[id][f'PRIMER_PAIR_{rank}_PENALTY'] = 0
primers[id][f'PRIMER_LEFT_{rank}_SEQUENCE'] = seq_F
primers[id][f'PRIMER_RIGHT_{rank}_SEQUENCE'] = seq_R
primers[id][f'PRIMER_LEFT_{rank}'] = [-1, len(seq_F)]
primers[id][f'PRIMER_RIGHT_{rank}'] = [-1, len(seq_R)]
primers[id][f'PRIMER_LEFT_{rank}_TM'] = primer3.calcTm(seq_F)
primers[id][f'PRIMER_RIGHT_{rank}_TM'] = primer3.calcTm(seq_R)
primers[id][f'PRIMER_LEFT_{rank}_GC_PERCENT'] = calculate_GC(seq_F)
primers[id][f'PRIMER_RIGHT_{rank}_GC_PERCENT'] = calculate_GC(seq_R)
primers[id][f'PRIMER_PAIR_{rank}_PRODUCT_SIZE'] = -1

return primers

if __name__ == "__main__":
primers = make_primers('tests/query_check')
print(primers)
with open('tests/query_check') as f:
primers = make_primers(f.read())
print(primers)
59 changes: 31 additions & 28 deletions src/make_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,37 +25,39 @@ def faidx(template_file, region_string):
result_seqs[seq_split[0]] = ''.join(seq_split[1:])
return result_seqs

def build(query_file, template_file, primer_type):
def build(query, template_file, primer_type):
'''
return a list of primer sites dict that can be passed to the design_primer module
[{
'id': ,
'template': ,
'type': ,
'pos': ,
'length':,
'size_min':,
'size_max':
}]
Input:
query: a string in multi-lines
Return:
a list of primer sites dict that can be passed to the design_primer module
[{
'id': ,
'template': ,
'type': ,
'pos': ,
'length':,
'size_min':,
'size_max':
}]
'''
primer_sites = []
retrieve_region2raw_region = {}
with open(query_file) as f:
for line in f:
query_data = re.split(r'\s+', line.strip())
(chr, pos, length, size_min, size_max) = (query_data[0], 1, 1, 70, 1000)
if len(query_data)>1 : # seq ID and pos
pos = int(query_data[1].replace(',', ''))
if len(query_data)>2 :
length = int(query_data[2])
if len(query_data)>3 :
size_min = int(query_data[3])
size_max = int(query_data[4])
for line in query.splitlines():
query_data = re.split(r'\s+', line.strip())
(chr, pos, length, size_min, size_max) = (query_data[0], 1, 1, 70, 1000)
if len(query_data)>1 : # seq ID and pos
pos = int(query_data[1].replace(',', ''))
if len(query_data)>2 :
length = int(query_data[2])
if len(query_data)>3 :
size_min = int(query_data[3])
size_max = int(query_data[4])

# retrieve_start and retrieve_end are used by samtools to extract template sequences
retrieve_start = max(pos-size_max, 1)
retrieve_end = pos+length+size_max
retrieve_region2raw_region[f'{chr}:{retrieve_start}-{retrieve_end}'] = [chr, pos, length, size_min, size_max, retrieve_start]
# retrieve_start and retrieve_end are used by samtools to extract template sequences
retrieve_start = max(pos-size_max, 1)
retrieve_end = pos+length+size_max
retrieve_region2raw_region[f'{chr}:{retrieve_start}-{retrieve_end}'] = [chr, pos, length, size_min, size_max, retrieve_start]

retrieve_region_string = '\n'.join(retrieve_region2raw_region.keys())

Expand All @@ -75,5 +77,6 @@ def build(query_file, template_file, primer_type):
return primer_sites

if __name__ == "__main__":
primer_sites = build(query_file='tests/query_design', template_file='tests/example.fa', primer_type='SEQUENCE_TARGET')
print(json.dumps(primer_sites))
with open('tests/query_design') as f:
primer_sites = build(query=f.read(), template_file='tests/example.fa', primer_type='SEQUENCE_TARGET')
print(json.dumps(primer_sites, indent=4))
29 changes: 18 additions & 11 deletions src/output.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
import json
import sys
import os

def text(primers, dbs):
def after_check(primers, dbs, file):
print('#Site_ID', 'Primer_Rank', 'Primer_Seq_Left', 'Primer_Seq_Right', 'Target_Amplicon_Size', \
'Primer_Pair_Penalty_Score', 'Database', 'Possible_Amplicon_Number', \
'Primer_Rank_in_Primer3_output', sep='\t')
'Primer_Rank_in_Primer3_output', 'Tm_left', 'Tm_Right', sep='\t', file=file)
for db in dbs:
for (id, primers) in primers.items():
if 'PRIMER_PAIR_NUM_RETURNED_FINAL' not in primers:
print(id, 'No_Primer\t', sep='\t', end='')
# No primers
if primers['PRIMER_PAIR_NUM_RETURNED']==0:
print(id, 'No_Primer\t', sep='\t', end='', file=file)
if 'PRIMER_LEFT_EXPLAIN' in primers:
print(primers['PRIMER_LEFT_EXPLAIN'].replace(' ', '_'), \
primers['PRIMER_RIGHT_EXPLAIN'].replace(' ', '_'), \
primers['PRIMER_PAIR_EXPLAIN'].replace(' ', '_'), \
sep='\t')
print('###')
sep='\t', file=file)
print('###', file=file)
continue

for amplicon_rank in range(0, primers['PRIMER_PAIR_NUM_RETURNED_FINAL']):
db_base = os.path.basename(db)
print_data = []
print_data.append(id)
print_data.append(str(amplicon_rank))
Expand All @@ -25,13 +29,16 @@ def text(primers, dbs):
print_data.append(primers[f'PRIMER_RIGHT_{raw_rank}_SEQUENCE'])
print_data.append(str(primers[f'PRIMER_PAIR_{raw_rank}_PRODUCT_SIZE']))
print_data.append(str(primers[f'PRIMER_PAIR_{raw_rank}_PENALTY']))
print_data.append(db)
print_data.append(str(len(primers[db][f'PRIMER_PAIR_{raw_rank}_AMPLICONS'])))
print_data.append(db_base)
print_data.append(str(len(primers[db_base][f'PRIMER_PAIR_{raw_rank}_AMPLICONS'])))
print_data.append(str(raw_rank))
print('\t'.join(print_data))
print('###')
print_data.append(str(round(primers[f'PRIMER_LEFT_{raw_rank}_TM'], 2)))
print_data.append(str(round(primers[f'PRIMER_RIGHT_{raw_rank}_TM'], 2)))
print('\t'.join(print_data), file=file)
print('###', file=file)


if __name__ == "__main__":
primers = json.load(open('tests/sort_primers.json'))
dbs = ['example.fa']
text(primers, dbs)
after_check(primers, dbs, file=sys.stdout)
99 changes: 99 additions & 0 deletions src/primertool
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

'''Here is the title
Here is the document
'''

__author__ = 'Tao Zhu'
__copyright__ = 'Copyright 2019'
__license__ = 'GPL'
__version__ = '0.1'
__email__ = '[email protected]'
__status__ = 'Development'

import argparse
import re
import os
import json
import shutil

from distutils.version import LooseVersion

import make_sites, make_primers, design_primer, run_blast, sort_primers, output

parser = argparse.ArgumentParser(description='PrimerServer2: a high-throughput primer \
design and specificity-checking platform', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('query', help='query file. (STDIN is acceptable)', type=argparse.FileType('r'))
parser.add_argument('template', help='template file in FASTA format')

group = parser.add_mutually_exclusive_group()
group.add_argument('--no-specificity', help="Don't check specificity; Only design primers", action='store_true')
group.add_argument('--only-specificity', help="Only check specificity and skip primer designs", action='store_true')
parser.add_argument('--type', choices=['SEQUENCE_TARGET', 'SEQUENCE_INCLUDED_REGION', 'FORCE_END'],\
help='primer types', default='SEQUENCE_TARGET')
parser.add_argument('-p', '--cpu', type=int, help='Used CPU number.', default=2)
parser.add_argument('-l', '--checking-size-min', type=int, help='Lower limit of the checking amplicon size range (bp).', \
default=70)
parser.add_argument('-u', '--checking-size-max', type=int, help='Upper limit of the checking amplicon size range (bp).', \
default=1000)
parser.add_argument('-r', '--primer-num-retain', type=int, help='The maximum number of primers for each site to return.', \
default=10)
parser.add_argument('-a', '--report-amplicon-seqs', help="Get amplicon seqs (might be slow)", action='store_true')
parser.add_argument('--json-debug', help="Output debug information in JSON mode", action='store_true')
parser.add_argument('-o', '--out', help="Output primers in JSON format", type=argparse.FileType('w'))
parser.add_argument('-t', '--tsv', help="Output primers in TSV format", type=argparse.FileType('w'))
args = parser.parse_args()

def error(msg):
if args.json_debug is True:
print(json.dumps({'ERROR': msg}))
raise Exception(msg)

############### Check Environments ###############
if shutil.which('samtools') is None:
error('No samtools detected in your system')

samtools_version = os.popen('samtools --version').readlines()[0].strip().split(' ')[1]
if LooseVersion(samtools_version) < LooseVersion('1.9'):
error(f'Your samtools version is v{samtools_version}, but >=v1.9 is required')

if shutil.which('blastn') is None:
error('No NCBI-BLAST+ (blastn) detected in your system')

if shutil.which('makeblastdb') is None:
error('No NCBI-BLAST+ (makeblastdb) detected in your system')

############### Check Template File (FASTA) #######
if os.path.isfile(args.template) is False:
error(f'File not found: {args.template}')
if os.path.isfile(args.template+'.fai') is False:
code = os.system(f'samtools faidx {args.template} 2>/dev/null')
if code != 0:
error(f'File {args.template} cannot be indexed by samtools faidx. Perhaps it is not in FASTA format')

############### Run ###############################
#### primers ####
query_string = args.query.read()
if args.only_specificity is True:
primers = make_primers.make_primers(query=query_string)
else:
sites = make_sites.build(query=query_string, template_file=args.template, primer_type=args.type)
primers = design_primer.multiple(sites, cpu=args.cpu)

#### specificity ####
dbs = [args.template]
if args.no_specificity is False:
primers = run_blast.run_blast_parallel(primers=primers, dbs=dbs, cpu=args.cpu,\
checking_size_max=args.checking_size_max, checking_size_min=args.checking_size_min, \
report_amplicon_seq=args.report_amplicon_seqs)
primers = sort_primers.sort_rank(primers=primers, dbs=dbs, max_num_return=args.primer_num_retain)

#### output #########
if args.out is not None:
print(json.dumps(primers, indent=4), file=args.out)
else:
print(json.dumps(primers, indent=4))

if args.tsv is not None:
if args.no_specificity is False:
output.after_check(primers, dbs, file=args.tsv)
11 changes: 6 additions & 5 deletions src/run_blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def run_blast_parallel(primers, dbs, cpu=2, checking_size_min=70, checking_size_
p3_input = {
'id': id,
'rank': rank,
'db': 'tests/'+ db,
'db': db,
'seq_L': primer[f'PRIMER_LEFT_{rank}_SEQUENCE'],
'seq_R': primer[f'PRIMER_RIGHT_{rank}_SEQUENCE'],
'checking_size_min': checking_size_min,
Expand All @@ -63,7 +63,8 @@ def run_blast_parallel(primers, dbs, cpu=2, checking_size_min=70, checking_size_
return primers

if __name__ == "__main__":
primers = make_primers('tests/query_check_multiple')
dbs = ['example.fa']
report_primers = run_blast_parallel(primers, dbs, cpu=10, report_amplicon_seq=True)
print(json.dumps(report_primers))
with open('tests/query_check_multiple') as f:
primers = make_primers(query=f.read())
dbs = ['tests/example.fa']
report_primers = run_blast_parallel(primers, dbs, cpu=10, report_amplicon_seq=True)
print(json.dumps(report_primers, indent=4))
3 changes: 2 additions & 1 deletion src/sort_primers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os

def sort_rank(primers, dbs, max_num_return=10):
'''
Expand All @@ -14,7 +15,7 @@ def sort_rank(primers, dbs, max_num_return=10):
'''
for (id, primer) in primers.items():
rank_to_amplicon_valid_num = {}
main_db = dbs[0]
main_db = os.path.basename(dbs[0])
for rank in range(0, primer['PRIMER_PAIR_NUM_RETURNED']):
if f'PRIMER_PAIR_{rank}_AMPLICONS' not in primer[main_db]:
continue
Expand Down

0 comments on commit bfed0a5

Please sign in to comment.