diff --git a/augur/align.py b/augur/align.py index 4c7d5d99c..6f15228ed 100644 --- a/augur/align.py +++ b/augur/align.py @@ -196,7 +196,7 @@ def read_sequences(*fnames): seqs = {} try: for fname in fnames: - for record in SeqIO.parse(fname, 'fasta'): + for record in SeqIO.parse(fname, 'fasta-pearson'): if record.name in seqs and record.seq != seqs[record.name].seq: raise AlignmentError("Detected duplicate input strains \"%s\" but the sequences are different." % record.name) # if the same sequence then we can proceed (and we only take one) @@ -240,7 +240,7 @@ def read_reference(ref_fname): raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file \"%s\" exists"%ref_fname) try: - ref_seq = SeqIO.read(ref_fname, 'genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta') + ref_seq = SeqIO.read(ref_fname, 'genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta-pearson') except: raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) diff --git a/augur/ancestral.py b/augur/ancestral.py index aa4f805f4..0cb3e9929 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -398,7 +398,7 @@ def run(args): aln = args.alignment ref = None if args.root_sequence: - for fmt in ['fasta', 'genbank']: + for fmt in ['fasta-pearson', 'genbank']: try: ref = str(SeqIO.read(args.root_sequence, fmt).seq).upper() break diff --git a/augur/reconstruct_sequences.py b/augur/reconstruct_sequences.py index 478801855..acc11e348 100644 --- a/augur/reconstruct_sequences.py +++ b/augur/reconstruct_sequences.py @@ -73,7 +73,7 @@ def run(args): if(is_vcf): node_data["nodes"][root_node]['aa_sequences'] = {} with open_file(args.vcf_aa_reference) as handle: - for record in SeqIO.parse(handle, "fasta"): + for record in SeqIO.parse(handle, "fasta-pearson"): if record.id==args.gene: #'root' may not be same as 'reference', so apply any mutations at root here! node_data["nodes"][root_node]['aa_sequences'][record.id] = get_sequence(str(record.seq), node_data["nodes"][root_node]["aa_muts"][record.id]) diff --git a/augur/sequence_traits.py b/augur/sequence_traits.py index 1d09ac6b2..1fa2ef5e1 100644 --- a/augur/sequence_traits.py +++ b/augur/sequence_traits.py @@ -91,7 +91,7 @@ def mutation_struct(): samps = header[sampLoc:] nsamp = len(samps) - for refSeq in SeqIO.parse(ref_file, format='fasta'): + for refSeq in SeqIO.parse(ref_file, format='fasta-pearson'): prots[refSeq.name]['reference'] = str(refSeq.seq) posN = np.unique(prots[refSeq.name]['positions']) prots[refSeq.name]['positions'] = list(posN)