Replace usage of Bio.SeqIO.read with read_sequence

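Routing every single-record read through read_sequence (from augur.io.sequences) centralizes customization of the underlying Biopython call in one place.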
nextstrain · Jan 21, 2025 · cc365cb
1 parent 7a2b08e
commit cc365cb
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 8 deletions.
diff --git a/augur/align.py b/augur/align.py
@@ -8,7 +8,7 @@
 from Bio import AlignIO, SeqIO, Seq, Align
 from .argparse_ import ExtendOverwriteDefault
 from .io.file import open_file
-from .io.sequences import read_sequences as io_read_sequences
+from .io.sequences import read_sequence, read_sequences as io_read_sequences
 from .io.shell_command_runner import run_shell_command
 from .io.vcf import shquote
 from .utils import nthreads_value
@@ -241,7 +241,7 @@ def read_reference(ref_fname):
         raise AlignmentError("ERROR: Cannot read reference sequence."
                              "\n\tmake sure the file \"%s\" exists"%ref_fname)
     try:
-        ref_seq = SeqIO.read(ref_fname, 'genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta')
+        ref_seq = read_sequence(ref_fname, format='genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta')
     except:
         raise AlignmentError("ERROR: Cannot read reference sequence."
                 "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname)

diff --git a/augur/ancestral.py b/augur/ancestral.py
@@ -32,6 +32,7 @@
 from .utils import parse_genes_argument, read_tree, InvalidTreeError, write_json, get_json_name, \
     genome_features_to_auspice_annotation
 from .io.file import open_file
+from .io.sequences import read_sequence
 from .io.vcf import is_vcf as is_filename_vcf
 from treetime.vcf_utils import read_vcf, write_vcf
 from collections import defaultdict
@@ -400,7 +401,7 @@ def run(args):
         if args.root_sequence:
             for fmt in ['fasta', 'genbank']:
                 try:
-                    ref = str(SeqIO.read(args.root_sequence, fmt).seq).upper()
+                    ref = str(read_sequence(args.root_sequence, format=fmt).seq).upper()
                     break
                 except:
                     pass

diff --git a/augur/export_v1.py b/augur/export_v1.py
@@ -12,7 +12,7 @@
 from .argparse_ import ExtendOverwriteDefault
 from .errors import AugurError
 from .io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_metadata
-from .io.sequences import read_sequences
+from .io.sequences import read_sequence, read_sequences
 from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors
 
 def convert_tree_to_json_structure(node, metadata, div=0, strains=None):
@@ -298,8 +298,7 @@ def get_root_sequence(root_node, ref=None, translations=None):
     '''
     root_sequence = {}
     if ref and translations:
-        from Bio import SeqIO
-        refseq = SeqIO.read(ref, 'fasta')
+        refseq = read_sequence(ref)
         root_sequence['nuc']=str(refseq.seq)
         for gene in read_sequences(translations):
             root_sequence[gene.id] = str(gene.seq)

diff --git a/augur/utils.py b/augur/utils.py
@@ -11,6 +11,7 @@
 
 from augur.data import as_file
 from augur.io.file import PANDAS_READ_CSV_OPTIONS, open_file
+from augur.io.sequences import read_sequence
 from augur.io.print import print_err
 
 from augur.types import ValidationMode
@@ -409,8 +410,7 @@ def _read_genbank(reference, feature_names):
         If 'nuc' annotation not parsed
         If a CDS feature is given the name 'nuc'
     """
-    from Bio import SeqIO
-    gb = SeqIO.read(reference, 'genbank')
+    gb = read_sequence(reference, format='genbank')
     features = {
         'nuc': _read_nuc_annotation_from_genbank(gb, reference)
     }