Skip to content

Commit

Permalink
Merge pull request #65 from grexor/devel
Browse files (browse the repository at this point in the history)
Devel
  • Loading branch information
grexor authored Dec 10, 2024
2 parents 4172189 + c21a8d6 commit 6ce4fb7
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pybio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def genome_prepare(species, genome_version, args):
if pybio.utils.is_tool("STAR") and not args.nostar:
star_folder = os.path.join(pybio.config.genomes_folder, f"{species}.assembly.{genome_version}.star")
if not pybio.core.genomes.genomes_present.get(species, {}).get(genome_version, {}).get("STAR", False) or not os.path.exists(star_folder):
return_code = pybio.core.genomes.star_index(species, genome_version, threads=args.threads)
return_code = pybio.core.genomes.star_index(species, genome_version, args)
if return_code==0:
pybio.core.genomes.genomes_present[species][genome_version]["STAR"] = True
# write gingo file
Expand Down
15 changes: 11 additions & 4 deletions pybio/core/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def download_annotation(species, genome_version):
command = script.format(shell=pybio.config.shell, gtf_url=gtf_url, gdir=pybio.config.genomes_folder, species=species, genome_version=genome_version)
return os.system(command)

def star_index(species, genome_version, threads=1):
def star_index(species, genome_version, args):
species_capital = species.capitalize()
assembly = species_db.get(species, {}).get("assembly", species)
ensembl_version = genome_version.replace("ensembl", "")
Expand All @@ -394,12 +394,19 @@ def star_index(species, genome_version, threads=1):
mkdir {species}.assembly.{genome_version}.star >/dev/null 2>&1
cd {species}.assembly.{genome_version}.star
gunzip -f -k ../{species}.annotation.{genome_version}/{species}.gtf.gz # -k to keep both .gz and uncompressed GTF, some tools require uncompressed GTF
STAR --runMode genomeGenerate --genomeSAindexNbases {genomeSAindexNbases} --genomeDir ../{species}.assembly.{genome_version}.star --genomeFastaFiles ../{species}.assembly.{genome_version}/{species}.fasta --runThreadN {threads} --sjdbGTFfile ../{species}.annotation.{genome_version}/{species}.gtf
STAR --runMode genomeGenerate {genomeChrBinNbits} --genomeSAindexNbases {genomeSAindexNbases} --genomeDir ../{species}.assembly.{genome_version}.star --genomeFastaFiles ../{species}.assembly.{genome_version}/{species}.fasta --runThreadN {threads} --sjdbGTFfile ../{species}.annotation.{genome_version}/{species}.gtf
"""
fasta_file = f"{pybio.config.genomes_folder}/{species}.assembly.{genome_version}/{species}.fasta"
fasta_size = os.path.getsize(fasta_file)
genomeSAindexNbases = int(min(14, math.log(fasta_size, 2)/2 - 1))
command = script.format(shell=pybio.config.shell, threads=threads, genomeSAindexNbases=genomeSAindexNbases, gdir=pybio.config.genomes_folder, species=species, species_capital=species_capital, assembly=assembly, ensembl_version=ensembl_version, genome_version=genome_version)
if not args.genomeSAindexNbases:
genomeSAindexNbases = int(min(14, math.log(fasta_size, 2)/2 - 1))
else:
genomeSAindexNbases = int(args.genomeSAindexNbases)
if args.genomeChrBinNbits:
genomeChrBinNbits = f"--genomeChrBinNbits {args.genomeChrBinNbits}"
else:
genomeChrBinNbits = ""
command = script.format(shell=pybio.config.shell, threads=args.threads, genomeChrBinNbits=genomeChrBinNbits, genomeSAindexNbases=genomeSAindexNbases, gdir=pybio.config.genomes_folder, species=species, species_capital=species_capital, assembly=assembly, ensembl_version=ensembl_version, genome_version=genome_version)
return os.system(command)

def salmon_index(species, genome_version):
Expand Down
21 changes: 13 additions & 8 deletions pybio/pybio
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ parser.add_argument("-gtf", "--gtf", help="GTF file to import")
parser.add_argument("-threads", "--threads", '-t', '--t', default="1", help="Number of threads to use (default: 1)")
parser.add_argument("-help", "-h", "--help", action="store_true")
parser.add_argument("-xs", "--xs", help="Add '--outSAMstrandField intronMotif'", action="store_false")
parser.add_argument("-genomeSAindexNbases", "--genomeSAindexNbases", help="STAR genomeSAindexNbases")
parser.add_argument("-genomeChrBinNbits", "--genomeChrBinNbits", help="STAR genomeChrBinNbits")
args = parser.parse_args()

help_0 = """
Expand Down Expand Up @@ -87,9 +89,9 @@ Example:
$ pybio sam2bam file1.sam file1.bam
"""

print(f"[pybio] v{pybio.version}, https://github.com/grexor/pybio")
print(f"[pybio] config file: {pybio.config.config_fname()}")
print(f"[pybio] genomes folder: {pybio.config.genomes_folder}")
print(f"pybio | v{pybio.version}, https://github.com/grexor/pybio")
print(f"pybio | config file: {pybio.config.config_fname()}")
print(f"pybio | genomes folder: {pybio.config.genomes_folder}")
print()

if args.version:
Expand Down Expand Up @@ -118,12 +120,12 @@ def determine_species(species):
return []

def display_potential_hits(potential_hits, search_text):
print(f"We found {len(potential_hits)} genome hits for your provided genome species `{search_text}`.\n")
print(f"pybio found {len(potential_hits)} genome hits for your provided genome species `{search_text}`.\n")
for hit in potential_hits:
print(f"Species = '{hit[1]}', display name = '{hit[2]}'")
print(f"Species = '\033[32;1m{hit[1]}\033[0m', display name = '{hit[2]}'")
if len(potential_hits)>0:
print(f"\nFor example, to download the first hit from the list above, you could write:\n")
print(f"$ pybio genome {potential_hits[0][1]}")
print(f"$ \033[32;1mpybio genome {potential_hits[0][1]}\033[0m")
print()
sys.exit(1)

Expand All @@ -141,7 +143,7 @@ def resolve_species_version(species, args, downloaded_only=False):
genome_version = args.commands[2]
return species.lower(), genome_version, potential_hits
if len(args.commands)>=2:
if args.commands[0] not in ["search", "genome", "path", "species"]:
if args.commands[0] not in ["search", "genome", "genomes", "path", "species"]:
genome_version = args.commands[1]
return species.lower(), genome_version, potential_hits
genome_version = determine_genome_version(species, downloaded_only=downloaded_only)
Expand All @@ -167,7 +169,7 @@ def determine_genome_version(species, downloaded_only):
return pybio.core.genomes.species_db.get(species, {}).get("genome_version", None)

def handle_genome(args):
search_string = args.commands[1] if args.commands[0] in ["genome", "search"] else args.commands[0]
search_string = args.commands[1] if args.commands[0] in ["genome", "genomes", "search"] else args.commands[0]
species, genome_version, potential_hits = resolve_species_version(search_string, args)
if len(potential_hits)>1:
display_potential_hits(potential_hits, species)
Expand Down Expand Up @@ -225,6 +227,9 @@ if len(args.commands)>0:
sys.exit()

if args.commands[0] in ["genome", "genomes"]:
if len(args.commands)<2:
print("Please provide a search term for the species")
sys.exit()
handle_genome(args)
sys.exit()

Expand Down

0 comments on commit 6ce4fb7

Please sign in to comment.