Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added SLURM support #48

Merged
merged 1 commit into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions splicekit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def junctions_master():
if splicekit.config.platform=="desktop":
splicekit.core.mprocess("jobs/count_junctions/process.sh")
splicekit.core.junctions.make_master()
if splicekit.config.platform=="SLURM":
os.system("jobs=( $(ls jobs/count_junctions/*.job) ); g=10; " "for((i=0; i < ${#jobs[@]}; i+=g)); do " "part=( \"${jobs[@]:i:g}\" ); " "for job_fname in ${part[*]}; do " "echo \"splicekit | features | junctions | submitted $job_fname\"; " "sbatch --mem=8G --parsable ${job_fname} & " "done; wait; " "echo \"splicekit | features | junctions | processing next 10\"; " "done; " "echo \"splicekit | features | junctions | processing complete\"")
os.system("sbatch --partition=short --mem=16G --output=/dev/null --error=/dev/null " "--wrap=\"python -c 'import splicekit; splicekit.core.junctions.make_master()'\"")

def junctions():
splicekit.core.annotation.make_comparisons()
Expand Down
30 changes: 28 additions & 2 deletions splicekit/core/anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,33 @@ def write_jobs_featureCounts(library_type='single-end', library_strand='NONE'):
jobs_dir = f'jobs/count_{anchor_type}_anchors'
logs_dir = f'logs/count_{anchor_type}_anchors'

job_anchors="""
if config.platform == 'SLURM':

job_anchors="""
#!/bin/bash
#SBATCH --job-name={anchor_type}_anchors_{sample_id} # Job name
#SBATCH --ntasks=12 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --partition=short # Select queue
#SBATCH --output={logs_dir}/{anchor_type}_anchors_{sample_id}.out # Output file
#SBATCH --error={logs_dir}/{anchor_type}_anchors_{sample_id}.err # Error file

module load Subread/2.0.3-GCC-9.3.0
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t anchor -g {anchor_type}_anchor_id -a {gtf_fname} -o {out_fname} {sam_fname}
# featureCount outputs command as first line of file, get rid of this first line and replace header for further parsing
# next, we are only interested in the 1st and 7th column (anchor_id and count)
cp {out_fname} {out_fname}_temp
# make header line of file and overwrite out_fname as new file
echo "{header_line}" >| {out_fname}
tail -n +3 {out_fname}_temp| cut -f1,7 >> {out_fname}
rm {out_fname}_temp
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""
else:

job_anchors="""
#!/bin/bash
#BSUB -J {anchor_type}_anchors_{sample_id} # Job name
#BSUB -n 12 # number of tasks
Expand All @@ -92,7 +118,7 @@ def write_jobs_featureCounts(library_type='single-end', library_strand='NONE'):
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""
"""

job_sh_anchors="""
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t anchor -g {anchor_type}_anchor_id -a {gtf_fname} -o {out_fname} {sam_fname}
Expand Down
42 changes: 38 additions & 4 deletions splicekit/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,23 @@ def make_comparisons():
annotation.samples = [str(el) for el in annotation.samples]

def write_comparisons():
job_rmats="""
if config.platform == 'SLURM':
job_rmats="""
#!/bin/bash
#SBATCH --job-name={job_name} # Job name
#SBATCH --ntasks=1 # Number of tasks
#SBATCH --mem=8G # Allocate memory
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --partition=short # Select queue
#SBATCH --output=logs/rmats/{comparison_name}.out # Output file
#SBATCH --error=logs/rmats/{comparison_name}.err # Error file

{container} run_rmats --b1 results/rmats/{comparison_name}_test.tab --b2 results/rmats/{comparison_name}_control.tab --gtf {gtf_path} -t paired --readLength 150 --variable-read-length --allow-clipping --nthread 4 --od results/rmats/{comparison_name}_results --tmp results/rmats/{comparison_name}_temp
"""

else:

job_rmats="""
#!/bin/bash
#BSUB -J {job_name} # job name
#BSUB -n 1 # number of tasks
Expand All @@ -148,7 +164,7 @@ def write_comparisons():
#BSUB -e logs/rmats/{comparison_name}.err # error file

{container} run_rmats --b1 results/rmats/{comparison_name}_test.tab --b2 results/rmats/{comparison_name}_control.tab --gtf {gtf_path} -t paired --readLength 150 --variable-read-length --allow-clipping --nthread 4 --od results/rmats/{comparison_name}_results --tmp results/rmats/{comparison_name}_temp
"""
"""

comps_table = open("annotation/comparisons.tab", "wt")
header = ["comparison", "control_samples", "test_samples", "control_group_id", "test_group_id"]
Expand Down Expand Up @@ -186,7 +202,25 @@ def write_comparisons():
write_edgeR_jobs()

def write_edgeR_jobs():
job_edgeR="""
if config.platform == 'SLURM':
job_edgeR="""
#!/bin/bash
#SBATCH --job-name={job_name} # Job name
#SBATCH --ntasks=1 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --mem=8G # Allocate memory
#SBATCH --partition=short # Select queue
#SBATCH --output=logs/edgeR/{atype}/{comparison_name}.out # Output file
#SBATCH --error=logs/edgeR/{atype}/{comparison_name}.err # Error file

module load R
{container} R --no-save --args {input_folder} {atype} {control_name} {test_name} {comparison_name} {sample_membership} {filter_low} < {core_path}/comps_edgeR.R
"""


else:

job_edgeR="""
#!/bin/bash
#BSUB -J {job_name} # job name
#BSUB -n 1 # number of tasks
Expand All @@ -198,7 +232,7 @@ def write_edgeR_jobs():

ml R
{container} R --no-save --args {input_folder} {atype} {control_name} {test_name} {comparison_name} {sample_membership} {filter_low} < {core_path}/comps_edgeR.R
"""
"""

job_sh_edgeR="""{container} R --no-save --args {input_folder} {atype} {control_name} {test_name} {comparison_name} {sample_membership} {filter_low} < {core_path}/comps_edgeR.R"""
fsh_exons = open(f"jobs/edgeR/exons/process.sh", "wt")
Expand Down
28 changes: 27 additions & 1 deletion splicekit/core/exons.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,33 @@ def write_jobs_featureCounts(library_type='single-end', library_strand='NONE'):
jobs_dir = f'jobs/count_exons'
logs_dir = f'logs/count_exons'

job_exons="""
if config.platform == 'SLURM':
job_exons="""
#!/bin/bash
#SBATCH --job-name=count_exons_{sample_id} # Job name
#SBATCH --ntasks=12 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --partition=short # Select queue
#SBATCH --output={logs_dir}/exons_{sample_id}.out # Output file
#SBATCH --error={logs_dir}/exons_{sample_id}.err # Error file

module load Subread/2.0.3-GCC-9.3.0
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t exon -g exon_id -a {gtf_fname} -o {out_fname} {sam_fname}
# featureCount outputs command as first line of file, get rid of this first line and replace header for further parsing
# next, we are only interested in the 1st and 7th column (exon_id and count)
cp {out_fname} {out_fname}_temp
# make header line of file and overwrite out_fname as new file
echo "{header_line}" >| {out_fname}
tail -n +3 {out_fname}_temp| cut -f1,7 >> {out_fname}
rm {out_fname}_temp
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""

else:
job_exons="""
#!/bin/bash
#BSUB -J count_exons_{sample_id} # Job name
#BSUB -n 12 # number of tasks
Expand Down
75 changes: 51 additions & 24 deletions splicekit/core/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,30 +75,57 @@ def write_jobs_featureCounts(library_type='single-end', library_strand='NONE'):
jobs_dir = f"jobs/count_genes"
logs_dir = f"logs/count_genes"

job_genes="""
#!/bin/bash
#BSUB -J count_genes_{sample_id} # Job name
#BSUB -n 12 # number of tasks
#BSUB -R "span[hosts=1]" # Allocate all tasks in 1 host
#BSUB -q short # Select queue
#BSUB -o {logs_dir}/genes_{sample_id}.out # Output file
#BSUB -e {logs_dir}/genes_{sample_id}.err # Error file

ml .testing
ml Subread/2.0.3-GCC-9.3.0
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t exon -g gene_id -a {gtf_fname} -o {out_fname} {sam_fname}
# featureCount outputs command as first line of file, get rid of this first line and replace header for further parsing
# next, we are only interested in the 1st and 7th column (gene_id and count)
cp {out_fname} {out_fname}_temp
# make header line of file and overwrite out_fname as new file
echo "{header_line}" >| {out_fname}
# sum exon counts to gene counts with awk
tail -n +3 {out_fname}_temp| cut -f1,7 | awk '{{sum[$1]+=$2}} END {{OFS="\\t"; for (i in sum) print i, sum[i]}}' | sort -n >> {out_fname}
rm {out_fname}_temp
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""
if config.platform == 'SLURM':
job_genes="""
#!/bin/bash
#SBATCH --job-name=count_genes_{sample_id} # Job name
#SBATCH --ntasks=12 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --partition=short # Select queue
#SBATCH --output={logs_dir}/genes_{sample_id}.out # Output file
#SBATCH --error={logs_dir}/genes_{sample_id}.err # Error file


module load Subread/2.0.3-GCC-9.3.0
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t exon -g gene_id -a {gtf_fname} -o {out_fname} {sam_fname}
# featureCount outputs command as first line of file, get rid of this first line and replace header for further parsing
# next, we are only interested in the 1st and 7th column (gene_id and count)
cp {out_fname} {out_fname}_temp
# make header line of file and overwrite out_fname as new file
echo "{header_line}" >| {out_fname}
# sum exon counts to gene counts with awk
tail -n +3 {out_fname}_temp| cut -f1,7 | awk '{{sum[$1]+=$2}} END {{OFS="\\t"; for (i in sum) print i, sum[i]}}' | sort -n >> {out_fname}
rm {out_fname}_temp
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""

else:
job_genes="""
#!/bin/bash
#BSUB -J count_genes_{sample_id} # Job name
#BSUB -n 12 # number of tasks
#BSUB -R "span[hosts=1]" # Allocate all tasks in 1 host
#BSUB -q short # Select queue
#BSUB -o {logs_dir}/genes_{sample_id}.out # Output file
#BSUB -e {logs_dir}/genes_{sample_id}.err # Error file

ml .testing
ml Subread/2.0.3-GCC-9.3.0
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t exon -g gene_id -a {gtf_fname} -o {out_fname} {sam_fname}
# featureCount outputs command as first line of file, get rid of this first line and replace header for further parsing
# next, we are only interested in the 1st and 7th column (gene_id and count)
cp {out_fname} {out_fname}_temp
# make header line of file and overwrite out_fname as new file
echo "{header_line}" >| {out_fname}
# sum exon counts to gene counts with awk
tail -n +3 {out_fname}_temp| cut -f1,7 | awk '{{sum[$1]+=$2}} END {{OFS="\\t"; for (i in sum) print i, sum[i]}}' | sort -n >> {out_fname}
rm {out_fname}_temp
# move summary from featureCount to logs
mv {out_fname}.summary {logs_dir}/
gzip -f {out_fname}
"""

job_sh_genes="""
{container} featureCounts {library_type_insert}-s {library_strand_insert} -M -O -T 12 -F GTF -f -t exon -g gene_id -a {gtf_fname} -o {out_fname} {sam_fname}
Expand Down
21 changes: 19 additions & 2 deletions splicekit/core/jbrowse2.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,24 @@ def write_sample_jobs(force_samples):
os.system("rm -r jobs/jobs_jbrowse/* >/dev/null 2>&1") # clean up previous jobs

# create bigwig and then cram files
job_bw="""
if config.platform == 'SLURM':
job_bw="""
#!/bin/bash
#SBATCH --job-name={sample}_jbrowse # Job name
#SBATCH --ntasks=4 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --partition=short # Select queue
#SBATCH --output=logs/logs_jbrowse/{sample}.out # Output file
#SBATCH --error=logs/logs_jbrowse/{sample}.err # Error file

{container} bamCoverage --ignoreDuplicates --binSize {bamCoverage_binSize} -b {bam_fname} -o {bigwig_fname} -of bigwig
{container} samtools view -C -T {genome_fa} {bam_fname} -O CRAM -o {cram_fname}
{container} samtools index {cram_fname}
"""

else:

job_bw="""
#!/bin/bash
#BSUB -J {sample}_jbrowse # job name
#BSUB -n 4 # number of tasks
Expand All @@ -65,7 +82,7 @@ def write_sample_jobs(force_samples):
{container} bamCoverage --ignoreDuplicates --binSize {bamCoverage_binSize} -b {bam_fname} -o {bigwig_fname} -of bigwig
{container} samtools view -C -T {genome_fa} {bam_fname} -O CRAM -o {cram_fname}
{container} samtools index {cram_fname}
"""
"""

job_sh_bw="""
{container} bamCoverage --ignoreDuplicates --binSize {bamCoverage_binSize} -b {bam_fname} -o {bigwig_fname} -of bigwig 2> logs/logs_jbrowse/{sample}.err
Expand Down
2 changes: 1 addition & 1 deletion splicekit/splicekit.config.template
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ protein = "K562.TARDBP.0"
protein_label = "tdp43" # used in file names and titles

# platform and container
platform = "desktop" # "desktop" or "cluster" (HPC with SLURM)
platform = "desktop" # "desktop" or "cluster" or "SLURM" ("cluster" refers to HPC systems with the LSF queue manager, "SLURM" to HPC systems with the SLURM queue manager)

# leaving the container empty will use software dependencies installed on your system
container = ""
Expand Down