diff --git a/conf/modules/msisensorpro.config b/conf/modules/msisensorpro.config index 8253cccc50..1526a61f13 100644 --- a/conf/modules/msisensorpro.config +++ b/conf/modules/msisensorpro.config @@ -22,4 +22,14 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + withName: 'MSISENSORPRO_MSITUMORONLY' { + ext.args = { params.wes ? '-c 20' : '-c 15' } // MSIsensorpro default -c values: 20 with --wes, 15 otherwise + ext.prefix = { "${meta.id}.tumor_only" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } diff --git a/main.nf b/main.nf index effa97ef16..1393a4e429 100755 --- a/main.nf +++ b/main.nf @@ -76,6 +76,7 @@ include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nf include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' +include { MSISENSORPRO_SCAN } from './modules/nf-core/msisensorpro/scan/main' // Initialize fasta file with meta map: fasta = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() @@ -90,6 +91,7 @@ germline_resource = params.germline_resource ? Channel.fromPath(para known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) +msisensorpro_baseline = params.msisensorpro_baseline ? Channel.fromPath(params.msisensorpro_baseline).collect() : Channel.empty() pon = params.pon ?
Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) @@ -165,8 +167,18 @@ workflow NFCORE_SAREK { aligner == "bwa-mem2" ? bwamem2 : dragmap - // TODO: add a params for msisensorpro_scan - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan + // Reference msi list for MSIsensorpro: taken from --msisensorpro_scan when set, otherwise built from the fasta; empty channel when msisensorpro is not in --tools + if (params.tools && params.tools.split(',').contains('msisensorpro')) { + if (params.msisensorpro_scan) { + msisensorpro_scan = Channel.fromPath(params.msisensorpro_scan).collect() + } else { + MSISENSORPRO_SCAN(fasta) + msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } + versions = versions.mix(MSISENSORPRO_SCAN.out.versions) + } + } else { + msisensorpro_scan = Channel.empty() + } // For ASCAT, extracted from zip or tar.gz files allele_files = PREPARE_GENOME.out.allele_files @@ -294,6 +306,7 @@ workflow NFCORE_SAREK { loci_files, mappability, msisensorpro_scan, + msisensorpro_baseline, ngscheckmate_bed, pon, pon_tbi, diff --git a/modules/nf-core/msisensorpro/msisomatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf index 9b0084d949..52a33705a3 100644 --- a/modules/nf-core/msisensorpro/msisomatic/main.nf +++ b/modules/nf-core/msisensorpro/msisomatic/main.nf @@ -26,7 +26,7 @@ process MSISENSORPRO_MSISOMATIC { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def fasta = fasta ? "-g ${fasta}" : "" - def intervals = intervals ? " -e ${intervals} " : "" + def intervals = intervals ?
"-e ${intervals} " : "" """ msisensor-pro \\ msi \\ diff --git a/modules/nf-core/msisensorpro/msitumoronly/environment.yml b/modules/nf-core/msisensorpro/msitumoronly/environment.yml new file mode 100644 index 0000000000..47842c75b3 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/environment.yml @@ -0,0 +1,7 @@ +name: msisensorpro_msitumoronly +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::msisensor-pro=1.2.0 diff --git a/modules/nf-core/msisensorpro/msitumoronly/main.nf b/modules/nf-core/msisensorpro/msitumoronly/main.nf new file mode 100644 index 0000000000..92dc3b6b56 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/main.nf @@ -0,0 +1,46 @@ +process MSISENSORPRO_MSITUMORONLY { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : + 'biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" + + input: + tuple val(meta), path(tumor), path(tumor_index), path(intervals) + path (fasta) + path (msisensor_baseline) + + output: + tuple val(meta), path("${prefix}") , emit: output_report + tuple val(meta), path("${prefix}_dis") , emit: output_dis + tuple val(meta), path("${prefix}_all") , emit: output_all + tuple val(meta), path("${prefix}_unstable"), emit: output_unstable + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def fasta = fasta ? "-g ${fasta}" : "" + def intervals = intervals ? 
"-e ${intervals} " : "" + """ + msisensor-pro \\ + pro \\ + -d ${msisensor_baseline} \\ + -t ${tumor} \\ + ${fasta} \\ + -o $prefix \\ + -b ${task.cpus} \\ + ${intervals} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/msisensorpro/msitumoronly/meta.yml b/modules/nf-core/msisensorpro/msitumoronly/meta.yml new file mode 100644 index 0000000000..d6b81a5913 --- /dev/null +++ b/modules/nf-core/msisensorpro/msitumoronly/meta.yml @@ -0,0 +1,68 @@ +name: msisensorpro_msitumoronly +description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients with next generation sequencing data. It accepts the whole genome sequencing, whole exome sequencing and target region (panel) sequencing data as input +keywords: + - micro-satellite-scan + - msisensor-pro + - msi + - somatic + - tumor-only +tools: + - msisensorpro: + description: Microsatellite Instability (MSI) detection using high-throughput sequencing data. + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: https://github.com/xjtu-omics/msisensor-pro/wiki + tool_dev_url: https://github.com/xjtu-omics/msisensor-pro + doi: "10.1016/j.gpb.2020.02.001" + licence: ["Custom Licence"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - tumor_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: bed file containing interval information, optional + pattern: "*.{bed}" + - fasta: + type: file + description: Reference genome + pattern: "*.{fasta}" + - msisensor_baseline: + type: file + description: File containing custom list of msi regions from a panel of normals to be used as a baseline +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output_report: + type: file + description: File containing final report with all detected microsatellites, unstable somatic microsatellites, msi score + - output_dis: + type: file + description: File containing distribution results + - output_all: + type: file + description: File containing all sites + - output_unstable: + type: file + description: File containing unstable sites + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@bounlu" +maintainers: + - "@FriederikeHanssen" diff --git a/nextflow.config b/nextflow.config index de5e6d4ca6..e3ae609cc4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -73,6 +73,8 @@ params { ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling + msisensorpro_scan = null // by default the reference is built from the fasta file + msisensorpro_baseline = null // by default tumor-only mode is not used in MSIsensorpro only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample
sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' diff --git a/nextflow_schema.json b/nextflow_schema.json index 5cdf35d555..618e1e5f5d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -758,6 +758,16 @@ "description": "Path to Control-FREEC mappability file.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, + "msisensorpro_scan": { + "type": "string", + "fa_icon": "fas fa-file-alt", + "description": "Path to MSIsensorpro reference genome microsatellites information file." + }, + "msisensorpro_baseline": { + "type": "string", + "fa_icon": "fas fa-file-alt", + "description": "Path to MSIsensorpro custom baseline file for tumor-only analysis." + }, "ngscheckmate_bed": { "type": "string", "fa_icon": "fas fa-file", diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index 1f8e26d3c7..854f4eefb1 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -45,13 +45,12 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { wes // boolean: [mandatory] [default: false] whether targeted data is processed main: - versions = Channel.empty() + versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config vcf_freebayes = Channel.empty() vcf_manta = Channel.empty() vcf_strelka = Channel.empty() - out_msisensorpro = Channel.empty() vcf_mutect2 = Channel.empty() vcf_tiddit = Channel.empty() out_indexcov = Channel.empty() @@ -191,11 +190,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { } // MSISENSOR - if (tools.split(',').contains('msisensorpro')) { + if (tools.split(',').contains('msisensorpro') && msisensorpro_scan) { MSISENSORPRO_MSISOMATIC(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ 
fasta ] }, msisensorpro_scan) versions = versions.mix(MSISENSORPRO_MSISOMATIC.out.versions) - out_msisensorpro = out_msisensorpro.mix(MSISENSORPRO_MSISOMATIC.out.output_report) } // MUTECT2 @@ -206,7 +204,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { // joint_mutect2 mode needs different meta.map than regular mode cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> joint_mutect2 ? - //we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step + // we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step [ meta + [ id:meta.patient ], [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] : [ meta, [ normal_cram, tumor_cram ], [ normal_crai, tumor_crai ] ] }, @@ -249,7 +247,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { emit: out_indexcov - out_msisensorpro vcf_all vcf_freebayes vcf_manta diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index 8016391cfc..8213045579 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -11,6 +11,7 @@ include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_cal include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from '../bam_variant_calling_tumor_only_manta/main' include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2/main' include { BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ } from '../bam_variant_calling_tumor_only_lofreq/main' +include { MSISENSORPRO_MSITUMORONLY } from '../../../modules/nf-core/msisensorpro/msitumoronly/main' workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { take: @@ -32,15 +33,16 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped intervals_bed_gz_tbi_combined // channel: 
[mandatory] intervals/target regions in one file zipped mappability + msisensorpro_baseline // channel: [optional] msisensorpro_baseline panel_of_normals // channel: [optional] panel_of_normals panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode wes // boolean: [mandatory] [default: false] whether targeted data is processed main: - versions = Channel.empty() + versions = Channel.empty() - //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + // TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config vcf_freebayes = Channel.empty() vcf_manta = Channel.empty() vcf_mpileup = Channel.empty() @@ -109,6 +111,13 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } + // MSISENSOR + if (tools.split(',').contains('msisensorpro') && msisensorpro_baseline) { + MSISENSORPRO_MSITUMORONLY(cram.combine(intervals_bed_combined), fasta.map{ meta, fasta -> [ fasta ] }, msisensorpro_baseline) + + versions = versions.mix(MSISENSORPRO_MSITUMORONLY.out.versions) + } + // MUTECT2 if (tools.split(',').contains('mutect2')) { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2( diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 772af47b37..a7e7614310 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -12,7 +12,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf- include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/main' include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' -include { MSISENSORPRO_SCAN } from 
'../../../modules/nf-core/msisensorpro/scan/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS } from '../../../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main' @@ -50,7 +49,6 @@ workflow PREPARE_GENOME { DRAGMAP_HASHTABLE(fasta) // If aligner is dragmap GATK4_CREATESEQUENCEDICTIONARY(fasta) - MSISENSORPRO_SCAN(fasta) SAMTOOLS_FAIDX(fasta, [ [ id:'no_fai' ], [] ] ) // the following are flattened and mapped in case the user supplies more than one value for the param @@ -105,7 +103,6 @@ workflow PREPARE_GENOME { versions = versions.mix(BWAMEM2_INDEX.out.versions) versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - versions = versions.mix(MSISENSORPRO_SCAN.out.versions) versions = versions.mix(SAMTOOLS_FAIDX.out.versions) versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions) versions = versions.mix(TABIX_DBSNP.out.versions) @@ -125,7 +122,6 @@ workflow PREPARE_GENOME { germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi - msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi allele_files // path: allele_files diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf index 1c0d80a1db..8a710dc186 100644 --- a/subworkflows/local/samplesheet_to_channel/main.nf +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -155,7 +155,7 @@ 
workflow SAMPLESHEET_TO_CHANNEL{ } input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] def requested_tools_requiring_normal_samples = [] tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> if (tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) @@ -166,7 +166,7 @@ workflow SAMPLESHEET_TO_CHANNEL{ } } - // Fails when wrongfull extension for intervals file + // Fails when the intervals file has the wrong extension. NOTE(review): `!step == 'annotate'` below parses as `(!step) == 'annotate'`, which is never true - presumably `step != 'annotate'` was meant; confirm if (wes && !step == 'annotate') { if (intervals && !intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf index ce568284c7..44295cfb25 100644 --- a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -102,6 +102,8 @@ workflow PIPELINE_INITIALISATION { params.known_snps, params.known_snps_tbi, params.mappability, + params.msisensorpro_scan, + params.msisensorpro_baseline, params.multiqc_config, params.ngscheckmate_bed, params.pon, diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index f554cd9ddd..f13edf2ce4 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -127,6 +127,7 @@ workflow SAREK { loci_files mappability msisensorpro_scan + msisensorpro_baseline ngscheckmate_bed pon pon_tbi @@ -740,6 +741,7 @@ workflow SAREK { intervals_bed_combined, intervals_bed_gz_tbi_combined, // [] if no_intervals, else
interval_bed_combined_gz, interval_bed_combined_gz_tbi mappability, + msisensorpro_baseline, pon, pon_tbi, params.joint_mutect2,