-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRNAseq1.nf
106 lines (88 loc) · 2.88 KB
/
RNAseq1.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Usage: nextflow run <this_script>.nf -with-docker
// Alternatively, set `docker.enabled = true` in nextflow.config and omit the flag.

// Default parameters; each can be overridden on the command line (--reads, --outdir, ...).
params.reads              = "${projectDir}/data/ggal/*_{1,2}.fq"
params.transcriptome_file = "${projectDir}/data/ggal/transcriptome.fa"
// NOTE(review): params.multiqc is echoed in the banner below but is not referenced
// by any process visible in this file.
params.multiqc            = "${projectDir}/multiqc"
params.outdir             = "${projectDir}/my_rnaseq_data/result"

// Print a run summary. The banner lines are deliberately kept at column 0 so the
// logged text carries no leading whitespace.
log.info("""\
Simple RNAseq Workflow
Raw Reads : "${params.reads}"
Transcriptome : "${params.transcriptome_file}"
multiqc report : "${params.multiqc}"
output directory : "${params.outdir}"
""")
/*
 * INDEX: build a salmon index from the transcriptome file.
 *
 * input : transcriptome - path to the transcriptome file
 * output: the 'salmon_index' path written by `salmon index`
 */
process INDEX {
    // Directives declare the resources a task requests; they must appear
    // before the input/output/script sections.
    // See https://www.nextflow.io/docs/latest/process.html#directives
    cpus 8
    memory '4 GB'

    input:
    path transcriptome

    output:
    path "salmon_index"

    script:
    """
    salmon index --threads ${task.cpus} -t ${transcriptome} -i salmon_index
    """
}
/*
 * QUANTIFICATION: run `salmon quant` on one pair of read files.
 *
 * input : salmon_index       - index path (the input name is arbitrary; only the
 *                              `path` qualifier matters)
 *         (sample_id, reads) - sample identifier plus its read files; the first
 *                              two entries of `reads` are passed as -1 and -2
 * output: the path named after sample_id, written by salmon via `-o`
 */
process QUANTIFICATION {
    tag "Salmon on ${sample_id}"
    // Publish the task output into this directory.
    publishDir "my_rnaseq_data/rnaseq"
    cpus 8

    input:
    path salmon_index
    tuple val(sample_id), path(reads)

    output:
    // Input variables can be interpolated here just as in the script block.
    path "${sample_id}"

    script:
    def mate1 = reads[0]
    def mate2 = reads[1]
    """
    salmon quant --threads ${task.cpus} --libType=U -i ${salmon_index} -1 ${mate1} -2 ${mate2} -o ${sample_id}
    """
}
/*
 * FASTQC: run FastQC on the read files of one sample.
 *
 * input : (sample_id, reads) - sample identifier plus its read files
 * output: the path fastqc_<sample_id>_logs that FastQC writes into
 */
process FASTQC {
    tag "FASTQC on ${sample_id}"

    input:
    tuple val(sample_id), path(reads)

    output:
    path "fastqc_${sample_id}_logs"

    script:
    // Name the log directory once so mkdir and fastqc cannot drift apart.
    def logs_dir = "fastqc_${sample_id}_logs"
    """
    mkdir ${logs_dir}
    fastqc -o ${logs_dir} -f fastq -q ${reads}
    """
}
/*
 * MULTIQC: run `multiqc .` over everything staged into the task directory.
 *
 * input : any number of paths, staged under their own names ('*')
 * output: multiqc_report.html, copied into params.outdir
 */
process MULTIQC {
    publishDir params.outdir, mode: 'copy'

    input:
    path '*'

    output:
    path 'multiqc_report.html'

    script:
    '''
    multiqc .
    '''
}
/*
 * Create the read-pairs channel with the `fromFilePairs` channel factory and name
 * it with the `set` operator. `checkIfExists: true` asks the factory to verify
 * that the files exist.
 * A channel may be declared before the workflow scope or inside it, as long as
 * the declaration is upstream of the process that consumes it.
 */
channel
    .fromFilePairs(params.reads, checkIfExists: true)
    .set { read_pairs_ch }
/*
 * Entry workflow: index the transcriptome, quantify and FastQC every read pair,
 * then feed all of those outputs to MULTIQC.
 */
workflow {
    INDEX(params.transcriptome_file)
    INDEX.out.view()

    // Prints, for each prefix, the prefix followed by the list of its read files.
    read_pairs_ch.view()

    QUANTIFICATION(INDEX.out, read_pairs_ch)
    FASTQC(read_pairs_ch)

    // Only one MultiQC task should run, producing one report: `mix` merges the
    // two output channels and `collect` gathers the complete contents into a
    // single element.
    multiqc_inputs_ch = QUANTIFICATION.out.mix(FASTQC.out).collect()
    MULTIQC(multiqc_inputs_ch)
}