diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 544a5a8..6d0938e 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,19 +11,6 @@ on: types: [published] jobs: - EditorConfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile\|.sra') - Prettier: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875..42e519b 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.nf-core.yml b/.nf-core.yml index 1f79c1a..3d97354 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -34,5 +34,8 @@ lint: - custom_config - manifest.name - manifest.homePage + - params.max_cpus + - params.max_memory + - params.max_time readme: - nextflow_badge diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb..98a90d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,3 +3,8 @@ repos: rev: "v2.7.1" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f2017f..49dd608 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,22 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.2.0] + +### `Changed` + +- Modified the template for input csv file to include a `sample_name` column in addition to `sample` in-line with changes to [IRIDA-Next update] as seen with the [speciesabundance pipeline] + - If `sample_name` is supplied, then the reads will have `sample_name` prefixed before the accession code + - `sample_name` special characters will be replaced with `"_"` +- Reverted `fasterq-dump` version to 2.11.0 from 3.0.8 due to [issue #865]. Solution proposed by `fetchngs` in [PR #261] +- Fixed linting issues in CI caused by `nf-core` 3.0.1 +- Updated `nf-test` snapshots and added new tests for `sample_name` feature + +[IRIDA-Next update]: https://github.com/phac-nml/irida-next/pull/678 +[speciesabundance pipeline]: https://github.com/phac-nml/speciesabundance/pull/24 +[issue #865]: https://github.com/ncbi/sra-tools/issues/865 +[PR #261]: https://github.com/nf-core/fetchngs/pull/261 + ## [1.1.1] - 2024-04-19 ### Added @@ -29,3 +45,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Initial release of fetchdatairidanext pipeline which will download reads from NCBI/INSDC archives. + +[1.2.0]: https://github.com/phac-nml/fetchdatairidanext/releases/tag/1.2.0 +[1.1.1]: https://github.com/phac-nml/fetchdatairidanext/releases/tag/1.1.1 +[1.1.0]: https://github.com/phac-nml/fetchdatairidanext/releases/tag/1.1.0 +[1.0.1]: https://github.com/phac-nml/fetchdatairidanext/releases/tag/1.0.1 +[1.0.0]: https://github.com/phac-nml/fetchdatairidanext/releases/tag/1.0.0 diff --git a/README.md b/README.md index 2390e32..26f4b59 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,22 @@ That is, there are two columns: The structure of this file is defined in [assets/schema_input.json](assets/schema_input.json). 
An example of this file is provided at [assets/samplesheet.csv](assets/samplesheet.csv). +## IRIDA-Next Optional Input Configuration + +`fetchdatairidanext` accepts the [IRIDA-Next](https://github.com/phac-nml/irida-next) format for samplesheets which can contain an additional column: `sample_name` + +`sample_name`: An **optional** column, to add the `sample_name` prefix before the accession code. + +`sample_name` allows more flexibility in naming reads. Unlike `sample`, `sample_name` is not required to contain unique values. Non-alphanumeric characters (excluding `_`,`-`,`.`) will be replaced with `"_"`. To provide a `sample_name` column without renaming the reads, set `--rename_with_samplename false`. + +An [example samplesheet](tests/data/add-samplesheet.csv) has been provided with the pipeline. + # Parameters The main parameters are `--input` as defined above and `--output` for specifying the output results directory. You may wish to provide `-profile singularity` to specify the use of singularity containers (or `-profile docker` for docker) and `-r [branch]` to specify which GitHub branch you would like to run. +`--rename_with_samplename` (Default: `true`) When `false`, the samplesheet column `sample_name` is not used for renaming reads. + Other parameters (defaults from nf-core) are defined in [nextflow_schema.json](nextflow_schema.json). 
# Running diff --git a/assets/schema_input.json b/assets/schema_input.json index edcf572..077a2f5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/phac-nml/fetchdatairidanext/main/assets/schema_input.json", "title": "phac-nml/fetchdatairidanext pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -9,11 +9,16 @@ "properties": { "sample": { "type": "string", - "pattern": "^\\S+$", - "meta": ["id"], + "pattern": "^[A-Za-z0-9_.-]+$", + "meta": ["irida_id"], "unique": true, "errorMessage": "Sample name must be provided and cannot contain spaces" }, + "sample_name": { + "type": "string", + "meta": ["id"], + "errorMessage": "Optional. Used to override reads filename when used in tools like IRIDA-Next" + }, "insdc_accession": { "type": "string", "pattern": "^(SRR|ERR|DRR)\\S+$", diff --git a/conf/iridanext.config b/conf/iridanext.config index b336dfb..739532c 100644 --- a/conf/iridanext.config +++ b/conf/iridanext.config @@ -5,7 +5,7 @@ iridanext { overwrite = true validate = true files { - idkey = "id" + idkey = "irida_id" global = ["**/prefetch/failures_report.csv"] samples = ["**/reads/*.fastq.gz"] } diff --git a/conf/modules.config b/conf/modules.config index 7573f95..54b5ea8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -37,5 +37,20 @@ process { mode: params.publish_dir_mode, pattern: 'reads/*.fastq.gz' ] + def fasterq_rename = {String sample_name, String accession -> "--outfile ${sample_name}_${accession}"} + def add_extension = {String sample_name, String accession -> "${sample_name}_${accession}"} + + + + ext.args = { + [ + (meta.id && params.rename_with_samplename) ? fasterq_rename(meta.id, meta.insdc_accession) : "" + ].join(" ") + } + ext.args2 = { + [ + (meta.id && params.rename_with_samplename) ? 
add_extension(meta.id, meta.insdc_accession) : meta.insdc_accession + ].join(" ") + } } } diff --git a/docs/output.md b/docs/output.md index 7e95254..8247aae 100644 --- a/docs/output.md +++ b/docs/output.md @@ -29,7 +29,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `sratools/` - Sequence data in SRA format: `INSDC_ACCESSION/INSDC_ACCESSION.sra` - `reads/` - - Reads in fastq format: `INSDC_ACCESSION.fastq.gz` + - Reads in fastq format: `INSDC_ACCESSION.fastq.gz` (or alternatively `SAMPLE_NAME_INSDC_ACCESSION.fastq.gz` if `sample_name` is provided) diff --git a/docs/usage.md b/docs/usage.md index a261f28..060b3cf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -31,6 +31,26 @@ SAMPLE2,SRR13191702 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +### IRIDA-Next Optional Samplesheet Configuration + +`fetchdatairidanext` also accepts the [IRIDA-Next](https://github.com/phac-nml/irida-next) format for samplesheets which contain the following columns: `sample`, `sample_name`, `insdc_accession`. The `sample` column values within a samplesheet should be unique. + +A final samplesheet file with the optional `sample_name` column may look something like the one below: + +```console +sample,sample_name,insdc_accession +SAMPLE1,S1,ERR1109373 +SAMPLE2,,SRR13191702 +``` + +| Column | Description | +| ----------------- | ----------------------------------------------------------------------------------- | +| `sample` | Custom sample name. Samples should be unique within a samplesheet. | +| `sample_name` | Provides custom prefix to read filenames | +| `insdc_accession` | The accession (run accession) from one of the INSDC databases (NCBI, ENA, or DDBJ). | + +An [example samplesheet](../tests/data/add-samplesheet.csv) has been provided with the pipeline. 
+ ## Running the pipeline The typical command for running the pipeline is as follows: @@ -132,6 +152,10 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. +### `--rename_with_samplename` + +When `sample_name` is included in the sample sheet, it will be prefixed to read filenames (Default: true) + ## Custom configuration ### Resource requests diff --git a/modules.json b/modules.json index 6250d22..cb385c1 100644 --- a/modules.json +++ b/modules.json @@ -15,9 +15,14 @@ "git_sha": "e719354ba77df0a1bd310836aa2039b45c29d620", "installed_by": ["modules"] }, + "sratools/fasterqdump": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "sratools/prefetch": { "branch": "master", - "git_sha": "e719354ba77df0a1bd310836aa2039b45c29d620", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] } } diff --git a/modules/local/prefetchchecker/main.nf b/modules/local/prefetchchecker/main.nf index 4fe0a8d..4917d1d 100644 --- a/modules/local/prefetchchecker/main.nf +++ b/modules/local/prefetchchecker/main.nf @@ -11,11 +11,20 @@ process PREFETCH_CHECKER { exec: task.workDir.resolve("failures_report.csv").withWriter { writer -> - writer.writeLine("sample,error_accession") // header + sample_name = false + failures.each { + if ( it[0].id != null) { + sample_name = true + } + } // Failures - if (failures.size() > 0) { - failures.each { writer.writeLine "${it[0].id},${it[1]}" } + if (failures.size() > 0 && sample_name) { + writer.writeLine("sample,sample_name,error_accession") // header + failures.each { writer.writeLine "${it[0].irida_id},${it[0].id},${it[1]}" } + } else { + writer.writeLine("sample,error_accession") // header + failures.each { writer.writeLine "${it[0].irida_id},${it[1]}" 
} } } } diff --git a/modules/local/sratools/fasterqdump/environment.yml b/modules/local/sratools/fasterqdump/environment.yml index 4011b69..dd0faa5 100644 --- a/modules/local/sratools/fasterqdump/environment.yml +++ b/modules/local/sratools/fasterqdump/environment.yml @@ -4,5 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::sra-tools=3.0.8 + - bioconda::sra-tools=2.11.0 - conda-forge::pigz=2.6 diff --git a/modules/local/sratools/fasterqdump/main.nf b/modules/local/sratools/fasterqdump/main.nf index 66ba956..28ebe07 100644 --- a/modules/local/sratools/fasterqdump/main.nf +++ b/modules/local/sratools/fasterqdump/main.nf @@ -4,8 +4,8 @@ process SRATOOLS_FASTERQDUMP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:2f4a4c900edd6801ff0068c2b3048b4459d119eb-0' : - 'biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:2f4a4c900edd6801ff0068c2b3048b4459d119eb-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' : + 'biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: tuple val(meta), path(sra) @@ -22,6 +22,7 @@ process SRATOOLS_FASTERQDUMP { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def key_file = '' @@ -46,8 +47,10 @@ process SRATOOLS_FASTERQDUMP { ${key_file} \\ ${sra} + find reads/ -type f -name "$args2" -exec mv {} {}.fastq \\; + pigz \\ - $args2 \\ + $args3 \\ --no-name \\ --processes $task.cpus \\ reads/*.fastq diff --git a/modules/nf-core/sratools/prefetch/environment.yml b/modules/nf-core/sratools/prefetch/environment.yml index cfc7d9a..6596bc7 100644 --- 
a/modules/nf-core/sratools/prefetch/environment.yml +++ b/modules/nf-core/sratools/prefetch/environment.yml @@ -1,7 +1,6 @@ -name: sratools_prefetch channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::sra-tools=3.0.8 + - bioconda::sra-tools=3.1.0 + - conda-forge::curl=8.5.0 diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 3c30739..74838d5 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -4,8 +4,8 @@ process SRATOOLS_PREFETCH { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sra-tools:3.0.8--h9f5acd7_0' : - 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/sra-tools:3.1.0--h9f5acd7_0' : + 'biocontainers/sra-tools:3.1.0--h9f5acd7_0' }" input: tuple val(meta), val(id) @@ -13,7 +13,7 @@ process SRATOOLS_PREFETCH { path certificate output: - tuple val(meta), path(id), emit: sra + tuple val(meta), path(id, type: 'dir'), emit: sra path 'versions.yml' , emit: versions when: @@ -32,4 +32,16 @@ process SRATOOLS_PREFETCH { } template 'retry_with_backoff.sh' + + stub: + """ + mkdir $id + touch $id/${id}.sra + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sratools: \$(prefetch --version 2>&1 | grep -Eo '[0-9.]+') + curl: \$(curl --version | head -n 1 | sed 's/^curl //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/sratools/prefetch/meta.yml b/modules/nf-core/sratools/prefetch/meta.yml index ff54229..3a537bf 100644 --- a/modules/nf-core/sratools/prefetch/meta.yml +++ b/modules/nf-core/sratools/prefetch/meta.yml @@ -11,46 +11,49 @@ tools: documentation: https://github.com/ncbi/sra-tools/wiki tool_dev_url: https://github.com/ncbi/sra-tools licence: ["Public Domain"] + identifier: "" input: - - meta: - type: map - description: > - 
Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - - id: - type: string - description: > - A string denoting an SRA id. - - - ncbi_settings: - type: file - description: > - An NCBI user settings file. - - pattern: "*.mkfg" - - certificate: - type: file - description: > - Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit - - pattern: "*.cart" + - - meta: + type: map + description: > + Groovy Map containing sample information e.g. [ id:'test', single_end:false + ] + - id: + type: string + description: > + A string denoting an SRA id. + - - ncbi_settings: + type: file + description: > + An NCBI user settings file. + pattern: "*.mkfg" + - - certificate: + type: file + description: > + Path to a JWT cart file used to access protected dbGAP data on SRA using the + sra-toolkit + pattern: "*.cart" output: - - meta: - type: map - description: > - Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - sra: - type: directory - description: > - Directory containing the ETL data for the given SRA id. - - pattern: "*/*.sra" + - meta: + type: map + description: > + Groovy Map containing sample information e.g. [ id:'test', single_end:false + ] + pattern: "*/*.sra" + - "id, type: 'dir": + type: map + description: > + Groovy Map containing sample information e.g. 
[ id:'test', single_end:false + ] + pattern: "*/*.sra" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@Midnighter" maintainers: - "@Midnighter" + - "@gallvp" diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index a72a4bf..bfee607 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -47,9 +47,19 @@ retry_with_backoff !{args2} \ !{args} \ !{id} -[ -f !{id}.sralite ] && vdb-validate !{id}.sralite || vdb-validate !{id} +# check file integrity using vdb-validate or (when archive contains no checksums) md5sum +vdb-validate !{id} > vdb-validate_result.txt 2>&1 || exit 1 +if grep -q "checksums missing" vdb-validate_result.txt; then + VALID_MD5SUMS=$(curl --silent --fail --location --retry 3 --retry-delay 60 'https://locate.ncbi.nlm.nih.gov/sdl/2/retrieve?filetype=run&acc=!{id}') + LOCAL_MD5SUMS=$(md5sum !{id}/* | cut -f1 -d' ') + if ! 
grep -q -F -f <(echo "$LOCAL_MD5SUMS") <(echo "$VALID_MD5SUMS"); then + echo "MD5 sum check failed" 1>&2 + exit 1 + fi +fi cat <<-END_VERSIONS > versions.yml "!{task.process}": sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') + curl: $(curl --version | head -n 1 | sed 's/^curl //; s/ .*$//') END_VERSIONS diff --git a/modules/nf-core/sratools/prefetch/tests/main.nf.test b/modules/nf-core/sratools/prefetch/tests/main.nf.test index ed710ba..92034d4 100644 --- a/modules/nf-core/sratools/prefetch/tests/main.nf.test +++ b/modules/nf-core/sratools/prefetch/tests/main.nf.test @@ -10,9 +10,6 @@ nextflow_process { test("sratools/prefetch") { when { - params { - outdir = "output" - } process { """ input[0] = Channel.of([ [ id:'test', single_end:false ], 'DRR000774' ]) @@ -33,9 +30,6 @@ nextflow_process { test("sratools/prefetch with sralite") { when { - params { - outdir = "output" - } process { """ input[0] = Channel.of([ [ id:'test', single_end:false ], 'SRR1170046' ]) @@ -52,4 +46,26 @@ nextflow_process { ) } } + + test("sratools/prefetch/stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ [ id:'test', single_end:false ], 'DRR000774' ]) + input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true) + input[2] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap b/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap index ab1d208..82a1969 100644 --- a/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap +++ b/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap @@ -1,4 +1,43 @@ { + "sratools/prefetch/stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "DRR000774.sra:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" 
+ ], + "sra": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "DRR000774.sra:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T20:07:31.627115" + }, "sratools/prefetch with sralite": { "content": [ { @@ -14,7 +53,7 @@ ] ], "1": [ - "versions.yml:md5,c967dea4135cb75490e1e801c4639efc" + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" ], "sra": [ [ @@ -28,11 +67,15 @@ ] ], "versions": [ - "versions.yml:md5,c967dea4135cb75490e1e801c4639efc" + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" ] } ], - "timestamp": "2023-10-13T12:11:24.563510389" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:02.309737" }, "sratools/prefetch": { "content": [ @@ -49,7 +92,7 @@ ] ], "1": [ - "versions.yml:md5,c967dea4135cb75490e1e801c4639efc" + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" ], "sra": [ [ @@ -63,10 +106,14 @@ ] ], "versions": [ - "versions.yml:md5,c967dea4135cb75490e1e801c4639efc" + "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee" ] } ], - "timestamp": "2023-10-13T12:11:02.75256571" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:48:37.428307" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index de1725a..6f29485 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,6 +12,10 @@ params { // Input options input = null + // Fetchdatairidanext options + + rename_with_samplename = true + // Boilerplate options outdir = null publish_dir_mode = 'copy' @@ -28,7 +32,7 @@ params { config_profile_description = null config_profile_contact = null config_profile_url = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -78,7 +82,7 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true + docker.runOptions = '-u $(id 
-u):$(id -g)' conda.enabled = false singularity.enabled = false podman.enabled = false @@ -198,7 +202,7 @@ manifest { description = """IRIDA Next pipeline for fetching data from NCBI""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.1' + version = '1.2.0' doi = '' defaultBranch = 'main' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e7709da..508fa6b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/phac-nml/fetchdatairidanext/main/nextflow_schema.json", "title": "phac-nml/fetchdatairidanext pipeline parameters", "description": "IRIDA Next NCBI Download pipeline", @@ -40,6 +40,18 @@ } } }, + "additional_parameters": { + "title": "Additional Parameters", + "type": "object", + "default": "", + "properties": { + "rename_with_samplename": { + "type": "boolean", + "description": "Filenames for reads will have 'SAMPLE_NAME' prefixed to 'INSDC_ACCESSION' (True)", + "default": true + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -205,6 +217,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/additional_parameters" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/tests/data/add-samplesheet.csv b/tests/data/add-samplesheet.csv new file mode 100644 index 0000000..f4e437d --- /dev/null +++ b/tests/data/add-samplesheet.csv @@ -0,0 +1,5 @@ +sample,sample_name,insdc_accession +SAMPLE1,S 1,ERR1109373 +SAMPLE2,S2,ERR1109373 +SAMPLE3,S2,SRR13191702 +SAMPLE4,,SRR13191702 diff --git a/tests/data/ncbi_user_settings.mkfg b/tests/data/ncbi_user_settings.mkfg new file mode 100644 index 0000000..3c8df9f --- /dev/null +++ b/tests/data/ncbi_user_settings.mkfg @@ -0,0 +1,2 @@ +/LIBS/GUID = "5b0d4b7d-88c7-4802-98fd-e3afd06feb32" 
+/libs/cloud/report_instance_identity = "true" diff --git a/tests/modules/fasterqdump/main.nf.test b/tests/modules/fasterqdump/main.nf.test index 104c31f..52f3d11 100644 --- a/tests/modules/fasterqdump/main.nf.test +++ b/tests/modules/fasterqdump/main.nf.test @@ -11,8 +11,8 @@ nextflow_process { when { process { """ - input[0] = Channel.of([ [id: "SAMPLE1"], file("$baseDir/tests/data/ERR1109373.sra", checkIfExists: true) ]) - input[1] = [] + input[0] = Channel.of([ [irida_id: "SAMPLE1"], file("$baseDir/tests/data/ERR1109373.sra", checkIfExists: true) ]) + input[1] = file('$baseDir/tests/data/ncbi_user_settings.mkfg', checkIfExists: true) input[2] = [] """ } diff --git a/tests/modules/fasterqdump/main.nf.test.snap b/tests/modules/fasterqdump/main.nf.test.snap index 6c6e406..8399ab9 100644 --- a/tests/modules/fasterqdump/main.nf.test.snap +++ b/tests/modules/fasterqdump/main.nf.test.snap @@ -5,7 +5,7 @@ "0": [ [ { - "id": "SAMPLE1" + "irida_id": "SAMPLE1" }, [ "ERR1109373.fastq.gz:md5,b9acccb3c5d317a99f604375a09991aa", @@ -15,12 +15,12 @@ ] ], "1": [ - "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a" + "versions.yml:md5,6ff2d50b15c3f0eb9c72cd13a4a20295" ], "reads": [ [ { - "id": "SAMPLE1" + "irida_id": "SAMPLE1" }, [ "ERR1109373.fastq.gz:md5,b9acccb3c5d317a99f604375a09991aa", @@ -30,10 +30,14 @@ ] ], "versions": [ - "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a" + "versions.yml:md5,6ff2d50b15c3f0eb9c72cd13a4a20295" ] } ], - "timestamp": "2024-01-25T23:20:38.2550152" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-11T20:36:55.563890217" } } \ No newline at end of file diff --git a/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test b/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test index e4ee30f..1050501 100644 --- a/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test +++ b/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test @@ 
-16,8 +16,8 @@ nextflow_workflow { workflow { """ input[0] = Channel.of( - [[ id:'test_single_end', single_end:true ], 'DRR000774'], - [[ id:'test_paired_end', single_end:false ], 'SRR11140744'] + [[ irida_id:'test_single_end', single_end:true ], 'DRR000774'], + [[ irida_id:'test_paired_end', single_end:false ], 'SRR11140744'] ) input[1] = [] """ @@ -49,10 +49,10 @@ nextflow_workflow { workflow { """ input[0] = Channel.of( - [[ id:'SAMPLE1', single_end:false ], 'ERR1109373'], - [[ id:'ERROR1', single_end:false ], 'SRR999908'], - [[ id:'ERROR2', single_end:false ], 'INVALID!!'], - [[ id:'SAMPLE2', single_end:false ], 'SRR13191702'] + [[ irida_id:'SAMPLE1', single_end:false ], 'ERR1109373'], + [[ irida_id:'ERROR1', single_end:false ], 'SRR999908'], + [[ irida_id:'ERROR2', single_end:false ], 'INVALID!!'], + [[ irida_id:'SAMPLE2', single_end:false ], 'SRR13191702'] ) input[1] = [] """ diff --git a/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test.snap b/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test.snap index 8c168d8..d51d6dd 100644 --- a/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test.snap +++ b/tests/workflows/fastq_download_prefetch_fasterqdump_sratools/main.nf.test.snap @@ -5,7 +5,7 @@ "0": [ [ { - "id": "test_paired_end", + "irida_id": "test_paired_end", "single_end": false }, [ @@ -16,7 +16,7 @@ ], [ { - "id": "test_single_end", + "irida_id": "test_single_end", "single_end": true }, "DRR000774.fastq.gz:md5,a110f93f7a9b0271455f5a435bce73c7" @@ -24,13 +24,13 @@ ], "1": [ "versions.yml:md5,1a2218ff913fc33408bffccb081b5048", - "versions.yml:md5,98d78bba9f3da39a0b7db6e9c7dcc224", - "versions.yml:md5,9c558ff624585a6eee82a19c8c0136db" + "versions.yml:md5,2f3b3a13b36dabf13f09327613d5558d", + "versions.yml:md5,53d6e983afde3a28add2ffc6b7eba4f3" ], "reads": [ [ { - "id": "test_paired_end", + "irida_id": "test_paired_end", "single_end": false }, [ @@ -41,7 +41,7 @@ ], [ { - "id": 
"test_single_end", + "irida_id": "test_single_end", "single_end": true }, "DRR000774.fastq.gz:md5,a110f93f7a9b0271455f5a435bce73c7" @@ -49,11 +49,15 @@ ], "versions": [ "versions.yml:md5,1a2218ff913fc33408bffccb081b5048", - "versions.yml:md5,98d78bba9f3da39a0b7db6e9c7dcc224", - "versions.yml:md5,9c558ff624585a6eee82a19c8c0136db" + "versions.yml:md5,2f3b3a13b36dabf13f09327613d5558d", + "versions.yml:md5,53d6e983afde3a28add2ffc6b7eba4f3" ] } ], - "timestamp": "2024-01-25T22:06:20.7303705" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-11T20:52:02.578591475" } } \ No newline at end of file diff --git a/tests/workflows/fetchdatairidanext/main.nf.test b/tests/workflows/fetchdatairidanext/main.nf.test new file mode 100644 index 0000000..447f5d1 --- /dev/null +++ b/tests/workflows/fetchdatairidanext/main.nf.test @@ -0,0 +1,36 @@ +nextflow_workflow { + + name "Test workflow: workflows/fetchdatairidanext.nf" + script "workflows/fetchdatairidanext.nf" + workflow "FETCHDATAIRIDANEXT" + tag "full workflow" + tag "fetchdatairdanext" + + test("Samplesheets with sample_name") { + tag "sample_name" + when { + params { + input = "$baseDir/tests/data/add-samplesheet.csv" + outdir = "output" + } + } + + then { + assert workflow.success + assert path("$launchDir/output").exists() + + // Check that reads have the correct filename when supplying a sample_name + assert path("$launchDir/output/iridanext.output.json").exists() + def iridanext_json = path("$launchDir/output/iridanext.output.json").json + def iridanext_samples = iridanext_json.files.samples + + assert iridanext_samples.SAMPLE1 == [['path':'reads/S_1_ERR1109373_2.fastq.gz'], ['path':'reads/S_1_ERR1109373_1.fastq.gz'], ['path':'reads/S_1_ERR1109373.fastq.gz']] + assert iridanext_samples.SAMPLE2 == [['path':'reads/S2_ERR1109373_2.fastq.gz'], ['path':'reads/S2_ERR1109373_1.fastq.gz'], ['path':'reads/S2_ERR1109373.fastq.gz']] + assert iridanext_samples.SAMPLE3 == 
[['path':'reads/S2_SRR13191702_2.fastq.gz'], ['path':'reads/S2_SRR13191702_1.fastq.gz']] + assert iridanext_samples.SAMPLE4 == [['path':'reads/SRR13191702_2.fastq.gz'], ['path':'reads/SRR13191702_1.fastq.gz']] + + + } + } + +} diff --git a/workflows/fetchdatairidanext.nf b/workflows/fetchdatairidanext.nf index 74b5fcc..9a70a39 100644 --- a/workflows/fetchdatairidanext.nf +++ b/workflows/fetchdatairidanext.nf @@ -56,10 +56,14 @@ workflow FETCHDATAIRIDANEXT { // Create a new channel of metadata from a sample sheet // NB: `input` corresponds to `params.input` and associated sample sheet schema input = Channel.fromSamplesheet("input") - meta_accessions = input.map {meta -> tuple(["id": meta.id.first()], meta.insdc_accession.first())} + // and remove non-alphanumeric characters in sample_names (meta.id) + .map { meta -> + def new_id = meta.id[0]?.replaceAll(/[^A-Za-z0-9_\.\-]/, '_') ?: meta.id[0] + return [["id": new_id, "irida_id": meta.irida_id[0], "insdc_accession": meta.insdc_accession[0]], meta.insdc_accession[0]] + } FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - ch_sra_ids = meta_accessions, + ch_sra_ids = input, ch_dbgap_key = [] ) ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions)