diff --git a/.github/workflows/bbknn.yml b/.github/workflows/bbknn.yml index 9497ffa5..27cb1900 100644 --- a/.github/workflows/bbknn.yml +++ b/.github/workflows/bbknn.yml @@ -26,7 +26,7 @@ jobs: run: | mkdir testdata wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xvf sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz cp -r sample_data testdata/sample1 mv sample_data testdata/sample2 - name: Run single_sample test diff --git a/.github/workflows/bbknn_scenic.yml b/.github/workflows/bbknn_scenic.yml index 47384c7d..91bb8a41 100644 --- a/.github/workflows/bbknn_scenic.yml +++ b/.github/workflows/bbknn_scenic.yml @@ -26,7 +26,7 @@ jobs: run: | mkdir testdata wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz - tar xvf sample_data_small.tar.gz + tar xzvf sample_data_small.tar.gz cp -r sample_data testdata/sample1 mv sample_data testdata/sample2 - name: Run bbknn_scenic test diff --git a/.github/workflows/harmony.yml b/.github/workflows/harmony.yml index 4ee68426..5abbbb5f 100644 --- a/.github/workflows/harmony.yml +++ b/.github/workflows/harmony.yml @@ -26,7 +26,7 @@ jobs: run: | mkdir testdata wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xvf sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz cp -r sample_data testdata/sample1 mv sample_data testdata/sample2 - name: Run single_sample test diff --git a/.github/workflows/mnncorrect.yml b/.github/workflows/mnncorrect.yml index 23fba61d..8b43d811 100644 --- a/.github/workflows/mnncorrect.yml +++ b/.github/workflows/mnncorrect.yml @@ -26,7 +26,7 @@ jobs: run: | mkdir testdata wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz - tar xvf sample_data.tar.gz + tar xzvf sample_data.tar.gz cp -r sample_data testdata/sample1 mv sample_data testdata/sample2 - name: Run single_sample test diff --git 
a/.github/workflows/single_sample.yml b/.github/workflows/single_sample.yml index 37fc5c56..a257b145 100644 --- a/.github/workflows/single_sample.yml +++ b/.github/workflows/single_sample.yml @@ -25,7 +25,7 @@ jobs: - name: Get sample data run: | wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz - tar xvf sample_data_tiny.tar.gz + tar xzvf sample_data_tiny.tar.gz - name: Run single_sample test run: | nextflow run ${GITHUB_WORKSPACE} -profile single_sample,test__single_sample,docker -entry single_sample -ansi-log false diff --git a/.github/workflows/single_sample_scenic.yml b/.github/workflows/single_sample_scenic.yml index e2f65b37..a3db4980 100644 --- a/.github/workflows/single_sample_scenic.yml +++ b/.github/workflows/single_sample_scenic.yml @@ -25,7 +25,7 @@ jobs: - name: Get sample data run: | wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz - tar xvf sample_data_small.tar.gz + tar xzvf sample_data_small.tar.gz - name: Run single_sample_scenic test run: | nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,test__single_sample_scenic,docker -entry single_sample_scenic -ansi-log false diff --git a/.github/workflows/single_sample_scenic_multiruns.yml b/.github/workflows/single_sample_scenic_multiruns.yml index 20b9be71..b137707f 100644 --- a/.github/workflows/single_sample_scenic_multiruns.yml +++ b/.github/workflows/single_sample_scenic_multiruns.yml @@ -25,7 +25,7 @@ jobs: - name: Get sample data run: | wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_small.tar.gz - tar xvf sample_data_small.tar.gz + tar xzvf sample_data_small.tar.gz - name: Run single_sample_scenic test run: | nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scenic,scenic_multiruns,test__single_sample_scenic_multiruns,docker -entry single_sample_scenic -ansi-log false diff --git a/.github/workflows/single_sample_scrublet.yml 
b/.github/workflows/single_sample_scrublet.yml index b7fa274d..837ba063 100644 --- a/.github/workflows/single_sample_scrublet.yml +++ b/.github/workflows/single_sample_scrublet.yml @@ -25,7 +25,7 @@ jobs: - name: Get sample data run: | wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data.tar.gz - tar xvf sample_data.tar.gz + tar xzvf sample_data.tar.gz - name: Run single_sample_scrublet test run: | nextflow run ${GITHUB_WORKSPACE} -profile single_sample_scrublet,test__single_sample_scrublet,docker -entry single_sample_scrublet -ansi-log false diff --git a/.gitmodules b/.gitmodules index fbd6bbc1..4d4ba51b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,49 +1,49 @@ [submodule "src/cellranger"] path = src/cellranger - url = git@github.com:vib-singlecell-nf/cellranger.git + url = https://github.com/vib-singlecell-nf/cellranger.git [submodule "src/dropletutils"] path = src/dropletutils - url = git@github.com:vib-singlecell-nf/dropletutils.git + url = https://github.com/vib-singlecell-nf/dropletutils.git [submodule "src/dropseqtools"] path = src/dropseqtools - url = git@github.com:vib-singlecell-nf/dropseqtools.git + url = https://github.com/vib-singlecell-nf/dropseqtools.git [submodule "src/edirect"] path = src/edirect - url = git@github.com:vib-singlecell-nf/edirect.git + url = https://github.com/vib-singlecell-nf/edirect.git [submodule "src/fastp"] path = src/fastp - url = git@github.com:vib-singlecell-nf/fastp.git + url = https://github.com/vib-singlecell-nf/fastp.git [submodule "src/picard"] path = src/picard - url = git@github.com:vib-singlecell-nf/picard.git + url = https://github.com/vib-singlecell-nf/picard.git [submodule "src/scanpy"] path = src/scanpy - url = git@github.com:vib-singlecell-nf/scanpy.git + url = https://github.com/vib-singlecell-nf/scanpy.git [submodule "src/scenic"] path = src/scenic - url = git@github.com:vib-singlecell-nf/scenic.git + url = https://github.com/vib-singlecell-nf/scenic.git [submodule 
"src/sratoolkit"] path = src/sratoolkit - url = git@github.com:vib-singlecell-nf/sratoolkit.git + url = https://github.com/vib-singlecell-nf/sratoolkit.git [submodule "src/star"] path = src/star - url = git@github.com:vib-singlecell-nf/star.git + url = https://github.com/vib-singlecell-nf/star.git [submodule "src/flybaser"] path = src/flybaser - url = git@github.com:vib-singlecell-nf/flybaser.git + url = https://github.com/vib-singlecell-nf/flybaser.git [submodule "src/pcacv"] path = src/pcacv - url = git@github.com:vib-singlecell-nf/pcacv.git + url = https://github.com/vib-singlecell-nf/pcacv.git [submodule "src/harmony"] path = src/harmony - url = git@github.com:vib-singlecell-nf/harmony.git + url = https://github.com/vib-singlecell-nf/harmony.git [submodule "src/cellranger-atac"] path = src/cellranger-atac - url = git@github.com:vib-singlecell-nf/cellranger-atac.git + url = https://github.com/vib-singlecell-nf/cellranger-atac.git [submodule "src/popscle"] path = src/popscle - url = git@github.com:vib-singlecell-nf/popscle.git + url = https://github.com/vib-singlecell-nf/popscle.git [submodule "src/scrublet"] path = src/scrublet - url = git@github.com:vib-singlecell-nf/scrublet.git + url = https://github.com/vib-singlecell-nf/scrublet.git branch = develop diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..69a76ccd --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,10 @@ +version: 2 + +sphinx: + configuration: docs/conf.py + +formats: all + +submodules: + exclude: all + diff --git a/conf/genomes/hg19.config b/conf/genomes/hg19.config new file mode 100644 index 00000000..6fba31d3 --- /dev/null +++ b/conf/genomes/hg19.config @@ -0,0 +1,8 @@ +params { + global { + species = 'human' + genome { + assembly = 'hg19' + } + } +} diff --git a/conf/genomes/mm10.config b/conf/genomes/mm10.config new file mode 100644 index 00000000..8d7967d7 --- /dev/null +++ b/conf/genomes/mm10.config @@ -0,0 +1,8 @@ +params { + global { + species = 'mouse' + 
genome { + assembly = 'mm10' + } + } +} diff --git a/docs/pipelines.rst b/docs/pipelines.rst index f225afd0..31fa0afd 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -380,6 +380,28 @@ In the generated .config file, make sure the ``file_paths`` parameter is set wit ---- +Loom +---- +Use the following profile when generating the config file:: + + -profile loom + + +In the generated .config file, make sure the ``file_paths`` parameter is set with the paths to the ``.loom`` files:: + + [...] + data { + loom { + file_paths = "data/1k_pbmc_v*_chemistry_SUFFIX.SC__FILE_CONVERTER.loom" + suffix = "_SUFFIX.SC__FILE_CONVERTER.loom" + } + } + [...] + +- The ``suffix`` parameter is used to infer the sample name from the file paths (it is removed from the input file path to derive a sample name). + +---- + Seurat Rds ---------- diff --git a/nextflow.config b/nextflow.config index cba364d2..f6705696 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ manifest { name = 'vib-singlecell-nf/vsn-pipelines' description = 'A repository of pipelines for single-cell data in Nextflow DSL2' homePage = 'https://github.com/vib-singlecell-nf/vsn-pipelines' - version = '0.17.0' + version = '0.18.0' mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!19.12.0-edge' // with ! prefix, stop execution if current version does not match required version. 
@@ -115,7 +115,11 @@ profiles { includeConfig 'src/scenic/conf/multi_runs.config' } single_sample { - includeConfig 'src/scanpy/scanpy.config' + if(min && min.enabled) { + includeConfig 'src/scanpy/conf/min.config' + } else { + includeConfig 'src/scanpy/scanpy.config' + } } multi_sample { includeConfig 'src/scanpy/scanpy.config' @@ -211,6 +215,9 @@ profiles { h5ad { includeConfig 'src/channels/conf/h5ad.config' } + loom { + includeConfig 'src/channels/conf/loom.config' + } tsv { includeConfig 'src/channels/conf/tsv.config' } @@ -238,6 +245,16 @@ profiles { includeConfig 'conf/genomes/hg38.config' } + hg19 { + includeConfig 'src/scenic/conf/min/tfs/human-v0.0.1.config' + includeConfig 'conf/genomes/hg19.config' + } + + mm10 { + includeConfig 'src/scenic/conf/min/tfs/mouse-v0.0.1.config' + includeConfig 'conf/genomes/mm10.config' + } + // feature profiles: pcacv { diff --git a/src/channels/channels.nf b/src/channels/channels.nf index cd53c3c9..db004da7 100644 --- a/src/channels/channels.nf +++ b/src/channels/channels.nf @@ -85,6 +85,16 @@ workflow getDataChannel { } ).view() } + if(params.data.containsKey("loom")) { + data = data.concat( + getFileChannel( + params.data.loom.file_paths, + params.data.loom.suffix + ).map { + it -> tuple(it[0], it[1], "loom", "h5ad") + } + ).view() + } if(params.data.containsKey("tsv")) { data = data.concat( getFileChannel( diff --git a/src/channels/conf/loom.config b/src/channels/conf/loom.config new file mode 100644 index 00000000..55040e03 --- /dev/null +++ b/src/channels/conf/loom.config @@ -0,0 +1,13 @@ +params { + data { + loom { + file_paths = '' + suffix = '.loom' + } + } + sc { + file_converter { + iff = 'loom' + } + } +} diff --git a/src/channels/tenx.nf b/src/channels/tenx.nf index 0d3b5cfe..dc54436f 100644 --- a/src/channels/tenx.nf +++ b/src/channels/tenx.nf @@ -84,9 +84,8 @@ workflow getMEXChannel { } channel = Channel .fromPath(glob, type: 'dir', checkIfExists: true) - .view() .map { - filePath -> 
tuple(extractSampleFromH5( "${filePath}" ), file("${filePath}")) + filePath -> tuple(extractSampleFromMEX( "${filePath}" ), file("${filePath}")) } emit: diff --git a/src/scanpy b/src/scanpy index 6092226a..22f83e00 160000 --- a/src/scanpy +++ b/src/scanpy @@ -1 +1 @@ -Subproject commit 6092226ad260fa9fd0055aa231481d79306b8ca3 +Subproject commit 22f83e00dd1b93c41f03e89a04bc9f2175555cf5 diff --git a/src/scenic b/src/scenic index 964bf37f..1f7b1f18 160000 --- a/src/scenic +++ b/src/scenic @@ -1 +1 @@ -Subproject commit 964bf37fea93061da497d41ee6a179bb314e2663 +Subproject commit 1f7b1f1869b23098ecfbce232c4cdc17841d32bb diff --git a/src/utils/bin/h5ad_to_filtered_loom.py b/src/utils/bin/h5ad_to_filtered_loom.py index d3d0ffb9..f0a61594 100755 --- a/src/utils/bin/h5ad_to_filtered_loom.py +++ b/src/utils/bin/h5ad_to_filtered_loom.py @@ -40,9 +40,11 @@ "nUMI": np.array(np.sum(adata.X.transpose(), axis=0)).flatten(), } +matrix = (adata.X).T + lp.create( filename=f"{FILE_PATH_OUT_BASENAME}.loom", - layers=(adata.X).T.toarray(), + layers=matrix if type(matrix) == np.ndarray else matrix.toarray(), row_attrs=row_attrs, col_attrs=col_attrs, ) diff --git a/src/utils/bin/sc_file_converter.py b/src/utils/bin/sc_file_converter.py index 2d5ca0cb..b7e631cc 100755 --- a/src/utils/bin/sc_file_converter.py +++ b/src/utils/bin/sc_file_converter.py @@ -225,6 +225,28 @@ def add_sample_id(adata, args): # Sort var index adata = adata[:, np.sort(adata.var.index)] adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME)) +elif INPUT_FORMAT == 'loom' and OUTPUT_FORMAT == 'h5ad': + adata = sc.read_loom( + FILE_PATH_IN, + sparse=False + ) + adata = add_sample_id( + adata=adata, + args=args + ) + # If tag_cell_with_sample_id is given, add the sample ID as suffix + if args.tag_cell_with_sample_id: + adata.obs.index = map(lambda x: re.sub('-[0-9]+', f"-{args.sample_id}", x), adata.obs.index) + adata.var.index = adata.var.index.astype(str) + # Check if var index is unique + if 
len(np.unique(adata.var.index)) < len(adata.var.index) and not args.make_var_index_unique: + raise Exception("VSN ERROR: AnnData var index is not unique. This can be fixed by making it unique. To do so update the following param 'makeVarIndexUnique = true' (under params.sc.sc_file_converter) in your config.") + if len(np.unique(adata.var.index)) < len(adata.var.index) and args.make_var_index_unique: + adata.var_names_make_unique() + print("Making AnnData var index unique...") + # Sort var index + adata = adata[:, np.sort(adata.var.index)] + adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME)) else: raise Exception( "File format conversion {0} --> {1} hasn't been implemented yet.".format(INPUT_FORMAT, OUTPUT_FORMAT)) diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 2019b7e5..d5789588 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -15,18 +15,59 @@ def clean(params) { def detectCellRangerVersionData = { cellRangerV2Data, cellRangerV3Data -> if(cellRangerV2Data.isDirectory() || cellRangerV3Data.isDirectory()) { if(cellRangerV2Data.exists()) { + // Sanity checks + if(new File(Paths.get(cellRangerV2Data.toString(), "genes.tsv.gz").toString()).exists()) + throw new Exception("Found genes.tsv.gz but expecting genes.tsv. The gene file should be uncompressed.") + if(new File(Paths.get(cellRangerV2Data.toString(), "genes.tsv").toString()).exists()) + return [ + version: 2, + path: cellRangerV2Data + ] + // Extract genome folder if a single one exists genomes = cellRangerV2Data.list() if(genomes.size() > 1 || genomes.size() == 0) throw new Exception("None or multiple genomes detected for the output generated by CellRanger v2. 
Selecting custom genome is currently not implemented.") - return file(Paths.get(cellRangerV2Data.toString(), genomes[0])) - } else if(cellRangerV3Data.exists()) - return cellRangerV3Data - throw new Exception("Cannot detect the version of the data format of CellRanger.") + genomeFilePath = Paths.get(cellRangerV2Data.toString(), genomes[0]) + // Sanity checks + if(!new File(genomeFilePath.toString()).isDirectory()) + throw new Exception("Expecting a genome directory from the output generated by CellRanger v2.") + if(new File(Paths.get(genomeFilePath.toString(), "genes.tsv.gz").toString()).exists()) + throw new Exception("Found compressed gene file (genes.tsv.gz) but expecting uncompressed gene file (genes.tsv). Use gunzip for instance to uncompress it.") + if(new File(Paths.get(genomeFilePath.toString(), "barcodes.tsv.gz").toString()).exists()) + throw new Exception("Found compressed barcode file (barcodes.tsv.gz) but expecting uncompressed barcode file (barcodes.tsv). Use gunzip for instance to uncompress it.") + if(new File(Paths.get(genomeFilePath.toString(), "matrix.mtx.gz").toString()).exists()) + throw new Exception("Found compressed matrix file (matrix.mtx.gz) but expecting uncompressed matrix file (matrix.mtx). 
Use gunzip for instance to uncompress it.") + if(!new File(Paths.get(genomeFilePath.toString(), "genes.tsv").toString()).exists()) + throw new Exception("Expecting a gene file genes.tsv file but none are found.") + if(!new File(Paths.get(genomeFilePath.toString(), "barcodes.tsv").toString()).exists()) + throw new Exception("Expecting a barcode file barcodes.tsv file but none are found.") + if(!new File(Paths.get(genomeFilePath.toString(), "matrix.mtx").toString()).exists()) + throw new Exception("Expecting a matrix file matrix.mtx file but none are found.") + return [ + version: 2, + path: file(genomeFilePath) + ] + } else if(cellRangerV3Data.exists()) { + if(!new File(Paths.get(cellRangerV3Data.toString(), "features.tsv").toString()).exists() && !new File(Paths.get(cellRangerV3Data.toString(), "features.tsv.gz").toString()).exists()) + throw new Exception("Expecting either a features.tsv or features.tsv.gz file but none are found.") + return [ + version: 3, + path: cellRangerV3Data + ] + } else { + throw new Exception("Cannot detect the version of the data format of CellRanger.") + } } else { if(cellRangerV2Data.exists()) { - return cellRangerV2Data + return [ + version: 2, + path: cellRangerV2Data + ] } else if(cellRangerV3Data.exists()) { - return cellRangerV3Data + return [ + version: 3, + path: cellRangerV3Data + ] } else { throw new Exception("Cannot detect the version of the data format of CellRanger.") } @@ -73,7 +114,8 @@ process SC__FILE_CONVERTER { // Check if output was generated with CellRanger v2 or v3 cellranger_outs_v2_mex = file("${f.toRealPath()}/${processParams.useFilteredMatrix ? "filtered" : "raw"}_gene_bc_matrices/") cellranger_outs_v3_mex = file("${f.toRealPath()}/${processParams.useFilteredMatrix ? 
"filtered" : "raw"}_feature_bc_matrix/") - f = detectCellRangerVersionData(cellranger_outs_v2_mex, cellranger_outs_v3_mex) + cellRangerData = detectCellRangerVersionData(cellranger_outs_v2_mex, cellranger_outs_v3_mex) + f = cellRangerData.path inputDataType = "10x_cellranger_mex" break; case "10x_cellranger_h5": @@ -83,7 +125,8 @@ process SC__FILE_CONVERTER { // Check if output was generated with CellRanger v2 or v3 cellranger_outs_v2_h5 = file("${f.toRealPath()}/${processParams.useFilteredMatrix ? "filtered" : "raw"}_gene_bc_matrices.h5") cellranger_outs_v3_h5 = file("${f.toRealPath()}/${processParams.useFilteredMatrix ? "filtered" : "raw"}_feature_bc_matrix.h5") - f = detectCellRangerVersionData(cellranger_outs_v2_h5, cellranger_outs_v3_h5) + cellRangerData = detectCellRangerVersionData(cellranger_outs_v2_h5, cellranger_outs_v3_h5) + f = cellRangerData.path inputDataType = "10x_cellranger_h5" case "10x_atac_cellranger_mex_outs": // Nothing to be done here @@ -100,6 +143,10 @@ process SC__FILE_CONVERTER { // Nothing to be done here break; + case "loom": + // Nothing to be done here + break; + case "seurat_rds": // Nothing to be done here break;