Skip to content

Commit

Permalink
Adjust parallelism in Optimus for proper Duplication Marking (#87)
Browse the repository at this point in the history
  • Loading branch information
jsotobroad authored Feb 27, 2018
1 parent dbda12d commit 1a3f87b
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 73 deletions.
36 changes: 0 additions & 36 deletions library/subworkflows/AlignTagCorrectUmis.wdl

This file was deleted.

11 changes: 5 additions & 6 deletions library/tasks/MergeSortBam.wdl
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@

task MergeSortBamFiles {
Array[Array[File]] bam_inputs
Array[File] bam_inputs
String sort_order

Int disk_size = 500
Int compression_level = 5

command {
input_line=$(python -c "print(' INPUT='.join('${sep=' ' bam_inputs}'.replace('[', '').replace(']', '').replace(',', '').split()))")

echo $input_line
java -Dsamjdk.compression_level=${compression_level} -Xms7000m -jar /usr/gitc/picard.jar \
MergeSamFiles \
USE_THREADING=true \
SORT_ORDER=coordinate \
INPUT=$input_line \
SORT_ORDER=${sort_order} \
INPUT=${sep=' INPUT=' bam_inputs} \
OUTPUT=merged.bam \
}

Expand Down
49 changes: 35 additions & 14 deletions pipelines/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import "SplitBamByCellBarcode.wdl" as split
import "CollectMultiplePicardMetrics.wdl" as collect
import "MergeSortBam.wdl" as merge
import "CreateCountMatrix.wdl" as count
import "AlignTagCorrectUmis.wdl" as AlignTagCorrectUmis
import "StarAlignBamSingleEnd.wdl" as StarAlignBamWDL
import "TagGeneExon.wdl" as TagGeneExonWDL
import "CorrectUmiMarkDuplicates.wdl" as CorrectUmiMarkDuplicatesWDL

# The optimus 3' pipeline processes 10x genomics sequencing data based on the v2
# chemistry. It corrects cell barcodes and UMIs, aligns reads, marks duplicates, and
Expand Down Expand Up @@ -62,45 +64,64 @@ workflow Optimus {
}

File barcoded_bam = select_first([attach_barcodes.bam_output, attach_barcodes_no_index.bam_output])
}

call merge.MergeSortBamFiles as MergeUnsorted {
input:
bam_inputs = barcoded_bam,
sort_order = "unsorted"
}

call split.SplitBamByCellBarcode {
input:
bam_input = MergeUnsorted.output_bam
}

call split.SplitBamByCellBarcode {
scatter (bam in SplitBamByCellBarcode.bam_output_array) {
call StarAlignBamWDL.StarAlignBamSingleEnd as StarAlign {
input:
bam_input = barcoded_bam
bam_input = bam,
tar_star_reference = tar_star_reference
}

call AlignTagCorrectUmis.AlignTagCorrectUmis {
call TagGeneExonWDL.TagGeneExon as TagGenes {
input:
bam_array = SplitBamByCellBarcode.bam_output_array,
tar_star_reference = tar_star_reference,
bam_input = StarAlign.bam_output,
annotations_gtf = annotations_gtf
}

call CorrectUmiMarkDuplicatesWDL.CorrectUmiMarkDuplicates as CorrectUmiMarkDuplicates {
input:
bam_input = TagGenes.bam_output
}
}

call merge.MergeSortBamFiles {
call merge.MergeSortBamFiles as MergeSorted {
input:
bam_inputs = AlignTagCorrectUmis.bam_outputs
bam_inputs = CorrectUmiMarkDuplicates.bam_output,
sort_order = "coordinate"
}

call count.DropSeqToolsDigitalExpression {
input:
bam_input = MergeSortBamFiles.output_bam,
bam_input = MergeSorted.output_bam,
whitelist = whitelist
}

call collect.CollectMultipleMetrics {
input:
aligned_bam = MergeSortBamFiles.output_bam,
aligned_bam = MergeSorted.output_bam,
ref_genome_fasta = ref_genome_fasta,
output_filename = sample_id
}

output {
File bam = MergeSortBamFiles.output_bam
File bam = MergeSorted.output_bam
File matrix = DropSeqToolsDigitalExpression.matrix_output
File matrix_summary = DropSeqToolsDigitalExpression.matrix_summary
Array[Array[File]] tag_gene_exon_log = AlignTagCorrectUmis.tag_gene_exon_log
Array[Array[File]] umi_metrics = AlignTagCorrectUmis.umi_metrics
Array[Array[File]] duplicate_metrics = AlignTagCorrectUmis.duplicate_metrics
Array[File] tag_gene_exon_log = TagGenes.log
Array[File] umi_metrics = CorrectUmiMarkDuplicates.umi_metrics
Array[File] duplicate_metrics = CorrectUmiMarkDuplicates.duplicate_metrics
File picard_metrics = CollectMultipleMetrics.alignment_metrics
}
}
16 changes: 8 additions & 8 deletions pipelines/optimus/example_test_outputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@
"outputs": {
"Optimus.tag_gene_exon_log": [
[
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-AlignTagCorrectUmis/shard-0/AlignTagCorrectUmis/d250b98d-a2f0-4b19-be86-e347bd076a5f/call-TagGeneExon/shard-0/gene_exon_tag_summary.log"
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-TagGenes/shard-0/gene_exon_tag_summary.log"
]
],
"Optimus.matrix_summary": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-DropSeqToolsDigitalExpression/digital_expression_summary.txt",
"Optimus.matrix_summary": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-DropSeqToolsDigitalExpression/digital_expression_summary.txt",
"Optimus.umi_metrics": [
[
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-AlignTagCorrectUmis/shard-0/AlignTagCorrectUmis/d250b98d-a2f0-4b19-be86-e347bd076a5f/call-CorrectUmiMarkDuplicates/shard-0/umi_metrics.txt"
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-CorrectUmiMarkDuplicates/shard-0/umi_metrics.txt"
]
],
"Optimus.duplicate_metrics": [
[
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-AlignTagCorrectUmis/shard-0/AlignTagCorrectUmis/d250b98d-a2f0-4b19-be86-e347bd076a5f/call-CorrectUmiMarkDuplicates/shard-0/duplicate_metrics.txt"
"gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-CorrectUmiMarkDuplicates/shard-0/duplicate_metrics.txt"
]
],
"Optimus.matrix": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-DropSeqToolsDigitalExpression/digital_expression.txt.gz",
"Optimus.bam": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-MergeBam/out.bam",
"Optimus.picard_metrics": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/c98322d7-406d-4e3f-96c8-e2bd1429e645/call-CollectMultipleMetrics/pbmc8k_test.tar.gz"
"Optimus.matrix": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-DropSeqToolsDigitalExpression/digital_expression.txt.gz",
"Optimus.bam": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-MergeSorted/merged.bam",
"Optimus.picard_metrics": "gs://hca-dcp-mint-test-data/10x/demo/cromwell-executions/Optimus/307bd16a-7b08-49ce-8ba5-e18cee6d8593/call-CollectMultipleMetrics/pbmc8k_test.tar.gz"
},
"id": "c98322d7-406d-4e3f-96c8-e2bd1429e645"
"id": "307bd16a-7b08-49ce-8ba5-e18cee6d8593"
}
1 change: 0 additions & 1 deletion test/optimus/pr/dependencies.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"Optimus.wdl": "pipelines/optimus/Optimus.wdl",
"ValidateOptimus.wdl": "test/optimus/pr/ValidateOptimus.wdl",
"AlignTagCorrectUmis.wdl": "library/subworkflows/AlignTagCorrectUmis.wdl",
"StarAlignBamSingleEnd.wdl": "library/tasks/StarAlignBamSingleEnd.wdl",
"FastqToUBam.wdl": "library/tasks/FastqToUBam.wdl",
"Attach10xBarcodes.wdl": "library/tasks/Attach10xBarcodes.wdl",
Expand Down
2 changes: 1 addition & 1 deletion test/optimus/pr/test_inputs.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"TestOptimusPR.expected_bam_hash": "f944c8d26ed178e5cbcc96d75f8f380f",
"TestOptimusPR.expected_bam_hash": "c4d4d28c687f00c6d2cccca524284e99",
"TestOptimusPR.expected_matrix_hash": "69f3be6085e0c5f694b3cfa877b0eeaa",
"TestOptimusPR.expected_matrix_summary_hash": "dd513351d4e7688c97f7bf902ba2876f",
"TestOptimusPR.expected_picard_metrics_hash": "7b7be5c9a2236920ca09f05811dca6d5",
Expand Down
3 changes: 1 addition & 2 deletions test/optimus/scientific/dependencies.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"Optimus.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/optimus/Optimus.wdl",
"ScientificTests.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/test/optimus/scientific/ScientificTests.wdl",
"AlignTagCorrectUmis.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/optimus/AlignTagCorrectUmis.wdl",
"StarAlignBamSingleEnd.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/StarAlignBamSingleEnd.wdl",
"FastqToUBam.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/FastqToUBam.wdl",
"Attach10xBarcodes.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/Attach10xBarcodes.wdl",
Expand All @@ -11,4 +10,4 @@
"CollectMultiplePicardMetrics.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/CollectMultiplePicardMetrics.wdl",
"MergeSortBam.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/MergeSortBam.wdl",
"CreateCountMatrix.wdl": "https://raw.githubusercontent.com/HumanCellAtlas/skylab/master/pipelines/tasks/CreateCountMatrix.wdl"
}
}
8 changes: 3 additions & 5 deletions test/test_pr_tests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@
" \"CorrectUmiMarkDuplicates.wdl\": \"../pipelines/tasks/CorrectUmiMarkDuplicates.wdl\",\n",
" \"CollectMultiplePicardMetrics.wdl\": \"../pipelines/tasks/CollectMultiplePicardMetrics.wdl\",\n",
" \"MergeBam.wdl\": \"../pipelines/tasks/MergeBam.wdl\",\n",
" \"CreateCountMatrix.wdl\": \"../pipelines/tasks/CreateCountMatrix.wdl\",\n",
" \"AlignTagCorrectUmis.wdl\": \"../optimus/AlignTagCorrectUmis.wdl\" \n",
" \"CreateCountMatrix.wdl\": \"../pipelines/tasks/CreateCountMatrix.wdl\"\n",
"}"
]
},
Expand Down Expand Up @@ -411,8 +410,7 @@
" \"CorrectUmiMarkDuplicates.wdl\": \"../pipelines/tasks/CorrectUmiMarkDuplicates.wdl\",\n",
" \"CollectMultiplePicardMetrics.wdl\": \"../pipelines/tasks/CollectMultiplePicardMetrics.wdl\",\n",
" \"MergeBam.wdl\": \"../pipelines/tasks/MergeBam.wdl\",\n",
" \"CreateCountMatrix.wdl\": \"../pipelines/tasks/CreateCountMatrix.wdl\",\n",
" \"AlignTagCorrectUmis.wdl\": \"../optimus/AlignTagCorrectUmis.wdl\" \n",
" \"CreateCountMatrix.wdl\": \"../pipelines/tasks/CreateCountMatrix.wdl\"\n",
"}"
]
},
Expand Down Expand Up @@ -1050,7 +1048,7 @@
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 6,
"threshold": 6.0,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
Expand Down

0 comments on commit 1a3f87b

Please sign in to comment.