Skip to content

Commit

Permalink
Yanc support animal references (#148)
Browse files: browse the repository at this point in the history
* support animal references

* manual flake8 & black

* biomaterial_core.ncbi_taxon_id is a List

* use cell suspension and fail on multiple

* fix error

* unit tests and mdata-api bump

* imports

* make travis happy?

* pass tests

* formatting etc

* store taxon_id_values as ReferenceId enum

* fmt

* ignore notebook checkpoints

* small cleanup

* rex comments

* Update optimus.py

Make sure both `('fastq.gz', 'fastq')` metadata fields can run through the Optimus pipeline.

* fix incomplete rename

* Update cellranger.py

Make sure both `('fastq.gz', 'fastq')` metadata fields can run through the cellranger pipeline.

* fmt

* saman comments

* dashes not underscore
  • Loading branch information
Charley Yan authored and samanehsan committed Jun 7, 2019
1 parent 6cfa801 commit 087aadc
Show file tree
Hide file tree
Showing 23 changed files with 274 additions and 157 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ wheels/
*.egg-info/
.installed.cfg
*.egg

# Development Artifacts
*.ipynb_checkpoints/
20 changes: 10 additions & 10 deletions adapter_pipelines/Optimus/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ task GetInputs {
}
output {
String sample_id = read_string("sample_id.txt")
File tar_star_reference = read_string("tar_star_reference.txt") # star reference
File annotations_gtf = read_string("annotations_gtf.txt") # gtf containing annotations for gene tagging
File ref_genome_fasta = read_string("ref_genome_fasta.txt") # genome fasta file
Array[String] r1_fastq = read_lines("r1.txt")
Array[String] r2_fastq = read_lines("r2.txt")
File i1_file = "i1.txt"
Expand Down Expand Up @@ -102,9 +105,6 @@ workflow AdapterOptimus {
String bundle_version
File whitelist # 10x genomics cell barcode whitelist for 10x V2
File tar_star_reference # star reference
File annotations_gtf # gtf containing annotations for gene tagging
File ref_genome_fasta # genome fasta file
String fastq_suffix = ".gz" # add this suffix to fastq files for picard
# Note: This "None" is a workaround in WDL-draft to simulate a "None" type
Expand Down Expand Up @@ -139,7 +139,7 @@ workflow AdapterOptimus {
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.52.0"
String pipeline_tools_version = "v0.53.0"
call GetInputs as prep {
input:
Expand All @@ -161,9 +161,9 @@ workflow AdapterOptimus {
i1_fastq = if (length(prep.i1_fastq) <= 0) then None else prep.i1_fastq,
sample_id = prep.sample_id,
whitelist = whitelist,
tar_star_reference = tar_star_reference,
annotations_gtf = annotations_gtf,
ref_genome_fasta = ref_genome_fasta,
tar_star_reference = prep.tar_star_reference,
annotations_gtf = prep.annotations_gtf,
ref_genome_fasta = prep.ref_genome_fasta,
fastq_suffix = fastq_suffix
}
Expand All @@ -183,15 +183,15 @@ workflow AdapterOptimus {
},
{
"name": "tar_star_reference",
"value": tar_star_reference
"value": prep.tar_star_reference
},
{
"name": "annotations_gtf",
"value": annotations_gtf
"value": prep.annotations_gtf
},
{
"name": "ref_genome_fasta",
"value": ref_genome_fasta
"value": prep.ref_genome_fasta
}
],
pipeline_tools_version = pipeline_tools_version
Expand Down
5 changes: 1 addition & 4 deletions adapter_pipelines/Optimus/adapter_example_static.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
{
"AdapterOptimus.whitelist": "gs://hca-dcp-sc-pipelines-test-data/whitelists/737K-august-2016.txt",
"AdapterOptimus.tar_star_reference": "gs://hca-dcp-sc-pipelines-test-data/alignmentReferences/optimusGencodeV27/buildReference/output_bucket/star_primary_gencode_v27.tar",
"AdapterOptimus.annotations_gtf": "gs://hca-dcp-sc-pipelines-test-data/alignmentReferences/optimusGencodeV27/gencode.v27.primary_assembly.annotation.gtf.gz",
"AdapterOptimus.ref_genome_fasta": "gs://hca-dcp-sc-pipelines-test-data/alignmentReferences/optimusGencodeV27/GRCh38.primary_assembly.genome.fa",
"AdapterOptimus.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"AdapterOptimus.reference_bundle": "00000000-0000-0000-0000-000000000000",
"AdapterOptimus.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"AdapterOptimus.method": "Optimus",
"AdapterOptimus.analysis_file_version": "6.0.0",
Expand Down
14 changes: 0 additions & 14 deletions adapter_pipelines/Optimus_mouse/adapter_example_static.json

This file was deleted.

15 changes: 7 additions & 8 deletions adapter_pipelines/cellranger/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ task GetInputs {
}
output {
String sample_id = read_string("sample_id.txt")
String reference_name = read_string("reference_name.txt")
File transcriptome_tar_gz = read_string("transcriptome_tar_gz.txt")
Int expect_cells = read_string("expect_cells.txt")
Array[File] fastqs = read_lines("fastqs.txt")
Array[String] fastq_names = read_lines("fastq_names.txt")
Expand Down Expand Up @@ -123,9 +125,6 @@ workflow Adapter10xCount {
String bundle_uuid
String bundle_version
String reference_name
File transcriptome_tar_gz
# Submission
File format_map
String dss_url
Expand All @@ -150,7 +149,7 @@ workflow Adapter10xCount {
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.52.0"
String pipeline_tools_version = "v0.53.0"
call GetInputs {
input:
Expand Down Expand Up @@ -180,8 +179,8 @@ workflow Adapter10xCount {
input:
sample_id = GetInputs.sample_id,
fastqs = rename_fastqs.outputs,
reference_name = reference_name,
transcriptome_tar_gz = transcriptome_tar_gz,
reference_name = GetInputs.reference_name,
transcriptome_tar_gz = GetInputs.transcriptome_tar_gz,
expect_cells = GetInputs.expect_cells,
max_retries = max_cromwell_retries
}
Expand All @@ -196,11 +195,11 @@ workflow Adapter10xCount {
},
{
"name": "reference_name",
"value": reference_name
"value": GetInputs.reference_name
},
{
"name": "transcriptome_tar_gz",
"value": transcriptome_tar_gz
"value": GetInputs.transcriptome_tar_gz
}
],
expect_cells = GetInputs.expect_cells,
Expand Down
4 changes: 1 addition & 3 deletions adapter_pipelines/cellranger/adapter_example_static.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
{
"Adapter10xCount.reference_name": "GRCh38",
"Adapter10xCount.transcriptome_tar_gz": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_GencodeV27_Primary_CellRanger.tar",
"Adapter10xCount.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"Adapter10xCount.reference_bundle": "00000000-0000-0000-0000-000000000000",
"Adapter10xCount.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"Adapter10xCount.method": "10x",
"Adapter10xCount.analysis_file_version": "6.0.0",
Expand Down
49 changes: 19 additions & 30 deletions adapter_pipelines/ss2_single_end/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@ workflow AdapterSmartSeq2SingleCellUnpaired {
String bundle_uuid
String bundle_version
# fixed parameters
File genome_ref_fasta
File rrna_intervals
File gene_ref_flat
File hisat2_ref_index
File hisat2_ref_trans_index
File rsem_ref_index
File hisat2_ref_name
File hisat2_ref_trans_name
String stranded
# submission parameters
File format_map
String dss_url
Expand All @@ -81,7 +70,7 @@ workflow AdapterSmartSeq2SingleCellUnpaired {
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.52.0"
String pipeline_tools_version = "v0.53.0"
call GetInputs as prep {
input:
Expand All @@ -98,15 +87,15 @@ workflow AdapterSmartSeq2SingleCellUnpaired {
call ss2.SmartSeq2SingleCellUnpaired as analysis {
input:
genome_ref_fasta = genome_ref_fasta,
rrna_intervals = rrna_intervals,
gene_ref_flat = gene_ref_flat,
hisat2_ref_index = hisat2_ref_index,
hisat2_ref_trans_index = hisat2_ref_trans_index,
rsem_ref_index = rsem_ref_index,
hisat2_ref_name = hisat2_ref_name,
hisat2_ref_trans_name = hisat2_ref_trans_name,
stranded = stranded,
genome_ref_fasta = prep.inputs.genome_ref_fasta,
rrna_intervals = prep.inputs.rrna_intervals,
gene_ref_flat = prep.inputs.gene_ref_flat,
hisat2_ref_index = prep.inputs.hisat2_ref_index,
hisat2_ref_trans_index = prep.inputs.hisat2_ref_trans_index,
rsem_ref_index = prep.inputs.rsem_ref_index,
hisat2_ref_name = prep.inputs.hisat2_ref_name,
hisat2_ref_trans_name = prep.inputs.hisat2_ref_trans_name,
stranded = prep.inputs.stranded,
sample_name = prep.inputs.sample_id,
output_name = prep.inputs.sample_id,
fastq = prep.inputs.fastq,
Expand All @@ -130,39 +119,39 @@ workflow AdapterSmartSeq2SingleCellUnpaired {
},
{
"name": "genome_ref_fasta",
"value": genome_ref_fasta
"value": prep.inputs.genome_ref_fasta
},
{
"name": "rrna_intervals",
"value": rrna_intervals
"value": prep.inputs.rrna_intervals
},
{
"name": "gene_ref_flat",
"value": gene_ref_flat
"value": prep.inputs.gene_ref_flat
},
{
"name": "hisat2_ref_index",
"value": hisat2_ref_index
"value": prep.inputs.hisat2_ref_index
},
{
"name": "hisat2_ref_trans_name",
"value": hisat2_ref_trans_name
"value": prep.inputs.hisat2_ref_trans_name
},
{
"name": "rsem_ref_index",
"value": rsem_ref_index
"value": prep.inputs.rsem_ref_index
},
{
"name": "hisat2_ref_name",
"value": hisat2_ref_name
"value": prep.inputs.hisat2_ref_name
},
{
"name": "hisat2_ref_trans_name",
"value": hisat2_ref_trans_name
"value": prep.inputs.hisat2_ref_trans_name
},
{
"name": "stranded",
"value": stranded
"value": prep.inputs.stranded
}
],
outputs = flatten(
Expand Down
10 changes: 0 additions & 10 deletions adapter_pipelines/ss2_single_end/adapter_example_static.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
{
"AdapterSmartSeq2SingleCellUnpaired.hisat2_ref_trans_name": "gencode_v27_trans_rsem",
"AdapterSmartSeq2SingleCellUnpaired.rrna_intervals": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode.v27.rRNA.interval_list",
"AdapterSmartSeq2SingleCellUnpaired.star_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_GencodeV27_Primary.tar",
"AdapterSmartSeq2SingleCellUnpaired.hisat2_ref_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/genome_snp_tran.tar.gz",
"AdapterSmartSeq2SingleCellUnpaired.genome_ref_fasta": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38.primary_assembly.genome.fa",
"AdapterSmartSeq2SingleCellUnpaired.hisat2_ref_trans_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/gencode_v27_trans_rsem.tar.gz",
"AdapterSmartSeq2SingleCellUnpaired.rsem_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode_v27_primary.tar",
"AdapterSmartSeq2SingleCellUnpaired.gene_ref_flat": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_gencode.v27.refFlat.txt",
"AdapterSmartSeq2SingleCellUnpaired.hisat2_ref_name": "genome_snp_tran",
"AdapterSmartSeq2SingleCellUnpaired.stranded":"NONE",
"AdapterSmartSeq2SingleCellUnpaired.reference_bundle": "00000000-0000-0000-0000-000000000000",
"AdapterSmartSeq2SingleCellUnpaired.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"AdapterSmartSeq2SingleCellUnpaired.method": "SmartSeq2SingleCell",
Expand Down
49 changes: 19 additions & 30 deletions adapter_pipelines/ss2_single_sample/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@ workflow AdapterSmartSeq2SingleCell{
String bundle_uuid
String bundle_version
# fixed parameters
File genome_ref_fasta
File rrna_intervals
File gene_ref_flat
File hisat2_ref_index
File hisat2_ref_trans_index
File rsem_ref_index
File hisat2_ref_name
File hisat2_ref_trans_name
String stranded
# submission parameters
File format_map
String dss_url
Expand All @@ -81,7 +70,7 @@ workflow AdapterSmartSeq2SingleCell{
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.52.0"
String pipeline_tools_version = "v0.53.0"
call GetInputs as prep {
input:
Expand All @@ -98,15 +87,15 @@ workflow AdapterSmartSeq2SingleCell{
call ss2.SmartSeq2SingleCell as analysis {
input:
genome_ref_fasta = genome_ref_fasta,
rrna_intervals = rrna_intervals,
gene_ref_flat = gene_ref_flat,
hisat2_ref_index = hisat2_ref_index,
hisat2_ref_trans_index = hisat2_ref_trans_index,
rsem_ref_index = rsem_ref_index,
hisat2_ref_name = hisat2_ref_name,
hisat2_ref_trans_name = hisat2_ref_trans_name,
stranded = stranded,
genome_ref_fasta = prep.inputs.genome_ref_fasta,
rrna_intervals = prep.inputs.rrna_intervals,
gene_ref_flat = prep.inputs.gene_ref_flat,
hisat2_ref_index = prep.inputs.hisat2_ref_index,
hisat2_ref_trans_index = prep.inputs.hisat2_ref_trans_index,
rsem_ref_index = prep.inputs.rsem_ref_index,
hisat2_ref_name = prep.inputs.hisat2_ref_name,
hisat2_ref_trans_name = prep.inputs.hisat2_ref_trans_name,
stranded = prep.inputs.stranded,
sample_name = prep.inputs.sample_id,
output_name = prep.inputs.sample_id,
fastq1 = prep.inputs.fastq_1,
Expand Down Expand Up @@ -135,39 +124,39 @@ workflow AdapterSmartSeq2SingleCell{
},
{
"name": "genome_ref_fasta",
"value": genome_ref_fasta
"value": prep.inputs.genome_ref_fasta
},
{
"name": "rrna_intervals",
"value": rrna_intervals
"value": prep.inputs.rrna_intervals
},
{
"name": "gene_ref_flat",
"value": gene_ref_flat
"value": prep.inputs.gene_ref_flat
},
{
"name": "hisat2_ref_index",
"value": hisat2_ref_index
"value": prep.inputs.hisat2_ref_index
},
{
"name": "hisat2_ref_trans_name",
"value": hisat2_ref_trans_name
"value": prep.inputs.hisat2_ref_trans_name
},
{
"name": "rsem_ref_index",
"value": rsem_ref_index
"value": prep.inputs.rsem_ref_index
},
{
"name": "hisat2_ref_name",
"value": hisat2_ref_name
"value": prep.inputs.hisat2_ref_name
},
{
"name": "hisat2_ref_trans_name",
"value": hisat2_ref_trans_name
"value": prep.inputs.hisat2_ref_trans_name
},
{
"name": "stranded",
"value": stranded
"value": prep.inputs.stranded
}
],
outputs = flatten(
Expand Down
12 changes: 1 addition & 11 deletions adapter_pipelines/ss2_single_sample/adapter_example_static.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@
{
"AdapterSmartSeq2SingleCell.hisat2_ref_trans_name": "gencode_v27_trans_rsem",
"AdapterSmartSeq2SingleCell.rrna_intervals": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode.v27.rRNA.interval_list",
"AdapterSmartSeq2SingleCell.star_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_GencodeV27_Primary.tar",
"AdapterSmartSeq2SingleCell.hisat2_ref_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/genome_snp_tran.tar.gz",
"AdapterSmartSeq2SingleCell.genome_ref_fasta": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38.primary_assembly.genome.fa",
"AdapterSmartSeq2SingleCell.hisat2_ref_trans_index": "gs://hca-dcp-mint-test-data/reference/HISAT2/gencode_v27_trans_rsem.tar.gz",
"AdapterSmartSeq2SingleCell.rsem_ref_index": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/gencode_v27_primary.tar",
"AdapterSmartSeq2SingleCell.gene_ref_flat": "gs://hca-dcp-mint-test-data/reference/GRCh38_Gencode/GRCh38_gencode.v27.refFlat.txt",
"AdapterSmartSeq2SingleCell.hisat2_ref_name": "genome_snp_tran",
"AdapterSmartSeq2SingleCell.stranded":"NONE",
"AdapterSmartSeq2SingleCell.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"AdapterSmartSeq2SingleCell.reference_bundle": "00000000-0000-0000-0000-000000000000",
"AdapterSmartSeq2SingleCell.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"AdapterSmartSeq2SingleCell.method": "SmartSeq2SingleCell",
"AdapterSmartSeq2SingleCell.analysis_file_version": "6.0.0",
Expand Down
Loading

0 comments on commit 087aadc

Please sign in to comment.