Skip to content

Commit

Permalink
Switch to get the version of the pipelines from WDL and update HCA me…
Browse files Browse the repository at this point in the history
…tadata schemas (#75)

* Switch to get the version of the pipelines from WDL, update the adapter wdls and readme files.

* Update HCA metadata schema versions.

* Remove unused static input files to make the repo clean.

* Update the example input file for submit WDL.

* Update the version used in adapter WDLs.

* Prepare for merging.
  • Loading branch information
rexwangcc authored Sep 18, 2018
1 parent 65fe001 commit 0f04daf
Show file tree
Hide file tree
Showing 16 changed files with 46 additions and 204 deletions.
5 changes: 3 additions & 2 deletions adapter_pipelines/Optimus/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ workflow AdapterOptimus {
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.28.0"
String pipeline_tools_version = "v0.29.0"
call GetInputs as prep {
input:
Expand Down Expand Up @@ -217,6 +217,7 @@ workflow AdapterOptimus {
use_caas = use_caas,
record_http = record_http,
pipeline_tools_version = pipeline_tools_version,
add_md5s = add_md5s
add_md5s = add_md5s,
pipeline_version = analysis.pipeline_version
}
}

This file was deleted.

6 changes: 3 additions & 3 deletions adapter_pipelines/Optimus/adapter_example_static.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
"AdapterOptimus.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"AdapterOptimus.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"AdapterOptimus.method": "Optimus",
"AdapterOptimus.analysis_file_version": "5.2.1",
"AdapterOptimus.analysis_protocol_schema_version": "8.0.0",
"AdapterOptimus.analysis_process_schema_version": "8.0.0",
"AdapterOptimus.analysis_file_version": "5.3.4",
"AdapterOptimus.analysis_protocol_schema_version": "8.0.3",
"AdapterOptimus.analysis_process_schema_version": "8.0.3",
"AdapterOptimus.run_type": "run",
"AdapterOptimus.add_md5s": false
}
15 changes: 0 additions & 15 deletions adapter_pipelines/Optimus/adapter_example_static_demo.json

This file was deleted.

5 changes: 3 additions & 2 deletions adapter_pipelines/ss2_single_sample/adapter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ workflow AdapterSmartSeq2SingleCell{
Int max_cromwell_retries = 0
Boolean add_md5s = false
String pipeline_tools_version = "v0.28.2"
String pipeline_tools_version = "v0.29.0"
call GetInputs as prep {
input:
Expand Down Expand Up @@ -230,6 +230,7 @@ workflow AdapterSmartSeq2SingleCell{
record_http = record_http,
pipeline_tools_version = pipeline_tools_version,
add_md5s = add_md5s,
max_retries = max_cromwell_retries
max_retries = max_cromwell_retries,
pipeline_version = analysis.pipeline_version
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
"AdapterSmartSeq2SingleCell.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"AdapterSmartSeq2SingleCell.format_map": "gs://hca-dcp-mint-test-data/adapters/file_format_map.json",
"AdapterSmartSeq2SingleCell.method": "SmartSeq2SingleCell",
"AdapterSmartSeq2SingleCell.analysis_file_version": "5.2.1",
"AdapterSmartSeq2SingleCell.analysis_protocol_schema_version": "8.0.0",
"AdapterSmartSeq2SingleCell.analysis_process_schema_version": "8.0.0",
"AdapterSmartSeq2SingleCell.analysis_file_version": "5.3.4",
"AdapterSmartSeq2SingleCell.analysis_protocol_schema_version": "8.0.3",
"AdapterSmartSeq2SingleCell.analysis_process_schema_version": "8.0.3",
"AdapterSmartSeq2SingleCell.run_type": "run",
"AdapterSmartSeq2SingleCell.add_md5s": false
}

This file was deleted.

9 changes: 5 additions & 4 deletions adapter_pipelines/submit.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,18 @@ task get_metadata {
# to be unbuffered. This is the same as "-u", more info: https://docs.python.org/3/using/cmdline.html#cmdoption-u
export PYTHONUNBUFFERED=TRUE
get-analysis-metadata \
get-analysis-workflow-metadata \
--analysis_output_path ${analysis_output_path} \
--cromwell_url ${cromwell_url} \
--use_caas ${use_caas}
>>>
runtime {
docker: "gcr.io/broad-dsde-mint-${runtime_environment}/cromwell-metadata:v1.0.0"
docker: "gcr.io/broad-dsde-mint-${runtime_environment}/cromwell-metadata:v1.1.0"
maxRetries: max_retries
}
output {
File metadata = "metadata.json"
String workflow_id = read_string("workflow_id.txt")
String pipeline_version = read_string("pipeline_version.txt")
Array[File] http_requests = glob("request_*.txt")
Array[File] http_responses = glob("response_*.txt")
}
Expand Down Expand Up @@ -252,6 +251,8 @@ workflow submit {
String pipeline_tools_version
Boolean add_md5s
Int max_retries = 0
# Version of the pipeline, should be included in the pipeline file
String pipeline_version

call get_metadata {
input:
Expand Down Expand Up @@ -283,7 +284,7 @@ workflow submit {
metadata_json = get_metadata.metadata,
input_bundle_uuid = input_bundle_uuid,
workflow_id = get_metadata.workflow_id,
pipeline_version = get_metadata.pipeline_version,
pipeline_version = pipeline_version,
retry_timeout = retry_timeout,
individual_request_timeout = individual_request_timeout,
retry_multiplier = retry_multiplier,
Expand Down
6 changes: 4 additions & 2 deletions adapter_pipelines/submit_example.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
"submit.reference_bundle": "bf51d668-3e14-4843-9bc7-5d676fdf0e01",
"submit.format_map": "gs://foo/format_map.json",
"submit.submit_url": "http://api.ingest.staging.data.humancellatlas.org/",
"submit.method": "Ss2RsemSingleSample",
"submit.schema_version": "4.6.1",
"submit.method": "SmartSeq2SingleCell",
"submit.analysis_file_version": "5.3.4",
"submit.analysis_protocol_schema_version": "8.0.3",
"submit.analysis_process_schema_version": "8.0.3",
"submit.run_type": "run",
"submit.runtime_environment": "dev"
}
2 changes: 2 additions & 0 deletions adapter_pipelines/submit_stub/submit.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ workflow submit {
String pipeline_tools_version
Boolean add_md5s
Int max_retries
# Version of the pipeline, should be included in the pipeline file
String pipeline_version
call submit_stub
Expand Down
2 changes: 1 addition & 1 deletion docker/cromwell-metadata/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM quay.io/humancellatlas/secondary-analysis-pipeline-tools:v0.26.0
FROM quay.io/humancellatlas/secondary-analysis-pipeline-tools:v0.29.0

RUN mkdir /cromwell-metadata
WORKDIR /cromwell-metadata
Expand Down
4 changes: 2 additions & 2 deletions pipeline_tools/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ The rest of the package consists of scripts that are meant to be invoked from th

Usage
=====
get_analysis_metadata.py
get_analysis_workflow_metadata.py
---------------------------------
This utility function fetches the information required for creating a submission to the Ingest service, such as the
Cromwell workflow metadata, the UUID of the analysis workflow, and the version of the given pipeline.
Expand All @@ -55,7 +55,7 @@ Invoke it like this:

.. code::
get-analysis-metadata \
get-analysis-workflow-metadata \
--analysis_output_path ${analysis_output_path} \
--cromwell_url ${cromwell_url} \
--use_caas ${use_caas} \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,65 +25,6 @@ def get_analysis_workflow_id(analysis_output_path):
return workflow_id


def get_adapter_workflow_id(analysis_output_path):
    """Parse the adapter workflow id from one of its analysis workflow output paths.

    The adapter workflow UUID is the last path segment immediately preceding the
    first ``/call-`` component of the output path.

    Args:
        analysis_output_path (str): Path to workflow output file.

    Returns:
        str: Cromwell UUID of the adapter workflow.
    """
    # Everything before the first '/call-' ends with the adapter workflow UUID.
    prefix, _, _ = analysis_output_path.partition('/call-')
    adapter_workflow_id = prefix.rsplit('/', 1)[-1]
    print('Got adapter workflow UUID: {0}'.format(adapter_workflow_id))
    return adapter_workflow_id


def get_adapter_workflow_version(cromwell_url,
adapter_workflow_id,
http_requests,
use_caas=False,
caas_key_file=None):
"""Get the version of the adapter workflow from its workflow id and write the version to a file so that it is
available outside of the get_analysis task.
Args:
cromwell_url (str): Url to the cromwell environment the workflow was run in.
adapter_workflow_id (str): String giving Cromwell UUID of the adapter workflow.
http_requests: `http_requests.HttpRequests` instance, a wrapper around requests provides better retry and
logging.
use_caas (bool): whether or not to use Cromwell-as-a-Service.
caas_key_file (str): path to CaaS service account JSON key file.
Raises:
requests.HTTPError: for 4xx errors or 5xx errors beyond the timeout
"""

# Local helper used as the pre-request logging hook for the HTTP call below.
def log_before(workflow_id):
print('Getting the version for adapter workflow {}'.format(workflow_id))

# NOTE(review): self-assignment is a no-op; likely leftover from a refactor.
cromwell_url = cromwell_url

# Choose auth mechanism: CaaS uses a service-account key header, otherwise
# fall back to basic-auth credentials parsed from the credentials file.
if use_caas:
json_credentials = caas_key_file or "/cromwell-metadata/caas_key.json"
headers = cromwell_tools.generate_auth_header_from_key_file(json_credentials)
auth = None
else:
headers = None
auth = get_auth()
# Query Cromwell for this workflow id, asking for its labels as well.
url = '{0}/query?id={1}&additionalQueryResultFields=labels'.format(cromwell_url, adapter_workflow_id)
# NOTE(review): log_before(...) is called eagerly here, so `before` receives its
# return value (None) rather than a callable — confirm against the HttpRequests API.
response = http_requests.get(url, auth=auth, headers=headers, before=log_before(adapter_workflow_id))

# Take the labels of the first (and expected only) query result.
workflow_labels = response.json().get('results')[0].get('labels')

# The pipeline version is stored under the 'workflow-version' label, if present.
workflow_version = workflow_labels.get('workflow-version') if workflow_labels else None

# Persist the version so downstream WDL tasks can read it from the task outputs.
# NOTE(review): if the label is missing, workflow_version is None and write() raises.
with open('pipeline_version.txt', 'w') as f:
f.write(workflow_version)


def get_auth(credentials_file=None):
"""Parse cromwell username and password from credentials file.
Expand Down Expand Up @@ -155,14 +96,6 @@ def main():
use_caas=use_caas,
caas_key_file=args.caas_key_file)

# Get the pipeline version and write to file
adapter_workflow_id = get_adapter_workflow_id(analysis_output_path=args.analysis_output_path)
get_adapter_workflow_version(cromwell_url=args.cromwell_url,
adapter_workflow_id=adapter_workflow_id,
http_requests=HttpRequests(),
use_caas=use_caas,
caas_key_file=args.caas_key_file)


if __name__ == '__main__':
main()
Loading

0 comments on commit 0f04daf

Please sign in to comment.