Skip to content

Commit

Permalink
Add format_map to pipeline_tools code (#171)
Browse files Browse the repository at this point in the history
* Add format_map to pipeline_tools code

* Fix getting format map
  • Loading branch information
samanehsan authored Oct 8, 2019
1 parent 4c60f26 commit 5f23845
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
12 changes: 2 additions & 10 deletions pipeline_tools/shared/submission/create_analysis_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from google.cloud import storage
import re
import arrow
from pipeline_tools.shared.submission.format_map import EXTENSION_TO_FORMAT


def create_analysis_process(
Expand Down Expand Up @@ -511,20 +512,11 @@ def main():
required=True,
help='Path to JSON file containing info about outputs.',
)
parser.add_argument(
'--format_map',
required=True,
help='JSON file providing map of file extensions to formats.',
)
parser.add_argument(
'--add_md5s', help='Set to "true" to add md5 checksums to file metadata'
)
args = parser.parse_args()

# Get the extension_to_format mapping
with open(args.format_map) as f:
extension_to_format = json.load(f)

schema_url = args.schema_url.strip('/')

# Get metadata for inputs and outputs
Expand All @@ -533,7 +525,7 @@ def main():
output_urls = f.read().splitlines()
outputs = get_outputs(
output_urls=output_urls,
extension_to_format=extension_to_format,
extension_to_format=EXTENSION_TO_FORMAT,
schema_url=schema_url,
analysis_file_version=args.analysis_file_version,
)
Expand Down
21 changes: 21 additions & 0 deletions pipeline_tools/shared/submission/format_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Map of file-name regular expressions to output file format names.
#
# Each key is a regex fragment describing a file-name ending; each value is
# the format string recorded for a file whose name matches that pattern.
# Literal characters are written as single-character classes (``[.]``,
# ``[_]``) so they are never interpreted as regex metacharacters.
#
# NOTE(review): how the patterns are applied (``re.search`` vs. ``re.match``,
# first match vs. all matches) is decided by the consumer of this table and
# is not visible here -- confirm before relying on entry order.
EXTENSION_TO_FORMAT = {
    r"[.]bam$": "bam",
    r"[_]metrics$": "metrics",
    r"[.]txt$": "txt",
    r"[.]log$": "log",
    r"[.]pdf$": "pdf",
    r"[.]results$": "results",
    r"[.]theta$": "theta",
    r"[.]cnt$": "cnt",
    r"[.]time$": "time",
    r"[.]model$": "model",
    r"[.]bai$": "bai",
    r"[.]tsv$": "tsv",
    r"[.]mtx$": "mtx",
    r"[.]h5$": "h5",
    r"[.]csv$": "csv",
    # The dot between "csv" and "gz" is escaped like every other literal dot
    # in this table; a bare "." would match any character (e.g. ".csvXgz").
    r"[.]csv[.]gz$": "csv.gz",
    # NOTE(review): this is the only pattern without a trailing "$", so it
    # also matches names that merely contain ".zarr". Presumably intentional
    # for paths inside a zarr store -- confirm.
    r"[.]zarr": "matrix",
    r"[.]npz$": "npz",
    r"[.]npy$": "npy",
}

0 comments on commit 5f23845

Please sign in to comment.