forked from UtrechtUniversity/yoda-ruleset
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintake_checksums.py
53 lines (37 loc) · 1.69 KB
/
intake_checksums.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
"""Functions for intake checksums."""
__copyright__ = 'Copyright (c) 2019-2021, Utrecht University'
__license__ = 'GPLv3, see LICENSE'
import itertools
import genquery
from util import *
def chop_checksum(checksum):
    """Chop iRODS checksum in checksum type and checksum string.

    Checksum format is ({type}:){checksum}, if type is missing then it is "md5".

    :param checksum: iRODS checksum string

    :returns: Tuple of (checksum type, checksum string)
    """
    # Split on the first colon only; everything after it is the checksum.
    checksum_type, separator, checksum_value = checksum.partition(":")

    if not separator:
        # No "{type}:" prefix present: the whole input is the checksum and
        # the type falls back to the documented default.
        return "md5", checksum

    return checksum_type, checksum_value
def intake_generate_dataset_checksums(ctx, dataset_path, checksum_file):
    """Generate data object with all checksums of a dataset.

    One line is written per data object found in the dataset, formatted as
    "{type} {checksum} {size} {collection}/{name}".

    :param ctx:           Combined type of a callback and rei struct
    :param dataset_path:  Root collection of dataset to be indexed
    :param checksum_file: Data object to write checksums to
    """
    columns = "COLL_NAME, DATA_NAME, DATA_CHECKSUM, DATA_SIZE"

    # Data objects located directly in the dataset root collection.
    q_root = genquery.row_iterator(columns,
                                   "COLL_NAME = '{}'".format(dataset_path),
                                   genquery.AS_LIST, ctx)

    # Data objects located in any subcollection of the dataset.
    q_sub = genquery.row_iterator(columns,
                                  "COLL_NAME like '{}/%'".format(dataset_path),
                                  genquery.AS_LIST, ctx)

    # Collect one line per data object; joining once at the end avoids
    # repeatedly copying a growing string for large datasets.
    lines = []
    for coll_name, data_name, data_checksum, data_size in itertools.chain(q_root, q_sub):
        checksum_type, checksum = chop_checksum(data_checksum)
        lines.append("{} {} {} {}/{}\n".format(checksum_type, checksum, data_size, coll_name, data_name))

    # Write checksums file.
    # NOTE(review): data_object is not imported explicitly in this file; it is
    # presumably provided by `from util import *` - confirm.
    data_object.write(ctx, checksum_file, "".join(lines))