diff --git a/seqio/beam_utils.py b/seqio/beam_utils.py index 0c5da099..a4c66d7c 100644 --- a/seqio/beam_utils.py +++ b/seqio/beam_utils.py @@ -136,6 +136,8 @@ def _emit_examples(self, shard: Tuple[int, str]): shard_preprocessors_seed = int.from_bytes(md5_digest, "little") + ( self._preprocessors_seed or 0 ) + # Truncate if still a large number. + shard_preprocessors_seed %= self._int64_max ds = task.source.get_dataset( split=self._split,