From 3bcb5666ca21435ed77ec7f47a0ca0e6391a7914 Mon Sep 17 00:00:00 2001 From: Yotaro Kubo Date: Wed, 17 Jan 2024 02:06:10 -0800 Subject: [PATCH] Fix behavior when `Iterable` is actually given as `split_to_filepattern`. PiperOrigin-RevId: 599100213 --- seqio/dataset_providers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/seqio/dataset_providers.py b/seqio/dataset_providers.py index 5089c38d..397f4e48 100644 --- a/seqio/dataset_providers.py +++ b/seqio/dataset_providers.py @@ -695,10 +695,16 @@ def list_shards(self, split: str) -> Sequence[str]: if isinstance(filepattern, str): return _list_files(pattern=filepattern) + filepattern = list(filepattern) + if not any(glob.has_magic(f) for f in filepattern): return filepattern else: - return _list_files(pattern=filepattern) + assert isinstance(filepattern, Iterable) + ret = [] + for f in filepattern: + ret.extend(_list_files(pattern=f)) + return ret