From 43b0b4f6671395b039533e588178cc65e645e96a Mon Sep 17 00:00:00 2001 From: mgiulini Date: Wed, 11 Dec 2024 15:14:47 +0100 Subject: [PATCH] added flat tar extraction --- src/haddock/gear/clean_steps.py | 4 ++-- src/haddock/gear/prepare_run.py | 5 ++--- src/haddock/libs/libio.py | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/haddock/gear/clean_steps.py b/src/haddock/gear/clean_steps.py index a73915d6f..933412d49 100644 --- a/src/haddock/gear/clean_steps.py +++ b/src/haddock/gear/clean_steps.py @@ -163,8 +163,8 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT], pass for tar_file in tar_files: - with tarfile.open(tar_file) as fin: - fin.extractall(folder) + from haddock.libs.libio import extract_files_flat + extract_files_flat(tar_file, folder) tar_file.unlink() diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py index 65b03cbb8..8f1ee274b 100644 --- a/src/haddock/gear/prepare_run.py +++ b/src/haddock/gear/prepare_run.py @@ -70,7 +70,7 @@ ) from haddock.gear.zerofill import zero_fill from haddock.libs.libfunc import not_none -from haddock.libs.libio import make_writeable_recursive +from haddock.libs.libio import make_writeable_recursive, extract_files_flat from haddock.libs.libutil import ( extract_keys_recursive, recursive_convert_paths_to_strings, @@ -1040,8 +1040,7 @@ def copy_input_files_to_data_dir( # account for input .tgz files if name.endswith("tgz"): log.info(f"Uncompressing tar {value}") - with tarfile.open(target_path) as fin: - fin.extractall(pf) + extract_files_flat(value, pf) def check_run_dir_exists(run_dir: FilePath) -> None: diff --git a/src/haddock/libs/libio.py b/src/haddock/libs/libio.py index 05e317a27..8f0b17c77 100644 --- a/src/haddock/libs/libio.py +++ b/src/haddock/libs/libio.py @@ -21,6 +21,7 @@ Iterable, Mapping, Optional, + Union, ) from haddock.libs.libontology import PDBFile from haddock.libs.libutil import sort_numbered_paths @@ -613,3 
def extract_files_flat(tar_path: Union[str, "FilePath"],
                       dest_path: Union[str, "FilePath"]) -> None:
    """
    Extract the regular files of a tarball into a single flat folder.

    The directory structure stored in the archive is discarded: every
    regular file is written directly inside `dest_path` under its base
    name. Directories, links and special members are skipped. If two
    members share the same base name, the one appearing later in the
    archive overwrites the earlier one.

    Parameters
    ----------
    tar_path : str or Path
        The path to the tarball file. The compression (none, gzip,
        bzip2, xz) is detected automatically.

    dest_path : str or Path
        The path to the destination folder.

    Raises
    ------
    tarfile.ReadError
        If `tar_path` cannot be read as a tar archive.
    """
    # Opt in to the conservative "data" extraction filter on interpreters
    # that provide it; older interpreters do not accept the keyword.
    extract_kwargs = {}
    if hasattr(tarfile, "data_filter"):
        extract_kwargs["filter"] = "data"

    # "r:*" lets tarfile detect the compression transparently, matching
    # what a plain `tarfile.open(path)` call accepts.
    with tarfile.open(tar_path, "r:*") as tar:
        for member in tar.getmembers():
            # Extract only files (skip directories, links, devices)
            if not member.isfile():
                continue
            flat_name = os.path.basename(member.name)
            # A base name that is empty or a dot entry would resolve to
            # the destination folder itself or to its parent.
            if flat_name in ("", ".", ".."):
                continue
            # Rename the member so it lands directly in `dest_path`
            member.name = flat_name
            tar.extract(member, dest_path, **extract_kwargs)