diff --git a/src/haddock/gear/clean_steps.py b/src/haddock/gear/clean_steps.py
index a73915d6f..933412d49 100644
--- a/src/haddock/gear/clean_steps.py
+++ b/src/haddock/gear/clean_steps.py
@@ -163,8 +163,8 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT],
                 pass
 
         for tar_file in tar_files:
-            with tarfile.open(tar_file) as fin:
-                fin.extractall(folder)
+            from haddock.libs.libio import extract_files_flat
+            extract_files_flat(tar_file, folder)
             tar_file.unlink()
 
 
diff --git a/src/haddock/gear/prepare_run.py b/src/haddock/gear/prepare_run.py
index 65b03cbb8..8f1ee274b 100644
--- a/src/haddock/gear/prepare_run.py
+++ b/src/haddock/gear/prepare_run.py
@@ -70,7 +70,7 @@
     )
 from haddock.gear.zerofill import zero_fill
 from haddock.libs.libfunc import not_none
-from haddock.libs.libio import make_writeable_recursive
+from haddock.libs.libio import extract_files_flat, make_writeable_recursive
 from haddock.libs.libutil import (
     extract_keys_recursive,
     recursive_convert_paths_to_strings,
@@ -1040,8 +1040,7 @@ def copy_input_files_to_data_dir(
                     # account for input .tgz files
                     if name.endswith("tgz"):
                         log.info(f"Uncompressing tar {value}")
-                        with tarfile.open(target_path) as fin:
-                            fin.extractall(pf)
+                        extract_files_flat(value, pf)
 
 
 def check_run_dir_exists(run_dir: FilePath) -> None:
diff --git a/src/haddock/libs/libio.py b/src/haddock/libs/libio.py
index 05e317a27..8f0b17c77 100644
--- a/src/haddock/libs/libio.py
+++ b/src/haddock/libs/libio.py
@@ -21,6 +21,7 @@
     Iterable,
     Mapping,
     Optional,
+    Union,
     )
 from haddock.libs.libontology import PDBFile
 from haddock.libs.libutil import sort_numbered_paths
@@ -613,3 +614,34 @@ def make_writeable_recursive(path: FilePath) -> None:
 
         for file_ in (os.path.join(root, f) for f in files):
             os.chmod(file_, get_perm(file_) | stat.S_IWUSR)
+
+
+def extract_files_flat(tar_path: Union[str, FilePath],
+                       dest_path: Union[str, FilePath]) -> None:
+    """
+    Extract the regular files of a tarball into a single flat folder.
+
+    The directory structure stored in the archive is discarded: every
+    regular file is written directly inside `dest_path` under its base
+    name. Directories, links and other special members are skipped. If
+    two members share the same base name, the one stored later in the
+    archive overwrites the earlier one.
+
+    Parameters
+    ----------
+    tar_path : str or Path
+        The path to the tarball file. Compression (none, gzip, bz2, xz)
+        is detected automatically.
+
+    dest_path : str or Path
+        The path to the destination folder.
+    """
+    # default mode ("r") detects the compression transparently, as the
+    # `tarfile.open(...)` calls replaced by this function used to do
+    with tarfile.open(tar_path) as tar:
+        for member in tar.getmembers():
+            # Extract only files (skip directories)
+            if member.isfile():
+                # Modify the member name to remove the directory structure
+                member.name = os.path.basename(member.name)
+                tar.extract(member, dest_path)
diff --git a/tests/golden_data/ambig.tbl.tgz b/tests/golden_data/ambig.tbl.tgz
new file mode 100644
index 000000000..56f4ef82e
Binary files /dev/null and b/tests/golden_data/ambig.tbl.tgz differ
diff --git a/tests/test_libio.py b/tests/test_libio.py
index b917a1242..305ac1dc5 100644
--- a/tests/test_libio.py
+++ b/tests/test_libio.py
@@ -1,12 +1,14 @@
 """Test libio."""
 import tempfile
 from pathlib import Path
+import shutil
 
 import pytest
 
 from haddock.libs.libio import (
     clean_suffix,
     dot_suffix,
+    extract_files_flat,
     file_exists,
     folder_exists,
     read_from_yaml,
@@ -15,6 +17,7 @@
     )
 
 from . import emptycfg, haddock3_yaml_cfg_examples
+from . import golden_data
 
 
 @pytest.mark.parametrize(
@@ -133,3 +136,15 @@ def test_folder_exists_wrong(i):
 def test_folder_exists_wrong_othererror():
     with pytest.raises(TypeError):
         folder_exists("some_bad_path", exception=TypeError)
+
+
+def test_extract_files_flat(monkeypatch):
+    """Test extract_files_flat."""
+    with tempfile.TemporaryDirectory() as tempdir:
+        reference_archive = Path(golden_data, "ambig.tbl.tgz")
+        shutil.copy(reference_archive, tempdir)
+        monkeypatch.chdir(tempdir)
+        archive = Path(reference_archive.name)
+        # extract the archive
+        extract_files_flat(archive, ".")
+        assert Path("ambig_1.tbl").exists()