Skip to content

Commit

Permalink
Merge pull request #1180 from haddocking/flat_extraction
Browse files Browse the repository at this point in the history
added flat tar extraction
  • Loading branch information
mgiulini authored Dec 11, 2024
2 parents a4487ad + aede563 commit f7a40b1
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/haddock/gear/clean_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT],
pass

for tar_file in tar_files:
with tarfile.open(tar_file) as fin:
fin.extractall(folder)
from haddock.libs.libio import extract_files_flat
extract_files_flat(tar_file, folder)

tar_file.unlink()

Expand Down
5 changes: 2 additions & 3 deletions src/haddock/gear/prepare_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
)
from haddock.gear.zerofill import zero_fill
from haddock.libs.libfunc import not_none
from haddock.libs.libio import make_writeable_recursive
from haddock.libs.libio import make_writeable_recursive, extract_files_flat
from haddock.libs.libutil import (
extract_keys_recursive,
recursive_convert_paths_to_strings,
Expand Down Expand Up @@ -1040,8 +1040,7 @@ def copy_input_files_to_data_dir(
# account for input .tgz files
if name.endswith("tgz"):
log.info(f"Uncompressing tar {value}")
with tarfile.open(target_path) as fin:
fin.extractall(pf)
extract_files_flat(value, pf)


def check_run_dir_exists(run_dir: FilePath) -> None:
Expand Down
23 changes: 23 additions & 0 deletions src/haddock/libs/libio.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Iterable,
Mapping,
Optional,
Union,
)
from haddock.libs.libontology import PDBFile
from haddock.libs.libutil import sort_numbered_paths
Expand Down Expand Up @@ -613,3 +614,25 @@ def make_writeable_recursive(path: FilePath) -> None:

for file_ in (os.path.join(root, f) for f in files):
os.chmod(file_, get_perm(file_) | stat.S_IWUSR)


def extract_files_flat(tar_path: Union[str, FilePath],
                       dest_path: Union[str, FilePath]) -> None:
    """
    Extract the regular files of a tarball directly into a folder.

    The directory structure stored in the archive is discarded: every
    regular file is written to `dest_path` under its base name only.
    Members that are not regular files (directories, links, ...) are
    skipped. If several members share the same base name they are
    extracted to the same path, so the one stored last in the archive
    wins.

    Parameters
    ----------
    tar_path : str or Path
        The path to the tarball file. The compression scheme is detected
        by `tarfile`, so both plain and compressed tarballs are accepted.
    dest_path : str or Path
        The path to the destination folder.
    """
    # "r:*" = read with transparent compression detection; a hard-coded
    # "r:gz" would raise `tarfile.ReadError` on a non-gzip archive.
    with tarfile.open(tar_path, "r:*") as tar:
        for member in tar.getmembers():
            # Extract only regular files (skip directories, links, ...)
            if not member.isfile():
                continue
            # Keep only the last path component: this removes the directory
            # structure and also prevents writing outside `dest_path`.
            flat_name = os.path.basename(member.name)
            if not flat_name:
                # nothing sensible to name the output file; skip the member
                continue
            member.name = flat_name
            tar.extract(member, dest_path)
Binary file added tests/golden_data/ambig.tbl.tgz
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/test_libio.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Test libio."""
import tempfile
from pathlib import Path
import shutil

import pytest

from haddock.libs.libio import (
clean_suffix,
dot_suffix,
extract_files_flat,
file_exists,
folder_exists,
read_from_yaml,
Expand All @@ -15,6 +17,7 @@
)

from . import emptycfg, haddock3_yaml_cfg_examples
from . import golden_data


@pytest.mark.parametrize(
Expand Down Expand Up @@ -133,3 +136,15 @@ def test_folder_exists_wrong(i):
def test_folder_exists_wrong_othererror():
    """Test folder_exists raises the type passed through `exception`."""
    expected_error = TypeError
    with pytest.raises(expected_error):
        folder_exists("some_bad_path", exception=expected_error)


def test_extract_files_flat(monkeypatch):
    """Test extract_files_flat on the golden `ambig.tbl.tgz` archive."""
    archive_name = "ambig.tbl.tgz"
    with tempfile.TemporaryDirectory() as workdir:
        # work on a copy so the golden data folder is never written to
        shutil.copy(Path(golden_data, archive_name), workdir)
        # relative paths below are resolved inside the temporary folder
        monkeypatch.chdir(workdir)
        # extract the archive into the current folder
        extract_files_flat(Path(archive_name), ".")
        extracted_file = Path("ambig_1.tbl")
        assert extracted_file.exists()

0 comments on commit f7a40b1

Please sign in to comment.