Skip to content

Commit

Permalink
added flat tar extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
mgiulini committed Dec 11, 2024
1 parent a4487ad commit 43b0b4f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/haddock/gear/clean_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ def unpack_compressed_and_archived_files(folders: Iterable[FilePathT],
pass

for tar_file in tar_files:
with tarfile.open(tar_file) as fin:
fin.extractall(folder)
from haddock.libs.libio import extract_files_flat
extract_files_flat(tar_file, folder)

tar_file.unlink()

Expand Down
5 changes: 2 additions & 3 deletions src/haddock/gear/prepare_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
)
from haddock.gear.zerofill import zero_fill
from haddock.libs.libfunc import not_none
from haddock.libs.libio import make_writeable_recursive
from haddock.libs.libio import make_writeable_recursive, extract_files_flat
from haddock.libs.libutil import (
extract_keys_recursive,
recursive_convert_paths_to_strings,
Expand Down Expand Up @@ -1040,8 +1040,7 @@ def copy_input_files_to_data_dir(
# account for input .tgz files
if name.endswith("tgz"):
log.info(f"Uncompressing tar {value}")
with tarfile.open(target_path) as fin:
fin.extractall(pf)
extract_files_flat(value, pf)


def check_run_dir_exists(run_dir: FilePath) -> None:
Expand Down
23 changes: 23 additions & 0 deletions src/haddock/libs/libio.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Iterable,
Mapping,
Optional,
Union,
)
from haddock.libs.libontology import PDBFile
from haddock.libs.libutil import sort_numbered_paths
Expand Down Expand Up @@ -613,3 +614,25 @@ def make_writeable_recursive(path: FilePath) -> None:

for file_ in (os.path.join(root, f) for f in files):
os.chmod(file_, get_perm(file_) | stat.S_IWUSR)


def extract_files_flat(tar_path: Union[str, FilePath],
                       dest_path: Union[str, FilePath]) -> None:
    """
    Extract the regular files of a tarball into a single flat folder.

    The directory structure stored in the archive is discarded: every
    regular file is written directly inside `dest_path` under its base
    name. Members that are not regular files (directories, links, ...)
    are skipped.

    Members that share the same base name map to the same destination
    path; no renaming is attempted to keep them apart.

    Parameters
    ----------
    tar_path : str or Path
        The path to the tarball file. Both plain and compressed
        archives are accepted, the compression being detected
        automatically.
    dest_path : str or Path
        The path to the destination folder.
    """
    # "r:*" makes tarfile detect the compression transparently, so the
    # function is not restricted to gzip-compressed archives
    with tarfile.open(tar_path, "r:*") as tar:
        for member in tar.getmembers():
            # extract only files (skip directories and special members)
            if not member.isfile():
                continue
            # drop the directory part of the member name so that the
            # file lands directly in `dest_path`
            member.name = os.path.basename(member.name)
            tar.extract(member, dest_path)

0 comments on commit 43b0b4f

Please sign in to comment.