From d23dac42c770a2dc62e98782da460f087931e429 Mon Sep 17 00:00:00 2001 From: Kristjan Eimre Date: Fri, 23 Feb 2024 00:22:29 +0200 Subject: [PATCH] Improve the logging of the `backup_utils` module (#166) A number of changes to improve the logging: * A base logger `disk_objectstore` is defined and the `backup_utils` uses a child logger. This makes it easy to control the logging config for the entire package. * The `BackupManager` no longer takes a `logger` as argument. The base logger should be configured instead. * Logger is now only configured in the CLI. Not in the API itself. * `BackupManager` removes the `exes` argument and now only takes the `rsync_exe` argument to specify the `rsync` executable. * Log messages use lazy interpolation instead of f-string formatting * `rsync` is now called with `--info` flag to show a progress bar. This option is not available for rsync older than v3, in which case the `--progress` flag is used instead. --- disk_objectstore/__init__.py | 5 ++ disk_objectstore/backup_utils.py | 129 +++++++++++++++++++------------ disk_objectstore/cli.py | 15 ++-- tests/test_backup.py | 97 +++++++++++++++++------ 4 files changed, 168 insertions(+), 78 deletions(-) diff --git a/disk_objectstore/__init__.py b/disk_objectstore/__init__.py index f3e5bb2..b919ef0 100644 --- a/disk_objectstore/__init__.py +++ b/disk_objectstore/__init__.py @@ -2,8 +2,13 @@ It does not require a server running. """ + +import logging + from .container import CompressMode, Container, ObjectType +LOGGER = logging.getLogger(__name__) + __all__ = ("Container", "ObjectType", "CompressMode") __version__ = "1.0.0" diff --git a/disk_objectstore/backup_utils.py b/disk_objectstore/backup_utils.py index 8e89425..be9fb84 100644 --- a/disk_objectstore/backup_utils.py +++ b/disk_objectstore/backup_utils.py @@ -5,6 +5,7 @@ import datetime import logging import random +import re import shutil import sqlite3 import string @@ -13,10 +14,10 @@ from pathlib import Path from typing import Callable, Optional +from disk_objectstore import LOGGER as BASE_LOGGER from disk_objectstore.container import Container -logging.basicConfig(format="%(levelname)s:%(message)s") -backup_logger = logging.getLogger(__name__) +LOGGER = BASE_LOGGER.getChild(__name__) class BackupError(Exception): @@ -49,23 +50,13 @@ class BackupManager: def __init__( self, dest: str, - logger: logging.Logger, keep: int = 1, - exes: Optional[dict] = None, + rsync_exe: Optional[str] = None, ) -> None: self.dest = dest self.keep = keep - self.logger = logger self.remote, self.path = split_remote_and_path(dest) - - if exes is None: - self.exes = {} - else: - self.exes = exes - - # make sure rsync gets added so it gets validated - if "rsync" not in self.exes: - self.exes["rsync"] = "rsync" + self.rsync_exe = rsync_exe if rsync_exe is not None else "rsync" # Validate the backup config inputs @@ -77,10 +68,10 @@ def __init__( if self.remote: self.check_if_remote_accessible() - if self.exes: - for _, path in self.exes.items(): - if not is_exe_found(path): - raise ValueError(f"Input validation failed: {path} not accessible.") + if not is_exe_found(self.rsync_exe): + raise ValueError( + f"Input validation failed: {self.rsync_exe} not accessible." + ) if not self.check_path_exists(self.path): success = self.run_cmd(["mkdir", str(self.path)])[0] @@ -89,13 +80,15 @@ def __init__( f"Input validation failed: Couldn't access/create '{str(self.path)}'!" ) + self.rsync_version = self.get_rsync_major_version() + def check_if_remote_accessible(self): """Check if remote host is accessible via ssh""" - self.logger.info(f"Checking if '{self.remote}' is accessible...") + LOGGER.info("Checking if '%s' is accessible...", self.remote) success = self.run_cmd(["exit"])[0] if not success: raise BackupError(f"Remote '{self.remote}' is not accessible!") - self.logger.info("Success! '%s' is accessible!", self.remote) + LOGGER.info("Success! '%s' is accessible!", self.remote) def check_path_exists(self, path: Path) -> bool: cmd = ["[", "-e", str(path), "]"] @@ -114,17 +107,35 @@ def run_cmd( res = subprocess.run(all_args, capture_output=True, text=True, check=False) - self.logger.debug( - f"Command: {all_args}\n" - f" Exit Code: {res.returncode}\n" - f" stdout/stderr: {res.stdout}\n{res.stderr}" + LOGGER.debug( + "Command: %s\n Exit Code: %d\n stdout/stderr: %s\n%s", + str(all_args), + res.returncode, + res.stdout, + res.stderr, ) success = res.returncode == 0 return success, res.stdout - def call_rsync( # pylint: disable=too-many-arguments + def get_rsync_major_version(self): + """ + Get the rsync major version. + """ + result = subprocess.run( + [self.rsync_exe, "--version"], + capture_output=True, + text=True, + check=False, + ) + pattern = r"rsync\s+version\s+(\d+\.\d+\.\d+)" + match = re.search(pattern, result.stdout) + if match: + return int(match.group(1).split(".")[0]) + return None + + def call_rsync( # pylint: disable=too-many-arguments,too-many-branches self, src: Path, dest: Path, @@ -145,12 +156,27 @@ def call_rsync( # pylint: disable=too-many-arguments :param dest_trailing_slash: Add a trailing slash to the destination path. This makes rsync interpret the destination as a folder and create it if it doesn't exists. - """ - assert "rsync" in self.exes + all_args = [ + self.rsync_exe, + "-azh", + "--no-whole-file", + ] + + capture_output = True + if LOGGER.isEnabledFor(logging.INFO): + capture_output = False + if self.rsync_version and self.rsync_version >= 3: + # These options show progress in a nicer way but + # they're only available for rsync version 3+ + all_args += ["--info=progress2,stats1"] + else: + LOGGER.info("rsync version <3 detected: showing 'legacy' progress.") + all_args += ["--progress"] - all_args = [self.exes["rsync"], "-azh", "-vv", "--no-whole-file"] + if LOGGER.isEnabledFor(logging.DEBUG): + all_args += ["-vv"] if extra_args: all_args += extra_args if link_dest: @@ -173,16 +199,18 @@ def call_rsync( # pylint: disable=too-many-arguments else: all_args += [f"{self.remote}:{dest_str}"] - res = subprocess.run(all_args, capture_output=True, text=True, check=False) + cmd_str = " ".join(all_args) + LOGGER.info("Running '%s'", cmd_str) - self.logger.debug( - "Command: %s\n Exit Code: %s\n stdout/stderr: %s\n%s", - str(all_args), - res.returncode, - res.stdout, - res.stderr, + res = subprocess.run( + all_args, capture_output=capture_output, text=True, check=False ) + info_text = f"rsync completed. Exit Code: {res.returncode}" + if capture_output: + info_text += f"\nstdout/stderr: {res.stdout}\n{res.stderr}" + LOGGER.info(info_text) + if res.returncode != 0: raise BackupError(f"rsync failed for: {str(src)} to {str(dest)}") @@ -224,9 +252,9 @@ def delete_old_backups(self): for folder in to_delete: success = self.run_cmd(["rm", "-rf", folder])[0] if success: - self.logger.info(f"Deleted old backup: {folder}") + LOGGER.info("Deleted old backup: %s", folder) else: - self.logger.warning("Warning: couldn't delete old backup: %s", folder) + LOGGER.warning("Warning: couldn't delete old backup: %s", folder) def backup_auto_folders(self, backup_func: Callable) -> None: """Create a backup, managing live and previous backup folders automatically @@ -247,11 +275,11 @@ def backup_auto_folders(self, backup_func: Callable) -> None: last_folder = self.get_last_backup_folder() if last_folder: - self.logger.info( - f"Last backup is '{str(last_folder)}', using it for rsync --link-dest." + LOGGER.info( + "Last backup is '%s', using it for rsync --link-dest.", str(last_folder) ) else: - self.logger.info("Couldn't find a previous backup to increment from.") + LOGGER.info("Couldn't find a previous backup to increment from.") backup_func( live_folder, @@ -273,8 +301,10 @@ def backup_auto_folders(self, backup_func: Callable) -> None: f"Failed to move '{str(live_folder)}' to '{str(self.path / folder_name)}'" ) - self.logger.info( - f"Backup moved from '{str(live_folder)}' to '{str(self.path / folder_name)}'." + LOGGER.info( + "Backup moved from '%s' to '%s'.", + str(live_folder), + str(self.path / folder_name), ) symlink_name = "last-backup" @@ -282,11 +312,12 @@ def backup_auto_folders(self, backup_func: Callable) -> None: ["ln", "-sfn", str(folder_name), str(self.path / symlink_name)] )[0] if not success: - self.logger.warning( - f"Couldn't create symlink '{symlink_name}'. Perhaps the filesystem doesn't support it." + LOGGER.warning( + "Couldn't create symlink '%s'. Perhaps the filesystem doesn't support it.", + symlink_name, ) else: - self.logger.info(f"Added symlink '{symlink_name}' to '{folder_name}'.") + LOGGER.info("Added symlink '%s' to '%s'.", symlink_name, folder_name) self.delete_old_backups() @@ -331,7 +362,7 @@ def backup_container( prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None manager.call_rsync(loose_path, path, link_dest=prev_backup_loose) - manager.logger.info(f"Transferred {str(loose_path)} to {str(path)}") + LOGGER.info("Transferred %s to %s", str(loose_path), str(path)) # step 2: back up sqlite db @@ -341,18 +372,18 @@ def backup_container( _sqlite_backup(sqlite_path, sqlite_temp_loc) if sqlite_temp_loc.is_file(): - manager.logger.info(f"Dumped the SQLite database to {str(sqlite_temp_loc)}") + LOGGER.info("Dumped the SQLite database to %s", str(sqlite_temp_loc)) else: raise BackupError(f"'{str(sqlite_temp_loc)}' failed to be created.") # step 3: transfer the SQLITE database file manager.call_rsync(sqlite_temp_loc, path, link_dest=prev_backup) - manager.logger.info(f"Transferred SQLite database to {str(path)}") + LOGGER.info("Transferred SQLite database to %s", str(path)) # step 4: transfer the packed files packs_path_rel = packs_path.relative_to(container_root_path) manager.call_rsync(packs_path, path, link_dest=prev_backup) - manager.logger.info(f"Transferred {str(packs_path)} to {str(path)}") + LOGGER.info("Transferred %s to %s", str(packs_path), str(path)) # step 5: transfer anything else in the container folder manager.call_rsync( diff --git a/disk_objectstore/cli.py b/disk_objectstore/cli.py index fabb957..cbc9634 100644 --- a/disk_objectstore/cli.py +++ b/disk_objectstore/cli.py @@ -1,4 +1,5 @@ """A small CLI tool for managing stores.""" + import dataclasses import json import logging @@ -228,24 +229,26 @@ def backup( by OpenSSH, such as adding configuration options to ~/.ssh/config (e.g. to allow for passwordless login - recommended, since this script might ask multiple times for the password). - NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will not work on + NOTE: 'rsync' and other UNIX-specific commands are called, thus the command will likely not work on non-UNIX environments. """ + logging.basicConfig(format="%(levelname)s:%(message)s") + logger = logging.getLogger("disk_objectstore") + if verbosity == "silent": - backup_utils.backup_logger.setLevel(logging.ERROR) + logger.setLevel(logging.ERROR) elif verbosity == "info": - backup_utils.backup_logger.setLevel(logging.INFO) + logger.setLevel(logging.INFO) elif verbosity == "debug": - backup_utils.backup_logger.setLevel(logging.DEBUG) + logger.setLevel(logging.DEBUG) with dostore.container as container: try: backup_manager = backup_utils.BackupManager( dest, - backup_utils.backup_logger, - exes={"rsync": rsync_exe}, keep=keep, + rsync_exe=rsync_exe, ) backup_manager.backup_auto_folders( lambda path, prev: backup_utils.backup_container( diff --git a/tests/test_backup.py b/tests/test_backup.py index a8b199f..598d7c4 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -2,9 +2,11 @@ """ +import logging import platform import random import string +import subprocess from pathlib import Path import pytest @@ -25,21 +27,21 @@ def test_invalid_destination(): """Test invalid destination with two colons.""" dest = "localhost:/tmp/test:" with pytest.raises(ValueError, match="Invalid destination format"): - BackupManager(dest, backup_utils.backup_logger) + BackupManager(dest) def test_inaccessible_remote(): """Test a remote destination of random characters that will not be accessible.""" dest = f"_{_random_string()}:/tmp/test" with pytest.raises(BackupError, match="is not accessible"): - BackupManager(dest, backup_utils.backup_logger) + BackupManager(dest) def test_negative_keep(): """Test a negative keep value.""" dest = "/tmp/test" with pytest.raises(ValueError, match="keep variable can't be negative"): - BackupManager(dest, backup_utils.backup_logger, keep=-1) + BackupManager(dest, keep=-1) def test_inaccessible_exe(): @@ -47,47 +49,96 @@ def test_inaccessible_exe(): dest = "/tmp/test" rsync_exe = f"_{_random_string()}" with pytest.raises(ValueError, match=f"{rsync_exe} not accessible."): - BackupManager(dest, backup_utils.backup_logger, exes={"rsync": rsync_exe}) + BackupManager(dest, rsync_exe=rsync_exe) def test_inaccessible_path(): """Test case where path is not accessible.""" dest = f"/_{_random_string()}" # I assume there will be a permission error for this path with pytest.raises(ValueError, match=f"Couldn't access/create '{dest}'"): - BackupManager(dest, backup_utils.backup_logger) + BackupManager(dest) def test_rsync_failure(): """Test case where rsync fails.""" dest = "/tmp/test" with pytest.raises(BackupError, match="rsync failed"): - manager = BackupManager(dest, backup_utils.backup_logger) + manager = BackupManager(dest) # pick a src that doesn't exists manager.call_rsync(Path(f"/_{_random_string()}"), Path(dest)) -def test_rsync_dest_trailing_slash(temp_dir): +def test_rsync_dest_trailing_slash(tmp_path): """Test case for dest_trailing_slash.""" - dest1 = Path(temp_dir) / "dest1" - dest2 = Path(temp_dir) / "dest2" + dest1 = tmp_path / "dest1" + dest2 = tmp_path / "dest2" # manager will create dest1 folder - manager = BackupManager(str(dest1), backup_utils.backup_logger) + manager = BackupManager(str(dest1)) # dest_trailing_slash=True will create dest2 manager.call_rsync(dest1, dest2, dest_trailing_slash=True) assert dest2.exists() +def test_rsync_legacy_progress(monkeypatch, tmp_path, capfd): + """Test case where rsync version is less than 3. + + In this case, 'rsync --progress' is used, that prints each transferred file + and it's progress instead of a global progress like '--info=progress2'. + """ + + logging.getLogger("disk_objectstore").setLevel(logging.INFO) + + # monkeypatch the get_rsync_major_version + def mock_get_rsync_major_version(_self): + return None + + monkeypatch.setattr( + backup_utils.BackupManager, + "get_rsync_major_version", + mock_get_rsync_major_version, + ) + + dest1 = tmp_path / "dest1" + dest2 = tmp_path / "dest2" + + dest1.mkdir(parents=True) + + fname = "test_file.txt" + file_path = dest1 / fname + with open(file_path, "w") as f: + f.write("Test content\n") + + manager = BackupManager(str(dest2)) + manager.call_rsync(dest1, dest2, src_trailing_slash=True) + + captured = capfd.readouterr() + + assert (dest2 / fname).exists() + assert fname in captured.out + + +def test_get_rsync_major_version_failure(monkeypatch): + """Test case where rsync version fails.""" + + def mock_subprocess_run(*args, **_kwargs): + return subprocess.CompletedProcess(args, 0, "unexpected", "") + + monkeypatch.setattr(subprocess, "run", mock_subprocess_run) + manager = BackupManager("/tmp") + assert manager.get_rsync_major_version() is None + + def test_existing_backups_failure(): """Test case where existing backups fail to be determined.""" dest = "/tmp/test" with pytest.raises(BackupError, match="Existing backups determination failed"): - manager = BackupManager(dest, backup_utils.backup_logger) + manager = BackupManager(dest) # override the path to something that will fail manager.path = f"/_{_random_string()}" manager.get_existing_backup_folders() -def test_sqlite_failure(monkeypatch, temp_container, temp_dir): +def test_sqlite_failure(monkeypatch, temp_container, tmp_path): """Test case where sqlite fails to make a backup file.""" # monkeypatch sqlite backup to do nothing @@ -106,9 +157,9 @@ def mock_sqlite_backup(src, dst): # pylint: disable=unused-argument for idx in range(100): temp_container.add_object(f"test-{idx}".encode()) - dest = Path(temp_dir) / "backup" + dest = tmp_path / "backup" with pytest.raises(BackupError, match="'.*' failed to be created."): - manager = BackupManager(str(dest), backup_utils.backup_logger) + manager = BackupManager(str(dest)) manager.backup_auto_folders( lambda path, prev: backup_utils.backup_container( manager, temp_container, path, prev @@ -116,7 +167,7 @@ def mock_sqlite_backup(src, dst): # pylint: disable=unused-argument ) -def test_mv_failure(monkeypatch, temp_container, temp_dir): +def test_mv_failure(monkeypatch, temp_container, tmp_path): """ Test case where mv command fails by monkeypatching. Make sure correct BackupError is raised. @@ -143,9 +194,9 @@ def mock_run_cmd(self, args): for idx in range(100): temp_container.add_object(f"test-{idx}".encode()) - dest = Path(temp_dir) / "backup" + dest = tmp_path / "backup" with pytest.raises(BackupError, match="Failed to move"): - manager = BackupManager(str(dest), backup_utils.backup_logger) + manager = BackupManager(str(dest)) manager.backup_auto_folders( lambda path, prev: backup_utils.backup_container( manager, temp_container, path, prev @@ -153,7 +204,7 @@ def mock_run_cmd(self, args): ) -def test_ln_failure(monkeypatch, temp_container, temp_dir, caplog): +def test_ln_failure(monkeypatch, temp_container, tmp_path, caplog): """ Test case where ln command fails by monkeypatching. Make sure correct warning is logged. @@ -180,8 +231,8 @@ def mock_run_cmd(self, args): for idx in range(100): temp_container.add_object(f"test-{idx}".encode()) - dest = Path(temp_dir) / "backup" - manager = BackupManager(str(dest), backup_utils.backup_logger) + dest = tmp_path / "backup" + manager = BackupManager(str(dest)) manager.backup_auto_folders( lambda path, prev: backup_utils.backup_container( manager, temp_container, path, prev @@ -190,7 +241,7 @@ def mock_run_cmd(self, args): assert "Couldn't create symlink" in caplog.text -def test_rm_failure(monkeypatch, temp_container, temp_dir, caplog): +def test_rm_failure(monkeypatch, temp_container, tmp_path, caplog): """ Test case where rm command fails by monkeypatching. Make sure correct warning is logged. @@ -218,8 +269,8 @@ def mock_run_cmd(self, args): for idx in range(100): temp_container.add_object(f"test-{idx}".encode()) - dest = Path(temp_dir) / "backup" - manager = BackupManager(str(dest), backup_utils.backup_logger, keep=0) + dest = tmp_path / "backup" + manager = BackupManager(str(dest), keep=0) for _ in range(2): manager.backup_auto_folders( lambda path, prev: backup_utils.backup_container(