From f5fb84c93bcad27ea779de9f49f358b5cfa08e77 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Wed, 15 Jan 2025 09:11:08 +0100 Subject: [PATCH] commands - commit changes after every batch/file has been processed to avoid an increasingly large journal file --- digiarch/commands/edit/action.py | 12 ++++++++++++ digiarch/commands/edit/common.py | 2 ++ digiarch/commands/edit/remove.py | 3 +++ digiarch/commands/edit/rename.py | 1 + digiarch/commands/extract/extract.py | 2 ++ digiarch/commands/identify.py | 8 ++++++++ 6 files changed, 28 insertions(+) diff --git a/digiarch/commands/edit/action.py b/digiarch/commands/edit/action.py index 8002db03..b8165978 100644 --- a/digiarch/commands/edit/action.py +++ b/digiarch/commands/edit/action.py @@ -223,6 +223,8 @@ def cmd_action_original_convert( set_action(ctx, database, file, "convert", data, reason, dry_run, log_stdout) if lock: set_lock(ctx, database, file, reason, dry_run, log_stdout) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -271,6 +273,8 @@ def cmd_action_original_extract( set_action(ctx, database, file, "extract", data, reason, dry_run, log_stdout) if lock: set_lock(ctx, database, file, reason, dry_run, log_stdout) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -327,6 +331,8 @@ def cmd_action_original_manual( set_action(ctx, database, file, "manual", data, reason, dry_run, log_stdout) if lock: set_lock(ctx, database, file, reason, dry_run, log_stdout) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -392,6 +398,8 @@ def cmd_action_original_ignore( set_action(ctx, database, file, "ignore", data, reason, dry_run, log_stdout) if lock: set_lock(ctx, database, file, reason, dry_run, log_stdout) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -467,6 +475,8 @@ def 
cmd_action_original_copy( set_action(ctx, database, file, action, data, reason, dry_run, log_stdout) if lock: set_lock(ctx, database, file, reason, dry_run, log_stdout) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -520,6 +530,8 @@ def cmd_action_master_convert( with ExceptionManager(BaseException) as exception: for file in query_table(database.master_files, query, [("lower(relative_path)", "asc")]): set_master_convert(ctx, database, file, data, action_type, reason, dry_run) + if not dry_run: + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) diff --git a/digiarch/commands/edit/common.py b/digiarch/commands/edit/common.py index 6c8f5dbe..e1a7f20f 100644 --- a/digiarch/commands/edit/common.py +++ b/digiarch/commands/edit/common.py @@ -51,6 +51,7 @@ def edit_file_value( setattr(file, "lock", True) table.update(file) database.log.insert(event) + database.commit() event.log(INFO, *loggers, show_args=["uuid", "data"], path=file.relative_path) @@ -73,5 +74,6 @@ def _handler(_ctx: Context, _avid: AVID, database: FilesDB, event: Event, file: prev_value, next_value = event.data setattr(file, property_name, prev_value) table.update(file) + database.commit() return _handler diff --git a/digiarch/commands/edit/remove.py b/digiarch/commands/edit/remove.py index 34a953bf..cb6844a3 100644 --- a/digiarch/commands/edit/remove.py +++ b/digiarch/commands/edit/remove.py @@ -141,6 +141,8 @@ def remove_files( if reset_processed: reset_parent_processed(database, file) + database.commit() + def rollback_remove_original(_ctx: Context, avid: AVID, database: FilesDB, event: Event, file: BaseFile | None): old_file = OriginalFile.model_validate(event.data) @@ -153,6 +155,7 @@ def rollback_remove_original(_ctx: Context, avid: AVID, database: FilesDB, event raise FileNotFoundError(old_file.relative_path) database.original_files.insert(old_file) + database.commit() @rollback("remove", 
rollback_remove_original) diff --git a/digiarch/commands/edit/rename.py b/digiarch/commands/edit/rename.py index 066c3b70..8cbb1c8f 100644 --- a/digiarch/commands/edit/rename.py +++ b/digiarch/commands/edit/rename.py @@ -134,5 +134,6 @@ def cmd_rename_original( event.log(INFO, log_stdout, show_args=["uuid", "data"]) database.log.insert(event) + database.commit() end_program(ctx, database, exception, dry_run, log_file, log_stdout) diff --git a/digiarch/commands/extract/extract.py b/digiarch/commands/extract/extract.py index a22b59a5..02d8ba39 100644 --- a/digiarch/commands/extract/extract.py +++ b/digiarch/commands/extract/extract.py @@ -305,5 +305,7 @@ def cmd_extract( archive_file.action_data.ignore = IgnoreAction(template="extracted-archive") db.original_files.update(archive_file) + if not dry_run: + db.commit() end_program(ctx, db, exception, dry_run, log_file, log_stdout) diff --git a/digiarch/commands/identify.py b/digiarch/commands/identify.py index 2ace3f33..c06c5709 100644 --- a/digiarch/commands/identify.py +++ b/digiarch/commands/identify.py @@ -429,6 +429,8 @@ def cmd_identify_original( None, log_stdout, ) + if not dry_run: + db.commit() end_program(ctx, db, exception, dry_run, log_file, log_stdout) @@ -521,6 +523,8 @@ def cmd_identify_master( while batch := list(islice(files, batch_size)): for sf_file in siegfried.identify(*batch).files: identify_master_file(ctx, avid, db, sf_file, custom_signatures, actions, dry_run, log_stdout) + if not dry_run: + db.commit() end_program(ctx, db, exception, dry_run, log_file, log_stdout) @@ -596,6 +600,8 @@ def cmd_identify_access( while batch := list(islice(files, batch_size)): for sf_file in siegfried.identify(*batch).files: identify_converted_file(ctx, avid, db.access_files, "access", sf_file, dry_run, log_stdout) + if not dry_run: + db.commit() end_program(ctx, db, exception, dry_run, log_file, log_stdout) @@ -671,6 +677,8 @@ def cmd_identify_statutory( while batch := list(islice(files, batch_size)): for sf_file 
in siegfried.identify(*batch).files: identify_converted_file(ctx, avid, db.statutory_files, "statutory", sf_file, dry_run, log_stdout) + if not dry_run: + db.commit() end_program(ctx, db, exception, dry_run, log_file, log_stdout)