From 3d703befd9c31435bedf335a6d51c2db6a5c59ac Mon Sep 17 00:00:00 2001 From: Jaap Koetsier Date: Fri, 24 Nov 2023 10:41:01 +0100 Subject: [PATCH] Hr update events bugfixes --- CHANGES.md | 6 +++ setup.cfg | 2 +- src/schematools/events/full.py | 20 +++++----- tests/test_events_full.py | 72 ++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 11 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 28a0c1d8..3305a6bd 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +# 2023-11-24 (5.17.18) + +* Bugfix: Update nested table when nested field name has underscore. +* Bugfix: Update parent table when parent table has shortname for update events. +* Bugfix: Only check for row existence when table exists. + # 2023-10-18 (5.17.17) * Bugfix: Ignore id when copying data from temp table to main table for nested tables. diff --git a/setup.cfg b/setup.cfg index bcf3f704..2a821fa0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = amsterdam-schema-tools -version = 5.17.17 +version = 5.17.18 url = https://github.com/amsterdam/schema-tools license = Mozilla Public 2.0 author = Team Data Diensten, van het Dataplatform onder de Directie Digitale Voorzieningen (Gemeente Amsterdam) diff --git a/src/schematools/events/full.py b/src/schematools/events/full.py index 638f81f6..3998a710 100644 --- a/src/schematools/events/full.py +++ b/src/schematools/events/full.py @@ -55,7 +55,7 @@ def parent_id_field(self): def parent_id_value(self, prepared_row: dict) -> str: return ".".join( [ - str(prepared_row[to_snake_case(f"{self.parent_schema_table.id}_{fn}")]) + str(prepared_row[to_snake_case(f"{self.parent_schema_table.shortname}_{fn}")]) for fn in self.parent_schema_table.identifier ] ) @@ -421,16 +421,16 @@ def _process_row( event_type = event_meta["event_type"] - if ( - run_configuration.check_existence_on_add - and event_type == "ADD" - and self._row_exists_in_database(run_configuration, id_value) - ): - logger.info("Row with id %s already exists in database. Skipping.", row["id"]) - return - db_operation = None if run_configuration.update_table: + if ( + run_configuration.check_existence_on_add + and event_type == "ADD" + and self._row_exists_in_database(run_configuration, id_value) + ): + logger.info("Row with id %s already exists in database. Skipping.", row["id"]) + return + db_operation_name, needs_select = EVENT_TYPE_MAPPINGS[event_type] db_operation = getattr(table, db_operation_name)() @@ -472,7 +472,7 @@ def _update_nested_tables( if is_delete: continue - if value := prepared_row.get(field.id, []): + if value := prepared_row.get(to_snake_case(field.id), []): if rows := self._prepare_nested_rows(field, value, id_value): self.conn.execute(table.insert(), rows) diff --git a/tests/test_events_full.py b/tests/test_events_full.py index 7ead6b46..01cc68ba 100644 --- a/tests/test_events_full.py +++ b/tests/test_events_full.py @@ -1248,6 +1248,78 @@ def test_full_load_shortnames(here, db_schema, tconn, local_metadata, hr_simple_ assert records[0]["heeft_hoofdvestiging_id"] == "24902480" +def test_full_load_shortnames_update(here, db_schema, tconn, local_metadata, hr_simple_schema): + importer = EventsProcessor([hr_simple_schema], tconn, local_metadata=local_metadata) + + # First import an object with nested objects + events = [ + ( + { + "event_type": "ADD", + "event_id": 1, + "dataset_id": "hr", + "table_id": "maatschappelijkeactiviteiten", + }, + { + "kvknummer": 42, + "email_adressen": [ + { + "email_adres": "address1@example.com", + }, + { + "email_adres": "address2@example.com", + }, + ], + }, + ) + ] + importer.process_events(events) + + # Not testing contents here, but merely the fact that the right tables are used without errors + records = [dict(r) for r in tconn.execute("SELECT * FROM hr_mac")] + assert len(records) == 1 + assert records[0]["heeft_hoofdvestiging_id"] is None + + nested_records = [dict(r) for r in tconn.execute("SELECT * FROM hr_mac_email_adressen")] + assert len(nested_records) == 2 + assert nested_records[0]["parent_id"] == "42" + assert nested_records[0]["email_adres"] == "address1@example.com" + + # Now test adding a relation object that references a parent table with short name + events = [ + ( + { + "dataset_id": "hr", + "table_id": "maatschappelijkeactiviteiten_heeftHoofdvestiging", + "event_type": "ADD", + "event_id": 1658565091, + "tid": "42.AMSBI.24902480", + "generated_timestamp": "2023-10-05T09:59:05.314873", + }, + { + "mac_kvknummer": "42", + "mac_id": "42", + "heeft_hoofdvestiging_vestigingsnummer": "24902480", + "heeft_hoofdvestiging_id": "24902480", + "begin_geldigheid": None, + "eind_geldigheid": None, + "id": 457172, + }, + ) + ] + + importer.process_events(events) + rel_records = [dict(r) for r in tconn.execute("SELECT * FROM hr_mac_heeft_hoofdvestiging")] + assert len(rel_records) == 1 + assert rel_records[0]["id"] == 457172 + assert rel_records[0]["mac_id"] == "42" + assert rel_records[0]["heeft_hoofdvestiging_id"] == "24902480" + + records = [dict(r) for r in tconn.execute("SELECT * FROM hr_mac")] + assert len(records) == 1 + assert records[0]["heeft_hoofdvestiging_id"] == "24902480" + + def test_reset_lasteventid_after_incomplete_full_load( here, db_schema, tconn, local_metadata, nap_schema, gebieden_schema ):