From 25e7e8f61166756a370e9b091569ada4f2683261 Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 15:42:17 -0700 Subject: [PATCH 1/7] Added sql patch --- microsetta_private_api/db/patches/0065.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 microsetta_private_api/db/patches/0065.sql diff --git a/microsetta_private_api/db/patches/0065.sql b/microsetta_private_api/db/patches/0065.sql new file mode 100644 index 000000000..b6a5f142c --- /dev/null +++ b/microsetta_private_api/db/patches/0065.sql @@ -0,0 +1,11 @@ +CREATE TABLE ag.event_log( + id uuid PRIMARY KEY NOT NULL, + event_type varchar(100) NOT NULL, + event_time timestamptz default current_timestamp, + event_subtype varchar(100) NOT NULL, + event_state jsonb); + +CREATE UNIQUE INDEX idx_event_log_event_time ON ag.event_log (event_time); +CREATE UNIQUE INDEX idx_event_log_event_type_event_time ON ag.event_log (event_type, event_time); +CREATE UNIQUE INDEX idx_events_type_time ON ag.event_log (event_type, event_time); + From c7cf053752342e2609646d06b8dc47905b4e69eb Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 19:19:29 -0700 Subject: [PATCH 2/7] Added event log --- microsetta_private_api/db/patches/0065.sql | 16 ++- microsetta_private_api/model/log_event.py | 50 +++++++ microsetta_private_api/repo/event_log_repo.py | 122 ++++++++++++++++++ microsetta_private_api/repo/tests/__init__.py | 0 .../repo/tests/test_event_log_repo.py | 97 ++++++++++++++ microsetta_private_api/repo/transaction.py | 3 + 6 files changed, 283 insertions(+), 5 deletions(-) create mode 100644 microsetta_private_api/model/log_event.py create mode 100644 microsetta_private_api/repo/event_log_repo.py create mode 100644 microsetta_private_api/repo/tests/__init__.py create mode 100644 microsetta_private_api/repo/tests/test_event_log_repo.py diff --git a/microsetta_private_api/db/patches/0065.sql b/microsetta_private_api/db/patches/0065.sql index b6a5f142c..8f710efca 100644 --- a/microsetta_private_api/db/patches/0065.sql +++ b/microsetta_private_api/db/patches/0065.sql @@ -1,11 +1,17 @@ CREATE TABLE ag.event_log( id uuid PRIMARY KEY NOT NULL, event_type varchar(100) NOT NULL, - event_time timestamptz default current_timestamp, event_subtype varchar(100) NOT NULL, + event_time timestamptz default current_timestamp, event_state jsonb); -CREATE UNIQUE INDEX idx_event_log_event_time ON ag.event_log (event_time); -CREATE UNIQUE INDEX idx_event_log_event_type_event_time ON ag.event_log (event_type, event_time); -CREATE UNIQUE INDEX idx_events_type_time ON ag.event_log (event_type, event_time); - +-- Full event log sorted by time +CREATE INDEX idx_event_log_event_time ON ag.event_log (event_time); +-- Event log filtered by type sorted by time +CREATE INDEX idx_event_log_event_type_event_time ON ag.event_log (event_type, event_time); +-- Event log filtered by type and subtype sorted by time +CREATE INDEX idx_event_log_event_type_event_subtype_event_time ON ag.event_log (event_type, event_subtype, event_time); +-- Event log filtered by user email sorted by time +CREATE INDEX idx_events_state_email_time ON ag.event_log ((event_state->>'email'), event_time); +-- Event log filtered by user account id sorted by time +CREATE INDEX idx_events_state_account_id_time ON ag.event_log ((event_state->>'account_id'), event_time); diff --git a/microsetta_private_api/model/log_event.py b/microsetta_private_api/model/log_event.py new file mode 100644 index 000000000..33aabf6d8 --- /dev/null +++ b/microsetta_private_api/model/log_event.py @@ -0,0 
+1,50 @@ +import datetime +import uuid + +from microsetta_private_api.model.model_base import ModelBase +from enum import Enum, unique + +# NOTE: The string values of these enums are persisted to the database +# therefore. They MUST NOT BE CHANGED. +@unique +class EventType(Enum): + # The event type indicating an email was sent to an end user + EMAIL = "email" + + +@unique +class EventSubtype(Enum): + # Email event subtypes refer to the various email templates we can send out + + # indicate a sample was received, and is good, but is being banked + EMAIL_SAMPLE_RECEIVED_BANKED = "sample_received_banked" + # indicate a sample was received, is good, and is being plated + EMAIL_SAMPLE_RECEIVED_PLATED = "sample_received_plated" + # indicate a previously banked sample is now being plated + EMAIL_BANKED_SAMPLE_NOW_PLATED = "banked_sample_plated" + # indicate if there is a problem with a sample + # (messaging should be tailored to the problem) + EMAIL_SAMPLE_RECEIVED_WITH_PROBLEMS = "sample_received_with_problems" + + +class LogEvent(ModelBase): + def __init__(self, + event_id: uuid.UUID, + event_type: EventType, + event_subtype: EventSubtype, + event_time: datetime, + event_state: dict): + self.event_id = event_id + self.event_type = event_type + self.event_subtype = event_subtype + self.event_time = event_time + self.event_state = event_state + + def to_api(self): + return { + "event_id": str(self.event_id), + "event_type": self.event_type.value, + "event_subtype": self.event_subtype.value, + "event_time": self.event_time, + "event_state": self.event_state + } diff --git a/microsetta_private_api/repo/event_log_repo.py b/microsetta_private_api/repo/event_log_repo.py new file mode 100644 index 000000000..70de310e8 --- /dev/null +++ b/microsetta_private_api/repo/event_log_repo.py @@ -0,0 +1,122 @@ +from uuid import UUID + +from psycopg2._json import Json + +from microsetta_private_api.model.log_event import LogEvent, EventType, \ + EventSubtype +from microsetta_private_api.repo.base_repo import BaseRepo + + +_read_cols = "id, event_type, event_subtype, event_time, event_state" + + +def _event_to_row(event: LogEvent): + return { + "id": event.event_id, + "event_type": event.event_type.value, + "event_subtype": event.event_subtype.value, + # event_time is set by db upon creation, need not be passed in. 
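+ # Json() wraps the plain dict so psycopg2 serializes it for the jsonb event_state column.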
+ "event_state": Json(event.event_state), + } + + +def _row_to_event(row): + return LogEvent(row['id'], + EventType(row['event_type']), + EventSubtype(row['event_subtype']), + row['event_time'], + row['event_state']) + + +class EventLogRepo(BaseRepo): + def __init__(self, transaction): + super().__init__(transaction) + + def add_event(self, event: LogEvent): + with self._transaction.cursor() as cur: + cur.execute("INSERT INTO event_log(" + "id, " + "event_type, " + "event_subtype, " + "event_state" + ") VALUES (" + "%(id)s, " + "%(event_type)s, " + "%(event_subtype)s, " + "%(event_state)s" + ")", + _event_to_row(event)) + return cur.rowcount == 1 + + def get_events(self): + with self._transaction.dict_cursor() as cur: + cur.execute("SELECT " + _read_cols + " FROM " + "event_log " + "ORDER BY " + "event_time DESC") + return [_row_to_event(row) for row in cur.fetchall()] + + def get_events_by_type(self, event_type: EventType): + with self._transaction.dict_cursor() as cur: + cur.execute("SELECT " + _read_cols + " FROM " + "event_log " + "WHERE " + "event_type = %s " + "ORDER BY " + "event_time DESC", + (event_type.value,)) + return [_row_to_event(row) for row in cur.fetchall()] + + def get_events_by_subtype(self, + event_type: EventType, + event_subtype: EventSubtype): + with self._transaction.dict_cursor() as cur: + cur.execute("SELECT " + _read_cols + " FROM " + "event_log " + "WHERE " + "event_type = %s AND " + "event_subtype = %s " + "ORDER BY " + "event_time DESC", + (event_type.value, event_subtype.value)) + return [_row_to_event(row) for row in cur.fetchall()] + + # See https://www.postgresql.org/docs/9.5/functions-json.html#FUNCTIONS-JSON-OP-TABLE # noqa + # to understand referencing email field from jsonb representation + + # TODO: I believe the LIKE operator can make use of the btree index i've + # set on this table so long as the pattern specified is a case sensitive + # prefix. 
To support ILIKE or searching middle of email field, may have + # to pull it out of the jsonb field, or dive into gin_trgm_ops + # See https://stackoverflow.com/questions/33025890/indexing-jsonb-data-for-pattern-matching-searches # noqa + def get_events_by_email(self, email: str): + with self._transaction.dict_cursor() as cur: + cur.execute("SELECT " + _read_cols + " FROM " + "event_log " + "WHERE " + "event_state->>'email' LIKE %s " + "ORDER BY " + "event_state->>'email', event_time DESC", + # Do not change this pattern without analyzing + # the query in postgres to ensure it uses indexes + (email+"%",)) + return [_row_to_event(row) for row in cur.fetchall()] + + def get_events_by_account(self, account_id: UUID): + with self._transaction.dict_cursor() as cur: + cur.execute("SELECT " + _read_cols + " FROM " + "event_log " + "WHERE " + "event_state->>'account_id' = %s " + "ORDER BY " + "event_time DESC", + (str(account_id),)) + return [_row_to_event(row) for row in cur.fetchall()] + + def delete_event(self, event_id: UUID): + with self._transaction.cursor() as cur: + cur.execute("DELETE FROM event_log " + "WHERE " + "id = %s", + (event_id,)) + return cur.rowcount == 1 diff --git a/microsetta_private_api/repo/tests/__init__.py b/microsetta_private_api/repo/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/microsetta_private_api/repo/tests/test_event_log_repo.py b/microsetta_private_api/repo/tests/test_event_log_repo.py new file mode 100644 index 000000000..91f1c3839 --- /dev/null +++ b/microsetta_private_api/repo/tests/test_event_log_repo.py @@ -0,0 +1,97 @@ +import json + +import psycopg2 +import unittest + +from microsetta_private_api.model.log_event import LogEvent, EventType, \ + EventSubtype +from microsetta_private_api.repo.event_log_repo import EventLogRepo +from microsetta_private_api.repo.transaction import Transaction + +import uuid + +from microsetta_private_api.util.util import json_converter, fromisotime + + +class EventLogTests(unittest.TestCase): + def test_event_log(self): + event_id = uuid.uuid4() + acct_id = uuid.uuid4() + event = LogEvent( + event_id, + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED, + None, + { + "email": "foobarbaz@kasdgklhasg.com", + "account_id": str(acct_id), + "blahblah": "Blah blah blah", + "ski": "ball" + } + ) + + with Transaction() as t: + events = EventLogRepo(t) + + insertion = events.add_event(event) + self.assertTrue(insertion) + + # Check full event log + self.assertEqual(events.get_events()[0].event_id, event_id) + + # Check event log filtered by primary type + primary_type = events.get_events_by_type(EventType.EMAIL)[0] + self.assertEqual(primary_type.event_id, event_id) + + # Check fields + client_obj = json.loads(json.dumps(primary_type.to_api(), + default=json_converter)) + self.assertEqual(client_obj['event_id'], str(event_id)) + self.assertEqual(client_obj['event_type'], + EventType.EMAIL.value) + self.assertEqual(client_obj['event_subtype'], + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED.value) + self.assertEqual(client_obj['event_state']['ski'], 'ball') + + # Check event log filtered by subtype + subtype = events.get_events_by_subtype( + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED)[0] + self.assertEqual(subtype.event_id, event_id) + + # Check event log filtered by exact email + exact = events.get_events_by_email("foobarbaz@kasdgklhasg.com")[0] + self.assertEqual(exact.event_id, event_id) + + # Check event log filtered by email prefix + partial = 
events.get_events_by_email("foobarbaz@ka")[0] + self.assertEqual(partial.event_id, event_id) + + # Check event log filtered by account + acct = events.get_events_by_account(acct_id)[0] + self.assertEqual(acct.event_id, event_id) + + # Check event can be deleted + deletion = events.delete_event(event_id) + self.assertTrue(deletion) + + # Check event is no longer there + all = events.get_events() + if len(all) > 0: + assert all[0].event_id != event_id + t.rollback() + + def test_dups_rejected(self): + event = LogEvent( + uuid.uuid4(), + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED, + None, + {} + ) + with Transaction() as t: + events = EventLogRepo(t) + events.add_event(event) + with self.assertRaises(psycopg2.errors.UniqueViolation): + events.add_event(event) + t.rollback() diff --git a/microsetta_private_api/repo/transaction.py b/microsetta_private_api/repo/transaction.py index 94111b0ae..22e7b5646 100644 --- a/microsetta_private_api/repo/transaction.py +++ b/microsetta_private_api/repo/transaction.py @@ -17,6 +17,9 @@ class Transaction: host=AMGUT_CONFIG.host, port=AMGUT_CONFIG.port) + # Register any extra psycopg2 types we need it to understand + psycopg2.extras.register_uuid() + @staticmethod @atexit.register def shutdown_pool(): From c6a2229e7f2876759248752534c7c61631cab47f Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 19:33:36 -0700 Subject: [PATCH 3/7] Registering types in psycopg2 breaks util.json_converter. Easier to just adapt the uuid in the repo layer than figure out what psycopg2 is actually doing --- microsetta_private_api/repo/event_log_repo.py | 6 +++--- microsetta_private_api/repo/transaction.py | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/microsetta_private_api/repo/event_log_repo.py b/microsetta_private_api/repo/event_log_repo.py index 70de310e8..2630c3d1f 100644 --- a/microsetta_private_api/repo/event_log_repo.py +++ b/microsetta_private_api/repo/event_log_repo.py @@ -12,7 +12,7 @@ def _event_to_row(event: LogEvent): return { - "id": event.event_id, + "id": str(event.event_id), "event_type": event.event_type.value, "event_subtype": event.event_subtype.value, # event_time is set by db upon creation, need not be passed in. 
@@ -21,7 +21,7 @@ def _event_to_row(event: LogEvent): def _row_to_event(row): - return LogEvent(row['id'], + return LogEvent(UUID(row['id']), EventType(row['event_type']), EventSubtype(row['event_subtype']), row['event_time'], @@ -118,5 +118,5 @@ def delete_event(self, event_id: UUID): cur.execute("DELETE FROM event_log " "WHERE " "id = %s", - (event_id,)) + (str(event_id),)) return cur.rowcount == 1 diff --git a/microsetta_private_api/repo/transaction.py b/microsetta_private_api/repo/transaction.py index 22e7b5646..94111b0ae 100644 --- a/microsetta_private_api/repo/transaction.py +++ b/microsetta_private_api/repo/transaction.py @@ -17,9 +17,6 @@ class Transaction: host=AMGUT_CONFIG.host, port=AMGUT_CONFIG.port) - # Register any extra psycopg2 types we need it to understand - psycopg2.extras.register_uuid() - @staticmethod @atexit.register def shutdown_pool(): From 3fbe586791e44a6d883de6176fed97b70d6ceeed Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 19:34:18 -0700 Subject: [PATCH 4/7] Flake8 --- microsetta_private_api/repo/tests/test_event_log_repo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microsetta_private_api/repo/tests/test_event_log_repo.py b/microsetta_private_api/repo/tests/test_event_log_repo.py index 91f1c3839..f440cf1c3 100644 --- a/microsetta_private_api/repo/tests/test_event_log_repo.py +++ b/microsetta_private_api/repo/tests/test_event_log_repo.py @@ -10,7 +10,7 @@ import uuid -from microsetta_private_api.util.util import json_converter, fromisotime +from microsetta_private_api.util.util import json_converter class EventLogTests(unittest.TestCase): From 4ca184b8e1faf56fbea2bbfec3eebeab9a2e635b Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 19:41:21 -0700 Subject: [PATCH 5/7] Travis' Flake8 is just wrong, but whatever. --- microsetta_private_api/model/log_event.py | 1 + 1 file changed, 1 insertion(+) diff --git a/microsetta_private_api/model/log_event.py b/microsetta_private_api/model/log_event.py index 33aabf6d8..7e78d4982 100644 --- a/microsetta_private_api/model/log_event.py +++ b/microsetta_private_api/model/log_event.py @@ -4,6 +4,7 @@ from microsetta_private_api.model.model_base import ModelBase from enum import Enum, unique + # NOTE: The string values of these enums are persisted to the database # therefore. They MUST NOT BE CHANGED. 
@unique From a72133dfbca2d797c38473e9a7c5e6b8defcabb9 Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Fri, 26 Jun 2020 19:54:18 -0700 Subject: [PATCH 6/7] test showing event_state doesn't need to include email and account_id just for good measure --- .../repo/tests/test_event_log_repo.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/microsetta_private_api/repo/tests/test_event_log_repo.py b/microsetta_private_api/repo/tests/test_event_log_repo.py index f440cf1c3..ebd67a5d6 100644 --- a/microsetta_private_api/repo/tests/test_event_log_repo.py +++ b/microsetta_private_api/repo/tests/test_event_log_repo.py @@ -95,3 +95,49 @@ def test_dups_rejected(self): with self.assertRaises(psycopg2.errors.UniqueViolation): events.add_event(event) t.rollback() + + def test_empty_emails(self): + acct_id = uuid.uuid4() + event1 = LogEvent( + uuid.uuid4(), + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED, + None, + { + 'email': "foobarbaz@itzatest.com" + } + ) + event2 = LogEvent( + uuid.uuid4(), + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED, + None, + { + 'email': "foobarbaz@itzatest.com" + } + ) + event3 = LogEvent( + uuid.uuid4(), + EventType.EMAIL, + EventSubtype.EMAIL_BANKED_SAMPLE_NOW_PLATED, + None, + { + 'account_id': str(acct_id) + } + ) + with Transaction() as t: + events = EventLogRepo(t) + events.add_event(event1) + events.add_event(event2) + events.add_event(event3) + + self.assertEqual( + len(events.get_events_by_email("foobarbaz@itzatest.com")), + 2 + ) + self.assertEqual( + len(events.get_events_by_account(acct_id)), + 1 + ) + + t.rollback() From c57c36f9f9180d5847fd0320bac0dd3dd4f0daed Mon Sep 17 00:00:00 2001 From: dhakim87 Date: Mon, 29 Jun 2020 12:44:25 -0700 Subject: [PATCH 7/7] Explicitly made timestamp non null. Switched behavior on email search to be the 'slow' case insensitively search, since it turned out LIKE operators with suffix wildcards don't use the btree index in postgres 9.5 --- microsetta_private_api/db/patches/0065.sql | 2 +- microsetta_private_api/repo/event_log_repo.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/microsetta_private_api/db/patches/0065.sql b/microsetta_private_api/db/patches/0065.sql index 8f710efca..f5932d451 100644 --- a/microsetta_private_api/db/patches/0065.sql +++ b/microsetta_private_api/db/patches/0065.sql @@ -2,7 +2,7 @@ CREATE TABLE ag.event_log( id uuid PRIMARY KEY NOT NULL, event_type varchar(100) NOT NULL, event_subtype varchar(100) NOT NULL, - event_time timestamptz default current_timestamp, + event_time timestamptz default current_timestamp NOT NULL, event_state jsonb); -- Full event log sorted by time diff --git a/microsetta_private_api/repo/event_log_repo.py b/microsetta_private_api/repo/event_log_repo.py index 2630c3d1f..9c59dca89 100644 --- a/microsetta_private_api/repo/event_log_repo.py +++ b/microsetta_private_api/repo/event_log_repo.py @@ -84,21 +84,20 @@ def get_events_by_subtype(self, # See https://www.postgresql.org/docs/9.5/functions-json.html#FUNCTIONS-JSON-OP-TABLE # noqa # to understand referencing email field from jsonb representation - # TODO: I believe the LIKE operator can make use of the btree index i've - # set on this table so long as the pattern specified is a case sensitive - # prefix. 
To support ILIKE or searching middle of email field, may have - # to pull it out of the jsonb field, or dive into gin_trgm_ops + # Based on results of EXPLAIN of queries in psql 9.5, looks like postgres + # can use our index for exact matches, but can't use it for anything with a + # wildcard + # TODO: Should test against newest postgresql, May need to look up gin + # indexes to improve this if performance becomes an issue. # See https://stackoverflow.com/questions/33025890/indexing-jsonb-data-for-pattern-matching-searches # noqa def get_events_by_email(self, email: str): with self._transaction.dict_cursor() as cur: cur.execute("SELECT " + _read_cols + " FROM " "event_log " "WHERE " - "event_state->>'email' LIKE %s " + "event_state->>'email' ILIKE %s " "ORDER BY " "event_state->>'email', event_time DESC", - # Do not change this pattern without analyzing - # the query in postgres to ensure it uses indexes (email+"%",)) return [_row_to_event(row) for row in cur.fetchall()]
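For readers following the incremental hunks above, the net content of microsetta_private_api/db/patches/0065.sql once all seven patches are applied is reproduced below. This is a convenience reconstruction assembled only from the diffs in this series, not an additional change:

CREATE TABLE ag.event_log(
    id uuid PRIMARY KEY NOT NULL,
    event_type varchar(100) NOT NULL,
    event_subtype varchar(100) NOT NULL,
    event_time timestamptz default current_timestamp NOT NULL,
    event_state jsonb);

-- Full event log sorted by time
CREATE INDEX idx_event_log_event_time ON ag.event_log (event_time);
-- Event log filtered by type sorted by time
CREATE INDEX idx_event_log_event_type_event_time ON ag.event_log (event_type, event_time);
-- Event log filtered by type and subtype sorted by time
CREATE INDEX idx_event_log_event_type_event_subtype_event_time ON ag.event_log (event_type, event_subtype, event_time);
-- Event log filtered by user email sorted by time
CREATE INDEX idx_events_state_email_time ON ag.event_log ((event_state->>'email'), event_time);
-- Event log filtered by user account id sorted by time
CREATE INDEX idx_events_state_account_id_time ON ag.event_log ((event_state->>'account_id'), event_time);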
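The final commit deliberately leaves email search on the slower path: the expression btree index on (event_state->>'email') serves exact matches, but an ILIKE pattern with a wildcard cannot use it. As the remaining TODO notes, one possible follow-up, sketched here and not part of this patch series, is a trigram index via the pg_trgm extension; the index name below is hypothetical, and EXPLAIN would be needed to confirm the planner actually picks it up for the query issued by get_events_by_email:

-- Sketch of a possible later patch, assuming pg_trgm is acceptable in this deployment
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_events_state_email_trgm
    ON ag.event_log USING gin ((event_state->>'email') gin_trgm_ops);

-- Check the plan for the repo's email search before relying on the index:
EXPLAIN SELECT id, event_type, event_subtype, event_time, event_state
FROM ag.event_log
WHERE event_state->>'email' ILIKE 'foobarbaz@%'
ORDER BY event_state->>'email', event_time DESC;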