From 29d27b9aedbc977e21ce05ca4cfb03c963f425b8 Mon Sep 17 00:00:00 2001
From: Philipp Kessling <p.kessling@leibniz-hbi.de>
Date: Tue, 26 Nov 2024 09:21:09 +0100
Subject: [PATCH 1/3] feat: add some unit tests.

---
 dabapush/Configuration/__init__.py         |  1 -
 tests/Writer/test_ndjson_writer.py         | 61 +++++++++++++++++
 tests/test_utils.py                        | 78 ++++++++++++++++++++++
 tests/tests/Configuration/__init__.py      |  0
 tests/tests/Configuration/test_Registry.py | 45 +++++++++++++
 tests/tests/__init__.py                    |  0
 6 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 tests/Writer/test_ndjson_writer.py
 create mode 100644 tests/test_utils.py
 create mode 100644 tests/tests/Configuration/__init__.py
 create mode 100644 tests/tests/Configuration/test_Registry.py
 create mode 100644 tests/tests/__init__.py

diff --git a/dabapush/Configuration/__init__.py b/dabapush/Configuration/__init__.py
index aba4c6e..e69de29 100644
--- a/dabapush/Configuration/__init__.py
+++ b/dabapush/Configuration/__init__.py
@@ -1 +0,0 @@
-from .FileWriterConfiguration import FileWriterConfiguration
diff --git a/tests/Writer/test_ndjson_writer.py b/tests/Writer/test_ndjson_writer.py
new file mode 100644
index 0000000..eb673eb
--- /dev/null
+++ b/tests/Writer/test_ndjson_writer.py
@@ -0,0 +1,61 @@
+"""Test suite for the NDJSONWriter module."""
+
+import pytest
+
+from dabapush.Record import Record
+from dabapush.Writer.NDJSONWriter import NDJSONWriterConfiguration
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        (
+            [{"key1": "value1"}, {"key2": "value2"}],
+            '{"key1":"value1"}\n{"key2":"value2"}\n',
+        ),
+        ([{"key1": "value1", "key2": "value2"}], '{"key1":"value1","key2":"value2"}\n'),
+    ],
+)
+def test_write_ndjson(data, expected, tmp_path):
+    """Should write records to a file in NDJSON format."""
+    configuration = NDJSONWriterConfiguration(
+        name="test",
+        id="test",
+        chunk_size=1,
+        path=str(tmp_path),
+        name_template="test.ndjson",
+    )
+    file_path = tmp_path / "test.ndjson"
+    writer = configuration.get_instance()
+    writer.write((Record(_) for _ in data))
+
+    with file_path.open("rt", encoding="utf8") as f:
+        result = f.read()
+
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        ([{"key1": "value1"}, {"key2": "value2"}], 2),
+        ([{"key1": "value1", "key2": "value2"}], 1),
+    ],
+)
+def test_write_ndjson_line_count(data, expected, tmp_path):
+    """Should write records to a file in NDJSON format."""
+    configuration = NDJSONWriterConfiguration(
+        name="test",
+        id="test",
+        chunk_size=1,
+        path=str(tmp_path),
+        name_template="test.ndjson",
+    )
+    file_path = tmp_path / "test.ndjson"
+    writer = configuration.get_instance()
+    writer.write((Record(_) for _ in data))
+
+    with file_path.open("rt", encoding="utf8") as f:
+        result = f.readlines()
+
+    assert len(result) == expected
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..a72d245
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,78 @@
+"""Test suite for the utils module."""
+
+import pytest
+
+from dabapush.utils import flatten, safe_access, safe_write, unpack
+
+# pylint: disable=W0622
+
+
+@pytest.mark.parametrize(
+    "nested_dict, namespace, expected",
+    [
+        ({"a": {"b": "yuk"}}, None, {"a.b": "yuk"}),
+        (
+            {"a": {"b": "yuk", "c": [{"d": "meh"}]}},
+            None,
+            {"a.b": "yuk", "a.c": [{"d": "meh"}]},
+        ),
+        ({"a": {"b": "yuk"}}, "namespace", {"namespace.a.b": "yuk"}),
+    ],
+)
+def test_flatten(nested_dict, namespace, expected):
+    """Should flatten dicts correctly."""
+    assert flatten(nested_dict, namespace=namespace) == expected
+
+
+@pytest.mark.parametrize(
+    "nested_dict, path, expected",
+    [
+        ({"a": {"b": {"c": "value"}}}, ["a", "b", "c"], "value"),
+        ({"a": {"b": {"c": "value"}}}, ["a", "b", "d"], None),
+        ({"a": {"b": {"c": "value"}}}, ["a", "b"], {"c": "value"}),
+    ],
+)
+def test_safe_access(nested_dict, path, expected):
+    """Should safely access nested dicts."""
+    assert safe_access(nested_dict, path) == expected
+
+
+@pytest.mark.parametrize(
+    "nested_dict, path, key, value, expected",
+    [
+        (
+            {"a": {"b": {"c": "value"}}},
+            ["a", "b"],
+            "d",
+            "new_value",
+            {"a": {"b": {"c": "value", "d": "new_value"}}},
+        ),
+        (
+            {"a": {"b": {"c": "value"}}},
+            ["a", "b", "e"],
+            "f",
+            "another_value",
+            {"a": {"b": {"c": "value", "e": {"f": "another_value"}}}},
+        ),
+    ],
+)
+def test_safe_write(nested_dict, path, key, value, expected):
+    """Should safely write to nested dicts."""
+    assert safe_write(nested_dict, path, key, value) == expected
+
+
+@pytest.mark.parametrize(
+    "includes, id, id_key, expected",
+    [
+        (
+            [{"id": "1", "name": "item1"}, {"id": "2", "name": "item2"}],
+            "1",
+            "id",
+            {"id": "1", "name": "item1"},
+        ),
+        ([{"id": "1", "name": "item1"}, {"id": "2", "name": "item2"}], "3", "id", None),
+    ],
+)
+def test_unpack(includes, id, id_key, expected):
+    """Should unpack a dict from a list of dicts."""
+    assert unpack(id, includes, id_key) == expected
diff --git a/tests/tests/Configuration/__init__.py b/tests/tests/Configuration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/tests/Configuration/test_Registry.py b/tests/tests/Configuration/test_Registry.py
new file mode 100644
index 0000000..98d10d0
--- /dev/null
+++ b/tests/tests/Configuration/test_Registry.py
@@ -0,0 +1,45 @@
+"""Test suite for the Registry module."""
+
+from importlib.metadata import EntryPoint
+
+from dabapush.Configuration import Registry
+from dabapush.Configuration.ReaderConfiguration import ReaderConfiguration
+from dabapush.Configuration.WriterConfiguration import WriterConfiguration
+
+
+def test_readers():
+    """Should fetch readers from the reader entry point."""
+    assert isinstance(Registry.readers(), list)
+    assert all(isinstance(_, EntryPoint) for _ in Registry.readers())
+
+
+def test_writers():
+    """Should fetch writers from the writer entry point."""
+    assert isinstance(Registry.writers(), list)
+    assert all(isinstance(_, EntryPoint) for _ in Registry.writers())
+
+
+def test_get_reader():
+    """Should fetch a reader by name."""
+    reader = Registry.get_reader("NDJSON")
+    assert reader is not None
+    assert issubclass(reader, ReaderConfiguration)
+
+
+def test_get_writer():
+    """Should fetch a writer by name."""
+    writer = Registry.get_writer("NDJSON")
+    assert writer is not None
+    assert issubclass(writer, WriterConfiguration)
+
+
+def test_list_all_readers():
+    """Should list all available readers."""
+    assert isinstance(Registry.list_all_readers(), list)
+    assert all(isinstance(_, str) for _ in Registry.list_all_readers())
+
+
+def test_list_all_writers():
+    """Should list all available writers."""
+    assert isinstance(Registry.list_all_readers(), list)
+    assert all(isinstance(_, str) for _ in Registry.list_all_readers())
diff --git a/tests/tests/__init__.py b/tests/tests/__init__.py
new file mode 100644
index 0000000..e69de29

From c167f6c208b80ba520b28441ea6adb7dff03510a Mon Sep 17 00:00:00 2001
From: Philipp Kessling <p.kessling@leibniz-hbi.de>
Date: Tue, 26 Nov 2024 09:53:33 +0100
Subject: [PATCH 2/3] feat: add unit tests for CSVWriter .

---
 .../Configuration/FileWriterConfiguration.py  | 31 +++++---
 dabapush/Writer/CSVWriter.py                  | 17 ++---
 tests/Writer/test_csv_writer.py               | 71 +++++++++++++++++++
 3 files changed, 99 insertions(+), 20 deletions(-)
 create mode 100644 tests/Writer/test_csv_writer.py

diff --git a/dabapush/Configuration/FileWriterConfiguration.py b/dabapush/Configuration/FileWriterConfiguration.py
index 157f93a..ed8e3cf 100644
--- a/dabapush/Configuration/FileWriterConfiguration.py
+++ b/dabapush/Configuration/FileWriterConfiguration.py
@@ -1,10 +1,16 @@
 """FileWriterConfiguration provides a base class for file-based Writers."""
+
+import abc
 from datetime import datetime
 from string import Template
 from typing import Dict, Optional
 
+from loguru import logger as log
+
 from .WriterConfiguration import WriterConfiguration
 
+# pylint: disable=W0221,W0622,R0917,R0913
+
 
 class FileWriterConfiguration(WriterConfiguration):
     """Abstract class describing configuration items for a file based writer"""
@@ -33,15 +39,18 @@ def make_file_name(self, additional_keys: Optional[Dict] = None) -> str:
           Interpolated file name as str.
         """
         now = datetime.now()
-        return Template(self.name_template).substitute(
-            **{
-                "date": datetime.strftime(now, "%Y-%m-%d"),
-                "time": datetime.strftime(now, "%H%M"),
-                "name": self.name,
-                "id": self.id,
-                **(additional_keys or {}),
-            }
-        )
+        available_data = {
+            "date": datetime.strftime(now, "%Y-%m-%d"),
+            "time": datetime.strftime(now, "%H%M"),
+            "chunk_size": self.chunk_size,
+            "name": self.name,
+            "id": self.id,
+            **(additional_keys or {}),
+        }
+
+        log.info(f"Available data: {available_data}")
+
+        return Template(self.name_template).substitute(**available_data)
 
     def set_name_template(self, template: str):
         """Sets the template string.
@@ -51,3 +60,7 @@ def set_name_template(self, template: str):
             Template string to use.
         """
         self.name_template = template
+
+    @abc.abstractmethod
+    def get_instance(self) -> object or None:
+        """Get configured instance of Writer"""
diff --git a/dabapush/Writer/CSVWriter.py b/dabapush/Writer/CSVWriter.py
index c9400e0..6fa8e59 100644
--- a/dabapush/Writer/CSVWriter.py
+++ b/dabapush/Writer/CSVWriter.py
@@ -10,6 +10,8 @@
 from ..Configuration.FileWriterConfiguration import FileWriterConfiguration
 from .Writer import Writer
 
+# pylint: disable=R0917
+
 
 class CSVWriter(Writer):
     """Writes CSVs from buffered stream"""
@@ -23,11 +25,9 @@ def persist(self):
         """persist buffer to disk"""
 
         last_rows = self.buffer
-        self.buffer = []
 
-        log.info(f"Persisted {len(last_rows)} records")
         _path = Path(self.config.path) / self.config.make_file_name(
-            {"chunk_number": self.chunk_number}
+            {"chunk_number": self.chunk_number, "type": "csv"}
         )
         pd.DataFrame(
             (a.payload for a in last_rows),
@@ -35,6 +35,9 @@ def persist(self):
             r"\n|\r", r"\\n", regex=True
         ).to_csv(_path, index=False)
         self.chunk_number += 1
+        self.buffer = []
+
+        log.info(f"Persisted {len(last_rows)} records")
 
         return len(last_rows)
 
@@ -57,14 +60,6 @@ def __init__(  # pylint: disable=R0913
         )
         self.type = "csv"
 
-    @property
-    def file_path(self) -> Path:
-        """get the path to a file to write in"""
-        # evalutate self.name_template
-        file_name = self.make_file_name({"type": "csv"})
-        # append to self.path and return
-        return Path(self.path) / file_name
-
     def get_instance(self):  # pylint: disable=W0221
         """get configured instance of CSVWriter"""
         return CSVWriter(self)
diff --git a/tests/Writer/test_csv_writer.py b/tests/Writer/test_csv_writer.py
new file mode 100644
index 0000000..9581dd5
--- /dev/null
+++ b/tests/Writer/test_csv_writer.py
@@ -0,0 +1,71 @@
+"""Tests for CSVWriter class."""
+
+import pytest
+
+from dabapush.Record import Record
+from dabapush.Writer.CSVWriter import CSVWriterConfiguration
+
+# pylint: disable=W0621
+
+
+@pytest.fixture
+def config_factory():
+    """Return a factory for CSVWriterConfiguration.
+
+    Parameters
+    ----------
+    path : str
+        The path to write to.
+
+    Returns
+    -------
+    function
+        A factory function that returns a CSVWriterConfiguration.
+    """
+    yield lambda path: CSVWriterConfiguration(
+        name="test", chunk_size=1000000, path=str(path)
+    )
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        ([{"key1": "value1"}, {"key2": "value2"}], "key1,key2\nvalue1,\n,value2\n"),
+        ([{"key1": "value1", "key2": "value2"}], "key1,key2\nvalue1,value2\n"),
+    ],
+)
+def test_write_csv(data, expected, config_factory, tmp_path):
+    """Should write the correct data to the file."""
+    config = config_factory(path=tmp_path)
+    writer = config.get_instance()
+    writer.write((Record(payload=d) for d in data))
+    writer.persist()
+
+    files = tmp_path.glob("*.csv")
+
+    data = [file.read_text() for file in files]
+
+    assert data[0] == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        ([{"key1": "value1"}, {"key2": "value2"}], 3),  # 1 header + 2 data rows
+        ([{"key1": "value1", "key2": "value2"}], 2),  # 1 header + 1 data row
+    ],
+)
+def test_write_csv_line_count(data, expected, config_factory, tmp_path):
+    """Should write the correct number of lines to the file."""
+    config = config_factory(path=tmp_path)
+    writer = config.get_instance()
+    writer.write((Record(payload=d) for d in data))
+    writer.persist()
+
+    files = tmp_path.glob("*.csv")
+
+    data = [file.read_text() for file in files]
+
+    print(data)
+
+    assert (len(data[0].split("\n")) - 1) == expected

From 3e80768107cb461326902542f5f1571302b85f50 Mon Sep 17 00:00:00 2001
From: Philipp Kessling <p.kessling@leibniz-hbi.de>
Date: Tue, 26 Nov 2024 09:54:17 +0100
Subject: [PATCH 3/3] fix: not an abstract classmethod.

---
 dabapush/Configuration/PlugInConfiguration.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dabapush/Configuration/PlugInConfiguration.py b/dabapush/Configuration/PlugInConfiguration.py
index 441266b..ec4706f 100644
--- a/dabapush/Configuration/PlugInConfiguration.py
+++ b/dabapush/Configuration/PlugInConfiguration.py
@@ -20,7 +20,6 @@ def __init__(self, name: str, id: str or None) -> None:
         self.name = name
         self.id = id if id is not None else str(uuid4())
 
-    @classmethod
     @abc.abstractmethod
-    def get_instance(cls) -> object or None:
+    def get_instance(self) -> object or None:
         """Get a configured instance of the appropriate reader or writer plugin."""