Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

25 increase test coverage #48

Merged
merged 3 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions dabapush/Configuration/FileWriterConfiguration.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
"""FileWriterConfiguration provides a base class for file-based Writers."""

import abc
from datetime import datetime
from string import Template
from typing import Dict, Optional

from loguru import logger as log

from .WriterConfiguration import WriterConfiguration

# pylint: disable=W0221,W0622,R0917,R0913


class FileWriterConfiguration(WriterConfiguration):
"""Abstract class describing configuration items for a file based writer"""
Expand Down Expand Up @@ -33,15 +39,18 @@ def make_file_name(self, additional_keys: Optional[Dict] = None) -> str:
Interpolated file name as str.
"""
now = datetime.now()
return Template(self.name_template).substitute(
**{
"date": datetime.strftime(now, "%Y-%m-%d"),
"time": datetime.strftime(now, "%H%M"),
"name": self.name,
"id": self.id,
**(additional_keys or {}),
}
)
available_data = {
"date": datetime.strftime(now, "%Y-%m-%d"),
"time": datetime.strftime(now, "%H%M"),
"chunk_size": self.chunk_size,
"name": self.name,
"id": self.id,
**(additional_keys or {}),
}

log.info(f"Available data: {available_data}")

return Template(self.name_template).substitute(**available_data)

def set_name_template(self, template: str):
"""Sets the template string.
Expand All @@ -51,3 +60,7 @@ def set_name_template(self, template: str):
Template string to use.
"""
self.name_template = template

@abc.abstractmethod
def get_instance(self) -> object or None:
"""Get configured instance of Writer"""
3 changes: 1 addition & 2 deletions dabapush/Configuration/PlugInConfiguration.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def __init__(self, name: str, id: str or None) -> None:
self.name = name
self.id = id if id is not None else str(uuid4())

@classmethod
@abc.abstractmethod
def get_instance(cls) -> object or None:
def get_instance(self) -> object or None:
"""Get a configured instance of the appropriate reader or writer plugin."""
1 change: 0 additions & 1 deletion dabapush/Configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from .FileWriterConfiguration import FileWriterConfiguration
17 changes: 6 additions & 11 deletions dabapush/Writer/CSVWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from ..Configuration.FileWriterConfiguration import FileWriterConfiguration
from .Writer import Writer

# pylint: disable=R0917


class CSVWriter(Writer):
"""Writes CSVs from buffered stream"""
Expand All @@ -23,18 +25,19 @@ def persist(self):
"""persist buffer to disk"""

last_rows = self.buffer
self.buffer = []

log.info(f"Persisted {len(last_rows)} records")
_path = Path(self.config.path) / self.config.make_file_name(
{"chunk_number": self.chunk_number}
{"chunk_number": self.chunk_number, "type": "csv"}
)
pd.DataFrame(
(a.payload for a in last_rows),
).replace(
r"\n|\r", r"\\n", regex=True
).to_csv(_path, index=False)
self.chunk_number += 1
self.buffer = []

log.info(f"Persisted {len(last_rows)} records")

return len(last_rows)

Expand All @@ -57,14 +60,6 @@ def __init__( # pylint: disable=R0913
)
self.type = "csv"

@property
def file_path(self) -> Path:
"""get the path to a file to write in"""
# evalutate self.name_template
file_name = self.make_file_name({"type": "csv"})
# append to self.path and return
return Path(self.path) / file_name

def get_instance(self): # pylint: disable=W0221
"""get configured instance of CSVWriter"""
return CSVWriter(self)
71 changes: 71 additions & 0 deletions tests/Writer/test_csv_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Tests for CSVWriter class."""

import pytest

from dabapush.Record import Record
from dabapush.Writer.CSVWriter import CSVWriterConfiguration

# pylint: disable=W0621


@pytest.fixture
def config_factory():
"""Return a factory for CSVWriterConfiguration.

Parameters
----------
path : str
The path to write to.

Returns
-------
function
A factory function that returns a CSVWriterConfiguration.
"""
yield lambda path: CSVWriterConfiguration(
name="test", chunk_size=1000000, path=str(path)
)


@pytest.mark.parametrize(
"data, expected",
[
([{"key1": "value1"}, {"key2": "value2"}], "key1,key2\nvalue1,\n,value2\n"),
([{"key1": "value1", "key2": "value2"}], "key1,key2\nvalue1,value2\n"),
],
)
def test_write_csv(data, expected, config_factory, tmp_path):
"""Should write the correct data to the file."""
config = config_factory(path=tmp_path)
writer = config.get_instance()
writer.write((Record(payload=d) for d in data))
writer.persist()

files = tmp_path.glob("*.csv")

data = [file.read_text() for file in files]

assert data[0] == expected


@pytest.mark.parametrize(
"data, expected",
[
([{"key1": "value1"}, {"key2": "value2"}], 3), # 1 header + 2 data rows
([{"key1": "value1", "key2": "value2"}], 2), # 1 header + 1 data row
],
)
def test_write_csv_line_count(data, expected, config_factory, tmp_path):
"""Should write the correct number of lines to the file."""
config = config_factory(path=tmp_path)
writer = config.get_instance()
writer.write((Record(payload=d) for d in data))
writer.persist()

files = tmp_path.glob("*.csv")

data = [file.read_text() for file in files]

print(data)

assert (len(data[0].split("\n")) - 1) == expected
61 changes: 61 additions & 0 deletions tests/Writer/test_ndjson_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Test suite for the NDJSONWriter module."""

import pytest

from dabapush.Record import Record
from dabapush.Writer.NDJSONWriter import NDJSONWriterConfiguration


@pytest.mark.parametrize(
"data, expected",
[
(
[{"key1": "value1"}, {"key2": "value2"}],
'{"key1":"value1"}\n{"key2":"value2"}\n',
),
([{"key1": "value1", "key2": "value2"}], '{"key1":"value1","key2":"value2"}\n'),
],
)
def test_write_ndjson(data, expected, tmp_path):
"""Should write records to a file in NDJSON format."""
configuration = NDJSONWriterConfiguration(
name="test",
id="test",
chunk_size=1,
path=str(tmp_path),
name_template="test.ndjson",
)
file_path = tmp_path / "test.ndjson"
writer = configuration.get_instance()
writer.write((Record(_) for _ in data))

with file_path.open("rt", encoding="utf8") as f:
result = f.read()

assert result == expected


@pytest.mark.parametrize(
"data, expected",
[
([{"key1": "value1"}, {"key2": "value2"}], 2),
([{"key1": "value1", "key2": "value2"}], 1),
],
)
def test_write_ndjson_line_count(data, expected, tmp_path):
"""Should write records to a file in NDJSON format."""
configuration = NDJSONWriterConfiguration(
name="test",
id="test",
chunk_size=1,
path=str(tmp_path),
name_template="test.ndjson",
)
file_path = tmp_path / "test.ndjson"
writer = configuration.get_instance()
writer.write((Record(_) for _ in data))

with file_path.open("rt", encoding="utf8") as f:
result = f.readlines()

assert len(result) == expected
78 changes: 78 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Test suite for the utils module."""

import pytest

from dabapush.utils import flatten, safe_access, safe_write, unpack

# pylint: disable=W0622


@pytest.mark.parametrize(
"nested_dict, namespace, expected",
[
({"a": {"b": "yuk"}}, None, {"a.b": "yuk"}),
(
{"a": {"b": "yuk", "c": [{"d": "meh"}]}},
None,
{"a.b": "yuk", "a.c": [{"d": "meh"}]},
),
({"a": {"b": "yuk"}}, "namespace", {"namespace.a.b": "yuk"}),
],
)
def test_flatten(nested_dict, namespace, expected):
"""Should flatten dicts correctly."""
assert flatten(nested_dict, namespace=namespace) == expected


@pytest.mark.parametrize(
"nested_dict, path, expected",
[
({"a": {"b": {"c": "value"}}}, ["a", "b", "c"], "value"),
({"a": {"b": {"c": "value"}}}, ["a", "b", "d"], None),
({"a": {"b": {"c": "value"}}}, ["a", "b"], {"c": "value"}),
],
)
def test_safe_access(nested_dict, path, expected):
"""Should safely access nested dicts."""
assert safe_access(nested_dict, path) == expected


@pytest.mark.parametrize(
"nested_dict, path, key, value, expected",
[
(
{"a": {"b": {"c": "value"}}},
["a", "b"],
"d",
"new_value",
{"a": {"b": {"c": "value", "d": "new_value"}}},
),
(
{"a": {"b": {"c": "value"}}},
["a", "b", "e"],
"f",
"another_value",
{"a": {"b": {"c": "value", "e": {"f": "another_value"}}}},
),
],
)
def test_safe_write(nested_dict, path, key, value, expected):
"""Should safely write to nested dicts."""
assert safe_write(nested_dict, path, key, value) == expected


@pytest.mark.parametrize(
"includes, id, id_key, expected",
[
(
[{"id": "1", "name": "item1"}, {"id": "2", "name": "item2"}],
"1",
"id",
{"id": "1", "name": "item1"},
),
([{"id": "1", "name": "item1"}, {"id": "2", "name": "item2"}], "3", "id", None),
],
)
def test_unpack(includes, id, id_key, expected):
"""Should unpack a dict from a list of dicts."""
assert unpack(id, includes, id_key) == expected
Empty file.
45 changes: 45 additions & 0 deletions tests/tests/Configuration/test_Registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Test suite for the Registry module."""

from importlib.metadata import EntryPoint

from dabapush.Configuration import Registry
from dabapush.Configuration.ReaderConfiguration import ReaderConfiguration
from dabapush.Configuration.WriterConfiguration import WriterConfiguration


def test_readers():
"""Should fetch readers from the reader entry point."""
assert isinstance(Registry.readers(), list)
assert all(isinstance(_, EntryPoint) for _ in Registry.readers())


def test_writers():
"""Should fetch writers from the writer entry point."""
assert isinstance(Registry.writers(), list)
assert all(isinstance(_, EntryPoint) for _ in Registry.writers())


def test_get_reader():
"""Should fetch a reader by name."""
reader = Registry.get_reader("NDJSON")
assert reader is not None
assert issubclass(reader, ReaderConfiguration)


def test_get_writer():
"""Should fetch a writer by name."""
writer = Registry.get_writer("NDJSON")
assert writer is not None
assert issubclass(writer, WriterConfiguration)


def test_list_all_readers():
"""Should list all available readers."""
assert isinstance(Registry.list_all_readers(), list)
assert all(isinstance(_, str) for _ in Registry.list_all_readers())


def test_list_all_writers():
"""Should list all available writers."""
assert isinstance(Registry.list_all_readers(), list)
assert all(isinstance(_, str) for _ in Registry.list_all_readers())
Empty file added tests/tests/__init__.py
Empty file.