Skip to content

Commit

Permalink
try to optimize statement output a bit more
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed Nov 19, 2024
1 parent 1bc494c commit 9b4d3c9
Showing 1 changed file with 23 additions and 9 deletions.
32 changes: 23 additions & 9 deletions nomenklatura/statement/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from followthemoney.cli.util import MAX_LINE

from nomenklatura.statement.statement import S
from nomenklatura.util import pack_prop, unpack_prop
from nomenklatura.util import unpack_prop

JSON = "json"
CSV = "csv"
Expand Down Expand Up @@ -167,7 +167,7 @@ def __init__(self, fh: TextIO) -> None:

def write(self, stmt: S) -> None:
row = stmt.to_csv_row()
self._batch.append([row.get(c) for c in CSV_COLUMNS])
self._batch.append([row[c] for c in CSV_COLUMNS])
if len(self._batch) >= CSV_BATCH:
self.writer.writerows(self._batch)
self._batch.clear()
Expand All @@ -189,13 +189,27 @@ def __init__(self, fh: TextIO) -> None:
self._batch: List[List[Optional[str]]] = []

def write(self, stmt: S) -> None:
row = stmt.to_csv_row()
prop = row.pop("prop")
schema = row.pop("schema")
if prop is None or schema is None:
raise ValueError("Cannot pack statement without prop and schema")
row["prop"] = pack_prop(schema, prop)
self._batch.append([row.get(c) for c in PACK_COLUMNS])
# HACK: This is very similar to the CSV writer, but it sits in the innermost
# loop of the application, so we deliberately duplicate the code here.
target_value: Optional[str] = "t" if stmt.target else "f"
if stmt.target is None:
target_value = None
external_value: Optional[str] = "t" if stmt.external else "f"
if stmt.external is None:
external_value = None
row = [
stmt.entity_id,
f"{stmt.schema}:{stmt.prop}",
stmt.value,
stmt.dataset,
stmt.lang,
stmt.original_value,
target_value,
external_value,
stmt.first_seen,
stmt.last_seen,
]
self._batch.append(row)
if len(self._batch) >= CSV_BATCH:
self.writer.writerows(self._batch)
self._batch.clear()
Expand Down

0 comments on commit 9b4d3c9

Please sign in to comment.