From 5cfedf1b1b87abfd41a274e473c491e4abdd2516 Mon Sep 17 00:00:00 2001 From: Miikka Kallio Date: Tue, 14 Nov 2023 18:11:50 +0200 Subject: [PATCH 1/5] Kuljetustiedot from .csv files. Added Asiakas to osapuolenroolit (temporary, should be removed with updated roles). Fixed outerjoin in buildings.py, previous code gave errors with csv files. Updated some Jatelaji models to reflect what is written in the kuljetustiedot. Changed siirtotiedosto to read from csv rather than excel. Converted test kuljetustiedot from excel to csv. Added already made fixes for tests (init_database.bat & update_database.bat). Modified tests. --- .../R__import_koodisto_osapuolenlaji.sql | 3 +- .../providers/db/services/buildings.py | 2 +- jkrimporter/providers/lahti/models.py | 8 ++-- jkrimporter/providers/lahti/siirtotiedosto.py | 41 +++++++++---------- .../kuljetustiedot_csv.csv | 4 ++ tests/scripts/init_database.bat | 2 +- tests/scripts/update_database.bat | 2 +- tests/test_data_import.py | 3 +- tests/test_lahti_siirtotiedosto.py | 4 +- 9 files changed, 38 insertions(+), 31 deletions(-) create mode 100644 tests/data/test_lahti_siirtotiedosto/kuljetustiedot_csv.csv diff --git a/db/migrations/R__import_koodisto_osapuolenlaji.sql b/db/migrations/R__import_koodisto_osapuolenlaji.sql index 33983dae..42d5e829 100644 --- a/db/migrations/R__import_koodisto_osapuolenlaji.sql +++ b/db/migrations/R__import_koodisto_osapuolenlaji.sql @@ -1,4 +1,5 @@ insert into jkr_koodistot.osapuolenrooli(id, selite) values (1,'Omistaja'), - (2,'Vanhin asukas') + (2,'Vanhin asukas'), + (3,'Asiakas') on conflict (id) do update set selite = excluded.selite; diff --git a/jkrimporter/providers/db/services/buildings.py b/jkrimporter/providers/db/services/buildings.py index dfbe4377..178e1fef 100644 --- a/jkrimporter/providers/db/services/buildings.py +++ b/jkrimporter/providers/db/services/buildings.py @@ -316,7 +316,7 @@ def _find_by_address(session: "Session", haltija: "Yhteystieto"): select(Rakennus) .join(Osoite) 
.join(Katu) - .join(RakennuksenVanhimmat, outer=True) # allow vapaa-ajanrakennukset + .outerjoin(RakennuksenVanhimmat) # allow vapaa-ajanrakennukset .where( Osoite.posti_numero == haltija.osoite.postinumero, sqlalchemyFunc.lower(Katu.katunimi_fi) == katunimi_lower, diff --git a/jkrimporter/providers/lahti/models.py b/jkrimporter/providers/lahti/models.py index b098b1f4..31efe843 100644 --- a/jkrimporter/providers/lahti/models.py +++ b/jkrimporter/providers/lahti/models.py @@ -19,11 +19,11 @@ class Jatelaji(str, Enum): aluekerays = "Aluekeräys" seka = "Sekajäte" energia = "Energia" - bio = "Bio" - kartonki = "Kartonki" + bio = "Biojäte" + kartonki = "Kartonkipakkaus" pahvi = "Pahvi" - metalli = "Metalli" - lasi = "Lasi" + metalli = "Metallipakkaus" + lasi = "Lasipakkaus" paperi = "Paperi" muovi = "Muovi" liete = "Liete" diff --git a/jkrimporter/providers/lahti/siirtotiedosto.py b/jkrimporter/providers/lahti/siirtotiedosto.py index 8fa59c9d..2aa029a7 100644 --- a/jkrimporter/providers/lahti/siirtotiedosto.py +++ b/jkrimporter/providers/lahti/siirtotiedosto.py @@ -1,42 +1,41 @@ import logging +import csv from pathlib import Path -from openpyxl.reader.excel import load_workbook - -from jkrimporter.datasheets import ExcelCombinedFileSheetCollection, SiirtotiedostoSheet +from jkrimporter.datasheets import SiirtotiedostoSheet from jkrimporter.providers.lahti.models import Asiakas - logger = logging.getLogger(__name__) - class AsiakastiedotSheet(SiirtotiedostoSheet[Asiakas]): @staticmethod def _obj_from_dict(data): return Asiakas.parse_obj(data) - class LahtiSiirtotiedosto: - # Lahti has no set sheet names. It has a directory with different sheets - # for different providers, all having identical format. 
- def __init__(self, path): - self._sheet_collection = ExcelCombinedFileSheetCollection(path) + self._path = path @classmethod def readable_by_me(cls, path): - p = Path(path) - for f in p.iterdir(): - if f.is_file() and f.suffix == ".xlsx": - try: - workbook = load_workbook(filename=f, data_only=True, read_only=True) - sheets = workbook.sheetnames - if "in" in sheets: - return True - except Exception: - pass + directory = Path(path) + for file in directory.iterdir(): + if file.is_file() and file.suffix == ".csv": + return True return False @property def asiakastiedot(self): - return AsiakastiedotSheet(self._sheet_collection) + all_data = [] + + # Iterate through all CSV files in the directory + for csv_file_path in Path(self._path).glob("*.csv"): + with open(csv_file_path, mode="r", encoding="cp1252", newline="") as csv_file: + csv_reader = csv.DictReader(csv_file, delimiter=";", quotechar='"') + data_list = [row for row in csv_reader] + all_data.extend(data_list) + + # Convert to a list of Asiakas objects + asiakas_list = [Asiakas.parse_obj(data) for data in all_data] + + return asiakas_list diff --git a/tests/data/test_lahti_siirtotiedosto/kuljetustiedot_csv.csv b/tests/data/test_lahti_siirtotiedosto/kuljetustiedot_csv.csv new file mode 100644 index 00000000..44b92443 --- /dev/null +++ b/tests/data/test_lahti_siirtotiedosto/kuljetustiedot_csv.csv @@ -0,0 +1,4 @@ 
+UrakoitsijaId;x-koordinaatti;y.koordinaattori;Eranro;UrakoitsijankohdeId;Rakennustunnus/Kiinteistotunnus;Kiinteistonkatuosoite;Kiinteistonposti;kimppa;kimppaid;Haltijannimi;Haltijanyhteyshlo;Haltijankatuosoite;Haltijanposti;Haltijanmaakoodi;Haltijanulkomaanpaikkakunta;Pvmalk;Pvmasti;tyyppiIdEWC;tapahtumannimi;COUNT(kaynnit);SUM(astiamaara);koko;SUM(paino);tyhjennysvali;kertaaviikossa;Voimassaoloviikotalkaen;Voimassaoloviikotasti;Voimassapmvalkaen;Voimassapvmasti;Voimassaoloviikotalkaen2;Voimassaoloviikotasti2;tyhjennysvali2;kertaaviikossa2;PalveluKuuluukokimppaan;Kimpanjakoosuus;palveluKimppakohdeId;KimpanNimi;Kimpasta vastaava;Kimpankatuosoite;Kimpanposti;Kuntatun;Keskeytysalkaen;Keskeytysasti +0000000-9;6761402;427353;1234AB;01-0000001-00;123456789A;HARJUKATU 44;15100 LAHTI;;;ASUNTO OY KAHDEN LAULUMUISTO;HEIKKI LEHMUSTO;VESIJÄRVENKATU 8;15100 LAHTI;FI;;1.1.2023;31.12.2023;Sekaj;660 L SEKAJÄTEASTIA TYHJENNYS;123;4;0,66;10;1;2;1;53;1.1.1900;31.12.2099;;;;;;;;;;;;398;1.2.2023;15.2.2023 +0000000-9;;;;01-0000123-01;134567890B;KIRKKOÄYRÄÄNTIE 1D;16200 ARTJÄRVI;;;LINDROTH OY;JOHN LINDROTH;KIRKKOÄYRÄÄNTIE 1D;16200 ARTJÄRVI;FI;;1.1.2023;31.12.2023;Energia;660 L ENERGIAJÄTEASTIA TYHJENNYS;66;2;0,66;8;1;;1;53;1.1.1900;31.12.2099;;;;;;;01-0000999-99;KIRKKOÄYRÄÄN ENERGIAKIMPPA;01-1110999-01;KIRKKOÄYRÄÄNTIE 11;16200 ARTJÄRVI;560;; +0000000-9;;;;01-0000123-02;100456789B;KUVAKALLIONTIE 1;15230 LAHTI;;;RIKU FORSSTRÖM;RIKU FORSSTRÖM;KUVAKALLIONTIE 1;15230 LAHTI;FI;;1.1.2023;31.12.2023;Energia;660 L ENERGIAJÄTEASTIA TYHJENNYS;22;2;0,66;7;1;;1;53;1.1.1900;31.12.2099;;;;;;;01-0000999-99;KIRKKOÄYRÄÄN ENERGIAKIMPPA;01-1110999-01;KIRKKOÄYRÄÄNTIE 11;16200 ARTJÄRVI;560;; diff --git a/tests/scripts/init_database.bat b/tests/scripts/init_database.bat index 6a60f6fb..be01f05c 100644 --- a/tests/scripts/init_database.bat +++ b/tests/scripts/init_database.bat @@ -38,4 +38,4 @@ ECHO Asukkaat %OGR2OGR_PATH%\\ogr2ogr -f PostgreSQL -overwrite -progress PG:"host=%HOST% port=%PORT% dbname=%DB_NAME% 
user=%USER% ACTIVE_SCHEMA=jkr_dvv" -nln vanhin "./data/test_data_import/DVV_original.xlsx" "R9 huon asukk" ECHO Muunnetaan jkr-muotoon... -psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -f "../scripts/import_dvv.sql" +psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v formatted_date=20220128 -f "../scripts/import_dvv.sql" diff --git a/tests/scripts/update_database.bat b/tests/scripts/update_database.bat index 896a1665..f98a498e 100644 --- a/tests/scripts/update_database.bat +++ b/tests/scripts/update_database.bat @@ -27,4 +27,4 @@ ECHO Asukkaat %OGR2OGR_PATH%\\ogr2ogr -f PostgreSQL -overwrite -progress PG:"host=%HOST% port=%PORT% dbname=%DB_NAME% user=%USER% ACTIVE_SCHEMA=jkr_dvv" -nln vanhin "./data/test_data_import/DVV_update.xlsx" "R9 huon asukk" ECHO Muunnetaan jkr-muotoon... -psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v POIMINTAPVM=20230131 -f "../scripts/import_dvv.sql" +psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v formatted_date=20230131 -f "../scripts/import_dvv.sql" diff --git a/tests/test_data_import.py b/tests/test_data_import.py index 02fa69ef..4683ca10 100644 --- a/tests/test_data_import.py +++ b/tests/test_data_import.py @@ -24,7 +24,8 @@ def engine(): def test_osapuolenrooli(engine): osapuolenroolit = [(1, 'Omistaja'), - (2, 'Vanhin asukas')] + (2, 'Vanhin asukas'), + (3, 'Asiakas')] session = Session(engine) result = session.execute(select([Osapuolenrooli.id, Osapuolenrooli.selite])) assert [tuple(row) for row in result] == osapuolenroolit diff --git a/tests/test_lahti_siirtotiedosto.py b/tests/test_lahti_siirtotiedosto.py index 3a5989a7..ea4a2b1f 100644 --- a/tests/test_lahti_siirtotiedosto.py +++ b/tests/test_lahti_siirtotiedosto.py @@ -32,7 +32,9 @@ def test_readable(datadir): def test_kohteet(datadir): asiakastiedot = LahtiSiirtotiedosto(datadir).asiakastiedot - assert 'UrakoitsijaId' in asiakastiedot.headers + header_row = asiakastiedot[0] + headers = header_row.dict().keys() + assert 'UrakoitsijaId' in headers def 
test_import_data(engine, datadir): From 380e0101989b4ce641a1c16fee407f7946df51eb Mon Sep 17 00:00:00 2001 From: Miikka Kallio Date: Wed, 15 Nov 2023 21:45:08 +0200 Subject: [PATCH 2/5] Fixes Added alternative strings for some jatelaji. Fixed parsing empty strings, when data is missing. Fixed a case where SUM(astiamaara) is a decimal, with "," instead of ".". In reality this is likely a typo in the data. --- jkrimporter/providers/lahti/models.py | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/jkrimporter/providers/lahti/models.py b/jkrimporter/providers/lahti/models.py index 31efe843..23b53a01 100644 --- a/jkrimporter/providers/lahti/models.py +++ b/jkrimporter/providers/lahti/models.py @@ -1,6 +1,7 @@ import datetime import re from datetime import date +from dateutil.parser import parse as date_parser from enum import Enum from typing import Dict, Optional, Union @@ -19,11 +20,11 @@ class Jatelaji(str, Enum): aluekerays = "Aluekeräys" seka = "Sekajäte" energia = "Energia" - bio = "Biojäte" - kartonki = "Kartonkipakkaus" + bio = "Bio" + kartonki = "Kartonki" pahvi = "Pahvi" - metalli = "Metallipakkaus" - lasi = "Lasipakkaus" + metalli = "Metalli" + lasi = "Lasi" paperi = "Paperi" muovi = "Muovi" liete = "Liete" @@ -118,6 +119,9 @@ def fix_posti(value: Union[str, int]): @validator("koko", "paino", pre=True) def parse_float(value: Union[float, str]): # If float wasn't parsed, let's parse them + # Return none if empty string was parsed + if value == '': + return None if type(value) is str: # we might have . 
or , as the separator return float(value.replace(",", ".")) @@ -127,6 +131,14 @@ def parse_float(value: Union[float, str]): def parse_jatelaji(value: str): if value == "Sekaj": value = "Sekajäte" + if value == "Biojäte": + value = "Bio" + if value == "Kartonkipakkaus": + value = "Kartonki" + if value == "Muovipakkaus": + value = "Muovi" + if value == "Lasipakkaus": + value = "Lasi" return value.title() @validator("Pvmalk", "Pvmasti", pre=True) @@ -142,8 +154,8 @@ def parse_date(value: Union[date, str]): "tyhjennysvali", "Voimassaoloviikotalkaen", "Voimassaoloviikotasti", pre=True ) def fix_na(value: str): - # Many fields may have strings such as #N/A. Parse them to None. - if value == "#N/A": + # Many fields may have strings such as #N/A or empty string. Parse them to None. + if value == "#N/A" or value == '': return None return value @@ -154,3 +166,12 @@ def parse_kuntatunnus(value: str): if value: return int(value) return None + + @validator("astiamaara", pre=True, always=True) + def parse_decimal_separator(cls, value): + if isinstance(value, str): + # There is at least one case where SUM(astiamaara) is "0,12" + # Not sure what to do here, doesn't make sense that 0.12 containers have been emptied. + # But then again, should this be converted to 0, 1 or 12? 
+ value = float(value.replace(',', '.')) + return value From 3b26e8250cb767e70a8b56df7596c03fec8656de Mon Sep 17 00:00:00 2001 From: Ismo Lahtinen Date: Fri, 17 Nov 2023 06:27:53 +0200 Subject: [PATCH 3/5] Added python-dateutil --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7c517461..a70b133c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ shapely = "^1.8.2" typer = "^0.6.1" python-dotenv = "^0.20.0" addrparser = "^0.1.0" +python-dateutil = "^2.8.2" [tool.poetry.dev-dependencies] flake8 = "^5.0.4" From aea97a91668ab6a20f94bc2901d4001d6d47699e Mon Sep 17 00:00:00 2001 From: Ismo Lahtinen Date: Fri, 17 Nov 2023 06:38:01 +0200 Subject: [PATCH 4/5] Fixed formatting --- jkrimporter/providers/db/services/buildings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jkrimporter/providers/db/services/buildings.py b/jkrimporter/providers/db/services/buildings.py index 178e1fef..2d56fa21 100644 --- a/jkrimporter/providers/db/services/buildings.py +++ b/jkrimporter/providers/db/services/buildings.py @@ -316,7 +316,7 @@ def _find_by_address(session: "Session", haltija: "Yhteystieto"): select(Rakennus) .join(Osoite) .join(Katu) - .outerjoin(RakennuksenVanhimmat) # allow vapaa-ajanrakennukset + .outerjoin(RakennuksenVanhimmat) # allow vapaa-ajanrakennukset .where( Osoite.posti_numero == haltija.osoite.postinumero, sqlalchemyFunc.lower(Katu.katunimi_fi) == katunimi_lower, From 3d631beaa985fb8205eaf587ed2dba0ff29d39b5 Mon Sep 17 00:00:00 2001 From: Ismo Lahtinen Date: Fri, 17 Nov 2023 06:42:59 +0200 Subject: [PATCH 5/5] Fixed file formatting --- jkrimporter/providers/lahti/siirtotiedosto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jkrimporter/providers/lahti/siirtotiedosto.py b/jkrimporter/providers/lahti/siirtotiedosto.py index 2aa029a7..dc4bb7d4 100644 --- a/jkrimporter/providers/lahti/siirtotiedosto.py +++ b/jkrimporter/providers/lahti/siirtotiedosto.py @@ -7,11 +7,13 @@ 
logger = logging.getLogger(__name__) + class AsiakastiedotSheet(SiirtotiedostoSheet[Asiakas]): @staticmethod def _obj_from_dict(data): return Asiakas.parse_obj(data) + class LahtiSiirtotiedosto: def __init__(self, path): self._path = path