Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lahti: Muuta importskripti lukemaan csv-tiedostoja xlsx-tiedostojen sijaan #61

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion db/migrations/R__import_koodisto_osapuolenlaji.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
insert into jkr_koodistot.osapuolenrooli(id, selite) values
(1,'Omistaja'),
(2,'Vanhin asukas')
(2,'Vanhin asukas'),
(3,'Asiakas')
on conflict (id) do update set selite = excluded.selite;
2 changes: 1 addition & 1 deletion jkrimporter/providers/db/services/buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def _find_by_address(session: "Session", haltija: "Yhteystieto"):
select(Rakennus)
.join(Osoite)
.join(Katu)
.join(RakennuksenVanhimmat, outer=True) # allow vapaa-ajanrakennukset
.outerjoin(RakennuksenVanhimmat) # allow vapaa-ajanrakennukset
.where(
Osoite.posti_numero == haltija.osoite.postinumero,
sqlalchemyFunc.lower(Katu.katunimi_fi) == katunimi_lower,
Expand Down
25 changes: 23 additions & 2 deletions jkrimporter/providers/lahti/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import re
from datetime import date
from dateutil.parser import parse as date_parser
from enum import Enum
from typing import Dict, Optional, Union

Expand Down Expand Up @@ -118,6 +119,9 @@ def fix_posti(value: Union[str, int]):
@validator("koko", "paino", pre=True)
def parse_float(value: Union[float, str]):
# If the value was not already parsed as a float, parse it here.
# Return None if the value is an empty string.
if value == '':
return None
if type(value) is str:
# we might have . or , as the separator
return float(value.replace(",", "."))
Expand All @@ -127,6 +131,14 @@ def parse_float(value: Union[float, str]):
def parse_jatelaji(value: str):
if value == "Sekaj":
value = "Sekajäte"
if value == "Biojäte":
value = "Bio"
if value == "Kartonkipakkaus":
value = "Kartonki"
if value == "Muovipakkaus":
value = "Muovi"
if value == "Lasipakkaus":
value = "Lasi"
return value.title()

@validator("Pvmalk", "Pvmasti", pre=True)
Expand All @@ -142,8 +154,8 @@ def parse_date(value: Union[date, str]):
"tyhjennysvali", "Voimassaoloviikotalkaen", "Voimassaoloviikotasti", pre=True
)
def fix_na(value: str):
# Many fields may have strings such as #N/A. Parse them to None.
if value == "#N/A":
# Many fields may have strings such as #N/A or empty string. Parse them to None.
if value == "#N/A" or value == '':
return None
return value

Expand All @@ -154,3 +166,12 @@ def parse_kuntatunnus(value: str):
if value:
return int(value)
return None

@validator("astiamaara", pre=True, always=True)
def parse_decimal_separator(cls, value):
if isinstance(value, str):
# There is at least one case where SUM(astiamaara) is "0,12".
# It is unclear what to do here: it does not make sense that 0.12 containers were emptied,
# but it is equally unclear whether the value should be converted to 0, 1 or 12.
value = float(value.replace(',', '.'))
return value
39 changes: 20 additions & 19 deletions jkrimporter/providers/lahti/siirtotiedosto.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import logging
import csv
from pathlib import Path

from openpyxl.reader.excel import load_workbook

from jkrimporter.datasheets import ExcelCombinedFileSheetCollection, SiirtotiedostoSheet
from jkrimporter.datasheets import SiirtotiedostoSheet
from jkrimporter.providers.lahti.models import Asiakas


logger = logging.getLogger(__name__)


Expand All @@ -17,26 +15,29 @@ def _obj_from_dict(data):


class LahtiSiirtotiedosto:
# Lahti has no set sheet names. It has a directory with different sheets
# for different providers, all having identical format.

def __init__(self, path):
self._sheet_collection = ExcelCombinedFileSheetCollection(path)
self._path = path

@classmethod
def readable_by_me(cls, path):
p = Path(path)
for f in p.iterdir():
if f.is_file() and f.suffix == ".xlsx":
try:
workbook = load_workbook(filename=f, data_only=True, read_only=True)
sheets = workbook.sheetnames
if "in" in sheets:
return True
except Exception:
pass
directory = Path(path)
for file in directory.iterdir():
if file.is_file() and file.suffix == ".csv":
return True
return False

@property
def asiakastiedot(self):
return AsiakastiedotSheet(self._sheet_collection)
all_data = []

# Iterate through all CSV files in the directory
for csv_file_path in Path(self._path).glob("*.csv"):
with open(csv_file_path, mode="r", encoding="cp1252", newline="") as csv_file:
csv_reader = csv.DictReader(csv_file, delimiter=";", quotechar='"')
data_list = [row for row in csv_reader]
all_data.extend(data_list)

# Convert to a list of Asiakas objects
asiakas_list = [Asiakas.parse_obj(data) for data in all_data]

return asiakas_list
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ shapely = "^1.8.2"
typer = "^0.6.1"
python-dotenv = "^0.20.0"
addrparser = "^0.1.0"
python-dateutil = "^2.8.2"

[tool.poetry.dev-dependencies]
flake8 = "^5.0.4"
Expand Down
4 changes: 4 additions & 0 deletions tests/data/test_lahti_siirtotiedosto/kuljetustiedot_csv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
UrakoitsijaId;x-koordinaatti;y.koordinaattori;Eranro;UrakoitsijankohdeId;Rakennustunnus/Kiinteistotunnus;Kiinteistonkatuosoite;Kiinteistonposti;kimppa;kimppaid;Haltijannimi;Haltijanyhteyshlo;Haltijankatuosoite;Haltijanposti;Haltijanmaakoodi;Haltijanulkomaanpaikkakunta;Pvmalk;Pvmasti;tyyppiIdEWC;tapahtumannimi;COUNT(kaynnit);SUM(astiamaara);koko;SUM(paino);tyhjennysvali;kertaaviikossa;Voimassaoloviikotalkaen;Voimassaoloviikotasti;Voimassapmvalkaen;Voimassapvmasti;Voimassaoloviikotalkaen2;Voimassaoloviikotasti2;tyhjennysvali2;kertaaviikossa2;PalveluKuuluukokimppaan;Kimpanjakoosuus;palveluKimppakohdeId;KimpanNimi;Kimpasta vastaava;Kimpankatuosoite;Kimpanposti;Kuntatun;Keskeytysalkaen;Keskeytysasti
0000000-9;6761402;427353;1234AB;01-0000001-00;123456789A;HARJUKATU 44;15100 LAHTI;;;ASUNTO OY KAHDEN LAULUMUISTO;HEIKKI LEHMUSTO;VESIJÄRVENKATU 8;15100 LAHTI;FI;;1.1.2023;31.12.2023;Sekaj;660 L SEKAJÄTEASTIA TYHJENNYS;123;4;0,66;10;1;2;1;53;1.1.1900;31.12.2099;;;;;;;;;;;;398;1.2.2023;15.2.2023
0000000-9;;;;01-0000123-01;134567890B;KIRKKOÄYRÄÄNTIE 1D;16200 ARTJÄRVI;;;LINDROTH OY;JOHN LINDROTH;KIRKKOÄYRÄÄNTIE 1D;16200 ARTJÄRVI;FI;;1.1.2023;31.12.2023;Energia;660 L ENERGIAJÄTEASTIA TYHJENNYS;66;2;0,66;8;1;;1;53;1.1.1900;31.12.2099;;;;;;;01-0000999-99;KIRKKOÄYRÄÄN ENERGIAKIMPPA;01-1110999-01;KIRKKOÄYRÄÄNTIE 11;16200 ARTJÄRVI;560;;
0000000-9;;;;01-0000123-02;100456789B;KUVAKALLIONTIE 1;15230 LAHTI;;;RIKU FORSSTRÖM;RIKU FORSSTRÖM;KUVAKALLIONTIE 1;15230 LAHTI;FI;;1.1.2023;31.12.2023;Energia;660 L ENERGIAJÄTEASTIA TYHJENNYS;22;2;0,66;7;1;;1;53;1.1.1900;31.12.2099;;;;;;;01-0000999-99;KIRKKOÄYRÄÄN ENERGIAKIMPPA;01-1110999-01;KIRKKOÄYRÄÄNTIE 11;16200 ARTJÄRVI;560;;
2 changes: 1 addition & 1 deletion tests/scripts/init_database.bat
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ ECHO Asukkaat
%OGR2OGR_PATH%\\ogr2ogr -f PostgreSQL -overwrite -progress PG:"host=%HOST% port=%PORT% dbname=%DB_NAME% user=%USER% ACTIVE_SCHEMA=jkr_dvv" -nln vanhin "./data/test_data_import/DVV_original.xlsx" "R9 huon asukk"

ECHO Muunnetaan jkr-muotoon...
psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -f "../scripts/import_dvv.sql"
psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v formatted_date=20220128 -f "../scripts/import_dvv.sql"
2 changes: 1 addition & 1 deletion tests/scripts/update_database.bat
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ ECHO Asukkaat
%OGR2OGR_PATH%\\ogr2ogr -f PostgreSQL -overwrite -progress PG:"host=%HOST% port=%PORT% dbname=%DB_NAME% user=%USER% ACTIVE_SCHEMA=jkr_dvv" -nln vanhin "./data/test_data_import/DVV_update.xlsx" "R9 huon asukk"

ECHO Muunnetaan jkr-muotoon...
psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v POIMINTAPVM=20230131 -f "../scripts/import_dvv.sql"
psql -h %HOST% -p %PORT% -d %DB_NAME% -U %USER% -v formatted_date=20230131 -f "../scripts/import_dvv.sql"
3 changes: 2 additions & 1 deletion tests/test_data_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def engine():

def test_osapuolenrooli(engine):
osapuolenroolit = [(1, 'Omistaja'),
(2, 'Vanhin asukas')]
(2, 'Vanhin asukas'),
(3, 'Asiakas')]
session = Session(engine)
result = session.execute(select([Osapuolenrooli.id, Osapuolenrooli.selite]))
assert [tuple(row) for row in result] == osapuolenroolit
Expand Down
4 changes: 3 additions & 1 deletion tests/test_lahti_siirtotiedosto.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def test_readable(datadir):

def test_kohteet(datadir):
asiakastiedot = LahtiSiirtotiedosto(datadir).asiakastiedot
assert 'UrakoitsijaId' in asiakastiedot.headers
header_row = asiakastiedot[0]
headers = header_row.dict().keys()
assert 'UrakoitsijaId' in headers


def test_import_data(engine, datadir):
Expand Down
Loading