From 9b26c1b1c352237e534e3130e8c0ec913e476b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sun, 18 Feb 2024 22:25:13 +0100 Subject: [PATCH] Fixes --- dashboard/dashboard_server/dashboard.py | 4 +- report/_quarto.yml | 3 +- report/report_nl.qmd | 25 +++++++- src/wijklabels/report.py | 2 +- src/wijklabels/validate.py | 24 ++++---- tmp/analyse_ep_online.py | 77 +++++++++++++------------ 6 files changed, 80 insertions(+), 55 deletions(-) diff --git a/dashboard/dashboard_server/dashboard.py b/dashboard/dashboard_server/dashboard.py index 89b2f58..7706ae6 100644 --- a/dashboard/dashboard_server/dashboard.py +++ b/dashboard/dashboard_server/dashboard.py @@ -66,7 +66,7 @@ def set_neighborhoods_value(available_options): html.Div([ html.Div( children=[ - html.H5(children="Municipality:"), + html.H5(children="Gemeente"), dcc.Dropdown( id="municipalities-dropdown", options=neighborhoods.index.get_level_values(0).unique(), @@ -77,7 +77,7 @@ def set_neighborhoods_value(available_options): ), html.Div( children=[ - html.H5(children="Neighborhood:"), + html.H5(children="Buurt"), dcc.Dropdown( id="neighborhoods-dropdown", value="Bezuidenhout-Midden", diff --git a/report/_quarto.yml b/report/_quarto.yml index 2f56094..30988b1 100644 --- a/report/_quarto.yml +++ b/report/_quarto.yml @@ -7,9 +7,8 @@ lang: en format: html: - code-fold: true + echo: false pdf: default - ipynb: default jupyter: python3 diff --git a/report/report_nl.qmd b/report/report_nl.qmd index 79ef123..c0e26f6 100644 --- a/report/report_nl.qmd +++ b/report/report_nl.qmd @@ -1,8 +1,23 @@ --- title: "Wijklabels" -author: "Balázs Dukai, Ravi Peters" +author: + - name: "Balázs Dukai" + email: "balazs.dukai@3dgi.nl" + affiliation: + - name: "3DGI" + url: "https://3dgi.nl" + - name: "Ravi Peters" + email: "ravi.peters@3dgi.nl" + affiliation: + - name: "3DGI" + url: "https://3dgi.nl" date: last-modified bibliography: references.bib +license: "CC BY" +copyright: + holder: "3DGI" + year: "2024" +lang: "nl" --- ```{python} @@ -250,6 +265,14 @@ Als we onze validatie beperken tot die woningen waar het juiste label wel bestaa Voor deze subset van de data, is de gemiddelde labelafwijking in de door ons geschatte labels ten opzichte van de @_EP_2023 labels **-0,6**, met een standaardafwijking van **1,6**. Dit is ongeveer twee keer zo laag als de labelafwijking voor de gehele dataset. +### Buurten + +Kies een gemeente en een buurt om de geschatte energielabels te bekijken. + +```{=html} + +``` + # Conclusies Gemiddeld liggen de door ons geschatte labels één label onder het echte label, met een standaardafwijking van twee labels. diff --git a/src/wijklabels/report.py b/src/wijklabels/report.py index fb56a57..cdbd02f 100644 --- a/src/wijklabels/report.py +++ b/src/wijklabels/report.py @@ -192,7 +192,7 @@ def _plot_dist(grouped, t): plt.axhline(y=0.0, color='#154273', linestyle='-') plt.grid(visible=True, which="major", axis="y", zorder=0) plt.title(f"{aggregate_id_column.title()}: {aggregate_id}\nNr. woningen: {len(b)}", fontsize=10) - plt.suptitle("Afwijking van de geschatt labels van de EP-Online labels", fontsize=14) + plt.suptitle("Afwijking van de geschatte labels van de EP-Online labels", fontsize=14) plt.tight_layout() plt.savefig(f"{dir_plots}/{aggregate_level}_{filename}_dist{t}.png") plt.close() diff --git a/src/wijklabels/validate.py b/src/wijklabels/validate.py index e08ba19..07f20df 100644 --- a/src/wijklabels/validate.py +++ b/src/wijklabels/validate.py @@ -274,12 +274,12 @@ def _ep_in_dist(df_dist, row): # Plot NL log.info(f"Writing plot of the Netherlands to {p}") plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.NL) - log.info(f"Writing plots of municipalities to {p}") - plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.GEMEENTE) - log.info(f"Writing plots of wijken to {p}") - plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.WIJK) - log.info(f"Writing plots of neighborhoods to {p}") - plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.BUURT) + # log.info(f"Writing plots of municipalities to {p}") + # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.GEMEENTE) + # log.info(f"Writing plots of wijken to {p}") + # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.WIJK) + # log.info(f"Writing plots of neighborhoods to {p}") + # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.BUURT) p = PATH_OUTPUT_DIR.joinpath("plots_possible") p.mkdir(parents=True, exist_ok=True) @@ -287,12 +287,12 @@ def _ep_in_dist(df_dist, row): # Plot NL log.info(f"Writing plot of the Netherlands to {p}") plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.NL) - log.info(f"Writing plots of municipalities to {p}") - plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.GEMEENTE) - log.info(f"Writing plots of wijken to {p}") - plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.WIJK) - log.info(f"Writing plots of neighborhoods to {p}") - plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.BUURT) + # log.info(f"Writing plots of municipalities to {p}") + # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.GEMEENTE) + # log.info(f"Writing plots of wijken to {p}") + # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.WIJK) + # log.info(f"Writing plots of neighborhoods to {p}") + # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.BUURT) if __name__ == "__main__": diff --git a/tmp/analyse_ep_online.py b/tmp/analyse_ep_online.py index d13eea8..bfe06ea 100644 --- a/tmp/analyse_ep_online.py +++ b/tmp/analyse_ep_online.py @@ -5,6 +5,7 @@ import pandas as pd import psycopg import matplotlib.pyplot as plt +import numpy as np from wijklabels.load import EPLoader from wijklabels.woningtype import Bouwperiode, WoningtypePreNTA8800 @@ -29,6 +30,14 @@ if __name__ == '__main__': args = parser.parse_args() + # args = parser.parse_args([ + # "/data/energylabel-ep-online/v20231101_v2_csv_subset.csv", + # "-d", "postgres", + # "--host", "localhost", + # "-p", "8001", + # "-u", "postgres", + # "--password", "password" + # ]) bouwperiode = True woningtype = True coverage = True @@ -95,31 +104,29 @@ periods_sorted_pretty ] + + fig = plt.figure(figsize=(9, 7)) - plt.barh( - y=bag_df_bouwperiode_dist.index, - width=bag_df_bouwperiode_dist.bouwperiode.values * -100, + x = np.arange(len(bag_df_bouwperiode_dist.index)) + plt.bar( + x=x, + height=bag_df_bouwperiode_dist.bouwperiode.values * 100, label="BAG", - zorder=3 + zorder=3, + width=0.25 ) - plt.barh( - y=ep_online_bouwperiode_dist.index, - width=ep_online_bouwperiode_dist.bouwperiode.values * 100, + plt.bar( + x=x + 0.25, + height=ep_online_bouwperiode_dist.bouwperiode.values * 100, label="EP-Online", - zorder=3 + zorder=3, + width=0.25 ) plt.legend(loc="best") plt.grid(which="major", axis="x", zorder=0) - plt.xticks( - ticks=range(-50, 60, 10), - labels=[str(i) for i in range(-50, 60, 10)] - ) - plt.xticks( - ticks=range(-55, 60, 10), - minor=True, - ) - plt.xlabel("Percentage (%) van het hele dataset") - plt.ylabel("Bouwperiode") + plt.ylabel("Percentage (%) van het hele dataset") + plt.xticks(x+0.125, bag_df_bouwperiode_dist.index) + plt.xlabel("Bouwperiode") plt.suptitle("Spreiding van woningen per bouwperiode", fontsize=14) plt.savefig("bouwperiode_dist.png") plt.close() @@ -142,31 +149,27 @@ normalize=True ) - fig = plt.figure(figsize=(12, 7)) - plt.barh( - y=bag_df_woningtype_dist.index, - width=bag_df_woningtype_dist.woningtype.values * -100, + fig = plt.figure(figsize=(9, 7)) + x = np.arange(len(bag_df_woningtype_dist.index)) + plt.bar( + x=x, + height=bag_df_woningtype_dist.woningtype.values * 100, label="BAG", - zorder=3 + zorder=3, + width=0.25 ) - plt.barh( - y=ep_online_woningtype_dist.index, - width=ep_online_woningtype_dist.woningtype.values * 100, + plt.bar( + x=x+0.25, + height=ep_online_woningtype_dist.woningtype.values * 100, label="EP-Online", - zorder=3 + zorder=3, + width=0.25 ) plt.legend(loc="best") plt.grid(which="major", axis="x", zorder=0) - plt.xticks( - ticks=range(-60, 70, 10), - labels=[str(i) for i in range(-60, 70, 10)], - ) - plt.xticks( - ticks=range(-65, 70, 5), - minor=True, - ) - plt.xlabel("Percentage (%) van het hele dataset") - plt.ylabel("woningtype") + plt.ylabel("Percentage (%) van het hele dataset") + plt.xticks(x + 0.125, bag_df_woningtype_dist.index) + plt.xlabel("woningtype") plt.suptitle("Spreiding van woningen per woningtype", fontsize=14) plt.savefig("woningtype_dist.png") plt.close()