From 9b26c1b1c352237e534e3130e8c0ec913e476b9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= <balazs.dukai@gmail.com>
Date: Sun, 18 Feb 2024 22:25:13 +0100
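Subject: [PATCH] Localize dashboard and report, simplify validation plots

- dashboard: use Dutch headings ("Gemeente", "Buurt") for the dropdowns.
- report/_quarto.yml: hide code cells in the HTML output (echo: false)
  instead of folding them, and drop the ipynb output format.
- report/report_nl.qmd: add structured author, license, copyright and
  lang metadata; embed the dashboard in a new "Buurten" section.
- report.py: fix the typo "geschatt" -> "geschatte" in the plot title.
- validate.py: comment out the gemeente/wijk/buurt comparison plots so
  only the NL-level plots are written.
- tmp/analyse_ep_online.py: draw the bouwperiode and woningtype
  distributions as side-by-side vertical bars instead of mirrored
  horizontal bars.
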
---
 dashboard/dashboard_server/dashboard.py |  4 +-
 report/_quarto.yml                      |  3 +-
 report/report_nl.qmd                    | 25 +++++++-
 src/wijklabels/report.py                |  2 +-
 src/wijklabels/validate.py              | 24 ++++----
 tmp/analyse_ep_online.py                | 77 +++++++++++++------------
 6 files changed, 80 insertions(+), 55 deletions(-)
diff --git a/dashboard/dashboard_server/dashboard.py b/dashboard/dashboard_server/dashboard.py
index 89b2f58..7706ae6 100644
--- a/dashboard/dashboard_server/dashboard.py
+++ b/dashboard/dashboard_server/dashboard.py
@@ -66,7 +66,7 @@ def set_neighborhoods_value(available_options):
     html.Div([
         html.Div(
             children=[
-                html.H5(children="Municipality:"),
+                html.H5(children="Gemeente"),
                 dcc.Dropdown(
                     id="municipalities-dropdown",
                     options=neighborhoods.index.get_level_values(0).unique(),
@@ -77,7 +77,7 @@ def set_neighborhoods_value(available_options):
         ),
         html.Div(
             children=[
-                html.H5(children="Neighborhood:"),
+                html.H5(children="Buurt"),
                 dcc.Dropdown(
                     id="neighborhoods-dropdown",
                     value="Bezuidenhout-Midden",
diff --git a/report/_quarto.yml b/report/_quarto.yml
index 2f56094..30988b1 100644
--- a/report/_quarto.yml
+++ b/report/_quarto.yml
@@ -7,9 +7,8 @@ lang: en
 
 format:
   html:
-    code-fold: true
+    echo: false
   pdf: default
-  ipynb: default
 
 jupyter: python3
 
diff --git a/report/report_nl.qmd b/report/report_nl.qmd
index 79ef123..c0e26f6 100644
--- a/report/report_nl.qmd
+++ b/report/report_nl.qmd
@@ -1,8 +1,23 @@
 ---
 title: "Wijklabels"
-author: "Balázs Dukai, Ravi Peters"
+author: 
+  - name: "Balázs Dukai"
+    email: "balazs.dukai@3dgi.nl"
+    affiliation:
+      - name: "3DGI"
+        url: "https://3dgi.nl"
+  - name: "Ravi Peters"
+    email: "ravi.peters@3dgi.nl"
+    affiliation:
+      - name: "3DGI"
+        url: "https://3dgi.nl"
 date: last-modified
 bibliography: references.bib
+license: "CC BY"
+copyright: 
+  holder: "3DGI"
+  year: "2024"
+lang: "nl"
 ---
 
 ```{python}
@@ -250,6 +265,14 @@ Als we onze validatie beperken tot die woningen waar het juiste label wel bestaa
 
 Voor deze subset van de data, is de gemiddelde labelafwijking in de door ons geschatte labels ten opzichte van de @_EP_2023 labels **-0,6**, met een standaardafwijking van **1,6**. Dit is ongeveer twee keer zo laag als de labelafwijking voor de gehele dataset.
 
+### Buurten
+
+Kies een gemeente en een buurt om de geschatte energielabels te bekijken.
+
+```{=html}
+<iframe width="780" height="600" src="https://dev.3dgi.xyz/wijklabels/" title="Wijklabels dashboard: geschatte energielabels per buurt"></iframe>
+```
+
 # Conclusies
 
 Gemiddeld liggen de door ons geschatte labels één label onder het echte label, met een standaardafwijking van twee labels.
diff --git a/src/wijklabels/report.py b/src/wijklabels/report.py
index fb56a57..cdbd02f 100644
--- a/src/wijklabels/report.py
+++ b/src/wijklabels/report.py
@@ -192,7 +192,7 @@ def _plot_dist(grouped, t):
             plt.axhline(y=0.0, color='#154273', linestyle='-')
             plt.grid(visible=True, which="major", axis="y", zorder=0)
             plt.title(f"{aggregate_id_column.title()}: {aggregate_id}\nNr. woningen: {len(b)}", fontsize=10)
-            plt.suptitle("Afwijking van de geschatt labels van de EP-Online labels", fontsize=14)
+            plt.suptitle("Afwijking van de geschatte labels van de EP-Online labels", fontsize=14)
             plt.tight_layout()
             plt.savefig(f"{dir_plots}/{aggregate_level}_{filename}_dist{t}.png")
             plt.close()
diff --git a/src/wijklabels/validate.py b/src/wijklabels/validate.py
index e08ba19..07f20df 100644
--- a/src/wijklabels/validate.py
+++ b/src/wijklabels/validate.py
@@ -274,12 +274,12 @@ def _ep_in_dist(df_dist, row):
         # Plot NL
         log.info(f"Writing plot of the Netherlands to {p}")
         plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.NL)
-        log.info(f"Writing plots of municipalities to {p}")
-        plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.GEMEENTE)
-        log.info(f"Writing plots of wijken to {p}")
-        plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.WIJK)
-        log.info(f"Writing plots of neighborhoods to {p}")
-        plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.BUURT)
+        # log.info(f"Writing plots of municipalities to {p}")
+        # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.GEMEENTE)
+        # log.info(f"Writing plots of wijken to {p}")
+        # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.WIJK)
+        # log.info(f"Writing plots of neighborhoods to {p}")
+        # plot_comparison(df_with_truth, p, aggregate_level=AggregateUnit.BUURT)
 
         p = PATH_OUTPUT_DIR.joinpath("plots_possible")
         p.mkdir(parents=True, exist_ok=True)
@@ -287,12 +287,12 @@ def _ep_in_dist(df_dist, row):
         # Plot NL
         log.info(f"Writing plot of the Netherlands to {p}")
         plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.NL)
-        log.info(f"Writing plots of municipalities to {p}")
-        plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.GEMEENTE)
-        log.info(f"Writing plots of wijken to {p}")
-        plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.WIJK)
-        log.info(f"Writing plots of neighborhoods to {p}")
-        plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.BUURT)
+        # log.info(f"Writing plots of municipalities to {p}")
+        # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.GEMEENTE)
+        # log.info(f"Writing plots of wijken to {p}")
+        # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.WIJK)
+        # log.info(f"Writing plots of neighborhoods to {p}")
+        # plot_comparison(possible_labels, p, aggregate_level=AggregateUnit.BUURT)
 
 
 if __name__ == "__main__":
diff --git a/tmp/analyse_ep_online.py b/tmp/analyse_ep_online.py
index d13eea8..bfe06ea 100644
--- a/tmp/analyse_ep_online.py
+++ b/tmp/analyse_ep_online.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import psycopg
 import matplotlib.pyplot as plt
+import numpy as np
 
 from wijklabels.load import EPLoader
 from wijklabels.woningtype import Bouwperiode, WoningtypePreNTA8800
@@ -29,6 +30,14 @@
 
 if __name__ == '__main__':
     args = parser.parse_args()
+    # args = parser.parse_args([
+    #     "/data/energylabel-ep-online/v20231101_v2_csv_subset.csv",
+    #     "-d", "postgres",
+    #     "--host", "localhost",
+    #     "-p", "8001",
+    #     "-u", "postgres",
+    #     "--password", "password"
+    # ])
     bouwperiode = True
     woningtype = True
     coverage = True
@@ -95,31 +104,29 @@
             periods_sorted_pretty
         ]
 
+
+
         fig = plt.figure(figsize=(9, 7))
-        plt.barh(
-            y=bag_df_bouwperiode_dist.index,
-            width=bag_df_bouwperiode_dist.bouwperiode.values * -100,
+        x = np.arange(len(bag_df_bouwperiode_dist.index))
+        plt.bar(
+            x=x,
+            height=bag_df_bouwperiode_dist.bouwperiode.values * 100,
             label="BAG",
-            zorder=3
+            zorder=3,
+            width=0.25
         )
-        plt.barh(
-            y=ep_online_bouwperiode_dist.index,
-            width=ep_online_bouwperiode_dist.bouwperiode.values * 100,
+        plt.bar(
+            x=x + 0.25,
+            height=ep_online_bouwperiode_dist.bouwperiode.values * 100,
             label="EP-Online",
-            zorder=3
+            zorder=3,
+            width=0.25
         )
         plt.legend(loc="best")
         plt.grid(which="major", axis="x", zorder=0)
-        plt.xticks(
-            ticks=range(-50, 60, 10),
-            labels=[str(i) for i in range(-50, 60, 10)]
-        )
-        plt.xticks(
-            ticks=range(-55, 60, 10),
-            minor=True,
-        )
-        plt.xlabel("Percentage (%) van het hele dataset")
-        plt.ylabel("Bouwperiode")
+        plt.ylabel("Percentage (%) van het hele dataset")
+        plt.xticks(x+0.125, bag_df_bouwperiode_dist.index)
+        plt.xlabel("Bouwperiode")
         plt.suptitle("Spreiding van woningen per bouwperiode", fontsize=14)
         plt.savefig("bouwperiode_dist.png")
         plt.close()
@@ -142,31 +149,27 @@
             normalize=True
         )
 
-        fig = plt.figure(figsize=(12, 7))
-        plt.barh(
-            y=bag_df_woningtype_dist.index,
-            width=bag_df_woningtype_dist.woningtype.values * -100,
+        fig = plt.figure(figsize=(9, 7))
+        x = np.arange(len(bag_df_woningtype_dist.index))
+        plt.bar(
+            x=x,
+            height=bag_df_woningtype_dist.woningtype.values * 100,
             label="BAG",
-            zorder=3
+            zorder=3,
+            width=0.25
         )
-        plt.barh(
-            y=ep_online_woningtype_dist.index,
-            width=ep_online_woningtype_dist.woningtype.values * 100,
+        plt.bar(
+            x=x+0.25,
+            height=ep_online_woningtype_dist.woningtype.values * 100,
             label="EP-Online",
-            zorder=3
+            zorder=3,
+            width=0.25
         )
         plt.legend(loc="best")
         plt.grid(which="major", axis="x", zorder=0)
-        plt.xticks(
-            ticks=range(-60, 70, 10),
-            labels=[str(i) for i in range(-60, 70, 10)],
-        )
-        plt.xticks(
-            ticks=range(-65, 70, 5),
-            minor=True,
-        )
-        plt.xlabel("Percentage (%) van het hele dataset")
-        plt.ylabel("woningtype")
+        plt.ylabel("Percentage (%) van het hele dataset")
+        plt.xticks(x + 0.125, bag_df_woningtype_dist.index)
+        plt.xlabel("woningtype")
         plt.suptitle("Spreiding van woningen per woningtype", fontsize=14)
         plt.savefig("woningtype_dist.png")
         plt.close()