diff --git a/config/config.default.yaml b/config/config.default.yaml
index ffebff647..a15676251 100644
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -870,6 +870,12 @@ costs:
 
 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#clustering
 clustering:
+  mode: admin # buses, admin, custom_admin_map, custom_busmap
+  admin:
+    level: 1 # Set general level of administrative division, e.g. 0 for countries, 1 for regions, specify individual countries if needed
+    DE: 3
+    FR: 2
+    ES: 0
   focus_weights: false
   simplify_network:
     to_substations: false
diff --git a/doc/data-bundle.rst b/doc/data-bundle.rst
index f859cd5a3..230a81ea0 100644
--- a/doc/data-bundle.rst
+++ b/doc/data-bundle.rst
@@ -33,13 +33,6 @@ scope to reduce file size, or are not provided through stable URLs elsewhere.
 - **License:** `custom <https://ec.europa.eu/eurostat/about-us/policies/copyright>`__
 - **Description:** Average annual population to calculate regional GDP data (thousand persons) by NUTS 3 regions.
 
-``data/bundle/nama_10r_3gdp.tsv.gz``
-
-- **Source:** Eurostat
-- **Link:** https://ec.europa.eu/eurostat/databrowser/view/nama_10r_3gdp/default/table?lang=en
-- **License:** `custom <https://ec.europa.eu/eurostat/about-us/policies/copyright>`__
-- **Description:** Gross domestic product (GDP) at current market prices by NUTS 3 regions.
-
 ``data/bundle/corine``
 
 - **Source:** European Environment Agency (EEA)
@@ -94,7 +87,7 @@ scope to reduce file size, or are not provided through stable URLs elsewhere.
 - **License:** CC0 (`reference <https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0>`__)
 - **Description:** Gridded GDP data.
 
-``data/bundle/ppp_2013_1km_Aggregated.tif``
+``data/bundle/ppp_2019_1km_Aggregated.tif``
 
 - **Source:** WorldPop (www.worldpop.org - School of Geography and Environmental
   Science, University of Southampton; Department of Geography and Geosciences,
@@ -104,5 +97,5 @@ scope to reduce file size, or are not provided through stable URLs elsewhere.
   Funded by The Bill and Melinda Gates Foundation (OPP1134076).
   https://dx.doi.org/10.5258/SOTON/WP00647
 - **Link:** https://hub.worldpop.org/doi/10.5258/SOTON/WP00647
-- **License:** CC-BY 4.0 (`reference <https://hub.worldpop.org/geodata/summary?id=24770>`__)
+- **License:** CC-BY 4.0 (`reference <https://hub.worldpop.org/geodata/summary?id=24776>`__)
 - **Description:** Gridded population data.
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index e931365ce..63972da76 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -115,6 +115,8 @@ Upcoming Release
 
 * Update locations and capacities of ammonia plants.
 
+* Updating all base shapes (country_shapes, europe_shape, nuts3_shapes, ...). The workflow has been modified to use higher resolution and more harmonised shapes (NUTS3 2021 01M data and OSM administration level 1 for non-NUTS3 countries, such as BA, MD, UA, and XK). Data sources for population and GDP p.c. have been updated to JRC ARDECO https://urban.jrc.ec.europa.eu/ardeco/ -- 2019 values are used. `build_gdp_pop_non_nuts3` (originally created to build regional GDP p.c. and population data for MD and UA) is now integrated into `build_shapes` and extended to build regional values for all non-NUTS3 countries using cutouts of the updated datasets `GDP_per_capita_PPP_1990_2015_v2.nc` and `ppp_2019_1km_Aggregated.tif`,
+
 
 PyPSA-Eur 0.13.0 (13th September 2024)
 ======================================
diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk
index 533429d9a..1b885c532 100755
--- a/rules/build_electricity.smk
+++ b/rules/build_electricity.smk
@@ -98,17 +98,45 @@ rule base_network:
         "../scripts/base_network.py"
 
 
+rule build_osm_boundaries:
+    input:
+        json="data/osm-boundaries/json/{country}_adm1.json",
+        eez=ancient("data/eez/World_EEZ_v12_20231025_LR/eez_v12_lowres.gpkg"),
+    output:
+        boundary="data/osm-boundaries/build/{country}_adm1.geojson",
+    log:
+        "logs/build_osm_boundaries_{country}.log",
+    threads: 1
+    resources:
+        mem_mb=1500,
+    conda:
+        "../envs/environment.yaml"
+    script:
+        "../scripts/build_osm_boundaries.py"
+
+
+rule build_osm_boundaries_all:
+    input:
+        "data/osm-boundaries/build/MD_adm1.geojson",
+        "data/osm-boundaries/build/BA_adm1.geojson",
+        "data/osm-boundaries/build/UA_adm1.geojson",
+        "data/osm-boundaries/build/XK_adm1.geojson",
+
+
 rule build_shapes:
     params:
         countries=config_provider("countries"),
     input:
-        naturalearth=ancient("data/naturalearth/ne_10m_admin_0_countries_deu.shp"),
         eez=ancient("data/eez/World_EEZ_v12_20231025_LR/eez_v12_lowres.gpkg"),
-        nuts3=ancient("data/nuts/NUTS_RG_03M_2013_4326_LEVL_3.geojson"),
-        nuts3pop=ancient("data/bundle/nama_10r_3popgdp.tsv.gz"),
-        nuts3gdp=ancient("data/bundle/nama_10r_3gdp.tsv.gz"),
-        ch_cantons=ancient("data/ch_cantons.csv"),
-        ch_popgdp=ancient("data/bundle/je-e-21.03.02.xls"),
+        nuts3_2021="data/nuts/NUTS_RG_01M_2021_4326_LEVL_3.geojson",
+        ba_adm1="data/osm-boundaries/build/BA_adm1.geojson",
+        md_adm1="data/osm-boundaries/build/MD_adm1.geojson",
+        ua_adm1="data/osm-boundaries/build/UA_adm1.geojson",
+        xk_adm1="data/osm-boundaries/build/XK_adm1.geojson",
+        nuts3_gdp="data/jrc-ardeco/ARDECO-SUVGDP.2021.table.csv",
+        nuts3_pop="data/jrc-ardeco/ARDECO-SNPTD.2021.table.csv",
+        other_gdp="data/sandbox/GDP_per_capita_PPP_1990_2015_v2.nc",  # TODO: update links to data/bundle after data bundle update
+        other_pop="data/sandbox/ppp_2019_1km_Aggregated.tif",  # TODO: update links to data/bundle after data bundle update
     output:
         country_shapes=resources("country_shapes.geojson"),
         offshore_shapes=resources("offshore_shapes.geojson"),
@@ -472,42 +500,10 @@ def input_conventional(w):
     }
 
 
-# Optional input when having Ukraine (UA) or Moldova (MD) in the countries list
-def input_gdp_pop_non_nuts3(w):
-    countries = set(config_provider("countries")(w))
-    if {"UA", "MD"}.intersection(countries):
-        return {"gdp_pop_non_nuts3": resources("gdp_pop_non_nuts3.geojson")}
-    return {}
-
-
-rule build_gdp_pop_non_nuts3:
-    params:
-        countries=config_provider("countries"),
-    input:
-        base_network=resources("networks/base_s.nc"),
-        regions=resources("regions_onshore_base_s.geojson"),
-        gdp_non_nuts3="data/bundle/GDP_per_capita_PPP_1990_2015_v2.nc",
-        pop_non_nuts3="data/bundle/ppp_2013_1km_Aggregated.tif",
-    output:
-        resources("gdp_pop_non_nuts3.geojson"),
-    log:
-        logs("build_gdp_pop_non_nuts3.log"),
-    benchmark:
-        benchmarks("build_gdp_pop_non_nuts3")
-    threads: 1
-    resources:
-        mem_mb=8000,
-    conda:
-        "../envs/environment.yaml"
-    script:
-        "../scripts/build_gdp_pop_non_nuts3.py"
-
-
 rule build_electricity_demand_base:
     params:
         distribution_key=config_provider("load", "distribution_key"),
     input:
-        unpack(input_gdp_pop_non_nuts3),
         base_network=resources("networks/base_s.nc"),
         regions=resources("regions_onshore_base_s.geojson"),
         nuts3=resources("nuts3_shapes.geojson"),
@@ -593,6 +589,8 @@ def input_cluster_network(w):
 
 rule cluster_network:
     params:
+        mode=config_provider("clustering", "mode"),
+        admin=config_provider("clustering", "admin"),
         cluster_network=config_provider("clustering", "cluster_network"),
         aggregation_strategies=config_provider(
             "clustering", "aggregation_strategies", default={}
@@ -608,9 +606,9 @@ rule cluster_network:
     input:
         unpack(input_cluster_network),
         network=resources("networks/base_s.nc"),
+        nuts3_shapes=resources("nuts3_shapes.geojson"),
         regions_onshore=resources("regions_onshore_base_s.geojson"),
         regions_offshore=resources("regions_offshore_base_s.geojson"),
-        busmap=ancient(resources("busmap_base_s.csv")),
         hac_features=lambda w: (
             resources("hac_features.nc")
             if config_provider("clustering", "cluster_network", "algorithm")(w)
diff --git a/rules/build_sector.smk b/rules/build_sector.smk
index 087ed1fb0..523ae58f0 100755
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -468,6 +468,9 @@ rule build_heat_totals:
         "../scripts/build_heat_totals.py"
 
 
+# TODO: update long-term to only need NUTS2021, nuts3_shapes.geojson
+# However: requires manual updating of NUTS code changes 2016 to 2021:
+# https://ec.europa.eu/eurostat/web/nuts/history
 rule build_biomass_potentials:
     params:
         biomass=config_provider("biomass"),
diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index f34daef95..6a66ac360 100755
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -18,7 +18,6 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle",
     datafiles = [
         "je-e-21.03.02.xls",
         "nama_10r_3popgdp.tsv.gz",
-        "nama_10r_3gdp.tsv.gz",
         "corine/g250_clc06_V18_5.tif",
         "eea/UNFCCC_v23.csv",
         "emobility/KFZ__count",
@@ -26,8 +25,8 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle",
         "h2_salt_caverns_GWh_per_sqkm.geojson",
         "natura/natura.tiff",
         "gebco/GEBCO_2014_2D.nc",
-        "GDP_per_capita_PPP_1990_2015_v2.nc",
-        "ppp_2013_1km_Aggregated.tif",
+        "GDP_per_capita_PPP_1990_2015_v2.nc",  # TODO: update
+        "ppp_2013_1km_Aggregated.tif",  # TODO: update
     ]
 
     rule retrieve_databundle:
@@ -77,7 +76,7 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle",
 
 if config["enable"]["retrieve"]:
 
-    rule retrieve_nuts_shapes:
+    rule retrieve_nuts_2013_shapes:
         input:
             shapes=storage(
                 "https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2013-03m.geojson.zip"
@@ -101,6 +100,34 @@ if config["enable"]["retrieve"]:
 
 
 
+if config["enable"]["retrieve"]:
+
+    rule retrieve_nuts_2021_shapes:
+        input:
+            shapes=storage(
+                "https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2021-01m.geojson.zip"
+            ),
+        output:
+            shapes_level_3="data/nuts/NUTS_RG_01M_2021_4326_LEVL_3.geojson",
+            shapes_level_2="data/nuts/NUTS_RG_01M_2021_4326_LEVL_2.geojson",
+            shapes_level_1="data/nuts/NUTS_RG_01M_2021_4326_LEVL_1.geojson",
+            shapes_level_0="data/nuts/NUTS_RG_01M_2021_4326_LEVL_0.geojson",
+        params:
+            zip_file="data/nuts/ref-nuts-2021-01m.geojson.zip",
+        run:
+            os.rename(input.shapes, params.zip_file)
+            with ZipFile(params.zip_file, "r") as zip_ref:
+                for level in ["LEVL_3", "LEVL_2", "LEVL_1", "LEVL_0"]:
+                    filename = f"NUTS_RG_01M_2021_4326_{level}.geojson"
+                    zip_ref.extract(filename, Path(output.shapes_level_0).parent)
+                    extracted_file = Path(output.shapes_level_0).parent / filename
+                    extracted_file.rename(
+                        getattr(output, f"shapes_level_{level[-1]}")
+                    )
+            os.remove(params.zip_file)
+
+
+
 if config["enable"]["retrieve"] and config["enable"].get("retrieve_cutout", True):
 
     rule retrieve_cutout:
@@ -371,27 +398,6 @@ if config["enable"]["retrieve"]:
 
 
 
-if config["enable"]["retrieve"]:
-
-    # Download directly from naciscdn.org which is a redirect from naturalearth.com
-    # (https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/)
-    # Use point-of-view (POV) variant of Germany so that Crimea is included.
-    rule retrieve_naturalearth_countries:
-        input:
-            storage(
-                "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_deu.zip"
-            ),
-        params:
-            zip="data/naturalearth/ne_10m_admin_0_countries_deu.zip",
-        output:
-            countries="data/naturalearth/ne_10m_admin_0_countries_deu.shp",
-        run:
-            move(input[0], params["zip"])
-            output_folder = Path(output["countries"]).parent
-            unpack_archive(params["zip"], output_folder)
-            os.remove(params["zip"])
-
-
 if config["enable"]["retrieve"]:
 
     rule retrieve_gem_europe_gas_tracker:
@@ -629,6 +635,20 @@ if config["enable"]["retrieve"] and (
             ),
 
 
+if config["enable"]["retrieve"]:
+
+    rule retrieve_osm_boundaries:
+        output:
+            json="data/osm-boundaries/json/{country}_adm1.json",
+        log:
+            "logs/retrieve_osm_boundaries_{country}_adm1.log",
+        threads: 1
+        conda:
+            "../envs/retrieve.yaml"
+        script:
+            "../scripts/retrieve_osm_boundaries.py"
+
+
 if config["enable"]["retrieve"]:
 
     rule retrieve_heat_source_utilisation_potentials:
@@ -645,3 +665,48 @@ if config["enable"]["retrieve"]:
             "data/heat_source_utilisation_potentials/{heat_source}.gpkg",
         script:
             "../scripts/retrieve_heat_source_utilisation_potentials.py"
+
+
+if config["enable"]["retrieve"]:
+
+    rule retrieve_jrc_ardeco:
+        output:
+            ardeco_gdp="data/jrc-ardeco/ARDECO-SUVGDP.2021.table.csv",
+            ardeco_pop="data/jrc-ardeco/ARDECO-SNPTD.2021.table.csv",
+        run:
+            import requests
+
+            urls = {
+                "ardeco_gdp": "https://urban.jrc.ec.europa.eu/ardeco-api-v2/rest/export/SUVGDP?version=2021&format=csv-table",
+                "ardeco_pop": "https://urban.jrc.ec.europa.eu/ardeco-api-v2/rest/export/SNPTD?version=2021&format=csv-table",
+            }
+
+            for key, url in urls.items():
+                response = requests.get(url)
+                output_path = output[key] if key in urls else None
+                if output_path:
+                    with open(output_path, "wb") as f:
+                        f.write(response.content)
+
+
+
+# TODO: Remove before merging into master and after updating databundle on Zenodo:
+if config["enable"]["retrieve"]:
+
+    rule retrieve_sandbox_data:
+        input:
+            gdp_non_nuts3=storage(
+                "https://github.com/bobbyxng/sandbox/raw/refs/heads/main/data/GDP_per_capita_PPP_1990_2015_v2.nc",
+                keep_local=True,
+            ),
+            pop_non_nuts_3=storage(
+                "https://github.com/bobbyxng/sandbox/raw/refs/heads/main/data/ppp_2019_1km_Aggregated.tif",
+                keep_local=True,
+            ),
+        output:
+            gdp_non_nuts3="data/sandbox/GDP_per_capita_PPP_1990_2015_v2.nc",
+            pop_non_nuts_3="data/sandbox/ppp_2019_1km_Aggregated.tif",
+        retries: 1
+        run:
+            for key in input.keys():
+                move(input[key], output[key])
diff --git a/scripts/build_electricity_demand_base.py b/scripts/build_electricity_demand_base.py
index d999d6f3b..192faf570 100644
--- a/scripts/build_electricity_demand_base.py
+++ b/scripts/build_electricity_demand_base.py
@@ -44,7 +44,6 @@ def upsample_load(
     regions_fn: str,
     load_fn: str,
     nuts3_fn: str,
-    gdp_pop_non_nuts3_fn: str,
     distribution_key: dict[str, float],
 ) -> pd.DataFrame:
     substation_lv_i = n.buses.index[n.buses["substation_lv"]]
@@ -61,19 +60,7 @@ def upsample_load(
     for cntry, group in gdf_regions.geometry.groupby(gdf_regions.country):
         load_ct = load[cntry]
 
-        if cntry in ["UA", "MD"]:
-            # separate handling because nuts3 provides no data for UA+MD
-            gdp_pop_non_nuts3 = gpd.read_file(gdp_pop_non_nuts3_fn).set_index("Bus")
-            gdp_pop_non_nuts3 = gdp_pop_non_nuts3.loc[
-                (gdp_pop_non_nuts3.country == cntry)
-                & (gdp_pop_non_nuts3.index.isin(substation_lv_i))
-            ]
-            factors = normed(
-                gdp_weight * normed(gdp_pop_non_nuts3["gdp"])
-                + pop_weight * normed(gdp_pop_non_nuts3["pop"])
-            )
-
-        elif len(group) == 1:
+        if len(group) == 1:
             factors = pd.Series(1.0, index=group.index)
 
         else:
@@ -116,7 +103,6 @@ def upsample_load(
         regions_fn=snakemake.input.regions,
         load_fn=snakemake.input.load,
         nuts3_fn=snakemake.input.nuts3,
-        gdp_pop_non_nuts3_fn=snakemake.input.get("gdp_pop_non_nuts3"),
         distribution_key=params.distribution_key,
     )
 
diff --git a/scripts/build_gdp_pop_non_nuts3.py b/scripts/build_gdp_pop_non_nuts3.py
deleted file mode 100644
index 79d141cc7..000000000
--- a/scripts/build_gdp_pop_non_nuts3.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# SPDX-FileCopyrightText: Contributors to PyPSA-Eur <https://github.com/pypsa/pypsa-eur>
-#
-# SPDX-License-Identifier: MIT
-"""
-Maps the per-capita GDP and population values to non-NUTS3 regions.
-
-The script takes as input the country code, a GeoDataFrame containing
-the regions, and the file paths to the datasets containing the GDP and
-POP values for non-NUTS3 countries.
-"""
-
-import logging
-
-import geopandas as gpd
-import numpy as np
-import pandas as pd
-import pypsa
-import rasterio
-import xarray as xr
-from _helpers import configure_logging, set_scenario_config
-from rasterio.mask import mask
-from shapely.geometry import box
-
-logger = logging.getLogger(__name__)
-
-
-def calc_gdp_pop(country, regions, gdp_non_nuts3, pop_non_nuts3):
-    """
-    Calculate the GDP p.c. and population values for non NUTS3 regions.
-
-    Parameters
-    ----------
-    country (str): The two-letter country code of the non-NUTS3 region.
-    regions (GeoDataFrame): A GeoDataFrame containing the regions.
-    gdp_non_nuts3 (str): The file path to the dataset containing the GDP p.c values
-    for non NUTS3 countries (e.g. MD, UA)
-    pop_non_nuts3 (str): The file path to the dataset containing the POP values
-    for non NUTS3 countries (e.g. MD, UA)
-
-    Returns
-    -------
-    tuple: A tuple containing two GeoDataFrames:
-        - gdp: A GeoDataFrame with the mean GDP p.c. values mapped to each bus.
-        - pop: A GeoDataFrame with the summed POP values mapped to each bus.
-    """
-    regions = regions.rename(columns={"name": "Bus"}).set_index("Bus")
-    regions = regions[regions.country == country]
-    # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres
-    bounding_box = (
-        gpd.GeoDataFrame(geometry=[box(*regions.total_bounds)], crs=regions.crs)
-        .to_crs(epsg=3857)
-        .buffer(10000)
-        .to_crs(regions.crs)
-    )
-
-    # GDP Mapping
-    logger.info(f"Mapping mean GDP p.c. to non-NUTS3 region: {country}")
-    with xr.open_dataset(gdp_non_nuts3) as src_gdp:
-        src_gdp = src_gdp.where(
-            (src_gdp.longitude >= bounding_box.bounds.minx.min())
-            & (src_gdp.longitude <= bounding_box.bounds.maxx.max())
-            & (src_gdp.latitude >= bounding_box.bounds.miny.min())
-            & (src_gdp.latitude <= bounding_box.bounds.maxy.max()),
-            drop=True,
-        )
-        gdp = src_gdp.to_dataframe().reset_index()
-    gdp = gdp.rename(columns={"GDP_per_capita_PPP": "gdp"})
-    gdp = gdp[gdp.time == gdp.time.max()]
-    gdp_raster = gpd.GeoDataFrame(
-        gdp,
-        geometry=gpd.points_from_xy(gdp.longitude, gdp.latitude),
-        crs="EPSG:4326",
-    )
-    gdp_mapped = gpd.sjoin(gdp_raster, regions, predicate="within")
-    gdp = (
-        gdp_mapped.copy()
-        .groupby(["Bus", "country"])
-        .agg({"gdp": "mean"})
-        .reset_index(level=["country"])
-    )
-
-    # Population Mapping
-    logger.info(f"Mapping summed population to non-NUTS3 region: {country}")
-    with rasterio.open(pop_non_nuts3) as src_pop:
-        # Mask the raster with the bounding box
-        out_image, out_transform = mask(src_pop, bounding_box, crop=True)
-        out_meta = src_pop.meta.copy()
-        out_meta.update(
-            {
-                "driver": "GTiff",
-                "height": out_image.shape[1],
-                "width": out_image.shape[2],
-                "transform": out_transform,
-            }
-        )
-    masked_data = out_image[0]  # Use the first band (rest is empty)
-    row_indices, col_indices = np.where(masked_data != src_pop.nodata)
-    values = masked_data[row_indices, col_indices]
-
-    # Affine transformation from pixel coordinates to geo coordinates
-    x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices)
-    pop_raster = pd.DataFrame({"x": x_coords, "y": y_coords, "pop": values})
-    pop_raster = gpd.GeoDataFrame(
-        pop_raster,
-        geometry=gpd.points_from_xy(pop_raster.x, pop_raster.y),
-        crs=src_pop.crs,
-    )
-    pop_mapped = gpd.sjoin(pop_raster, regions, predicate="within")
-    pop = (
-        pop_mapped.groupby(["Bus", "country"])
-        .agg({"pop": "sum"})
-        .reset_index()
-        .set_index("Bus")
-    )
-    gdp_pop = regions.join(gdp.drop(columns="country"), on="Bus").join(
-        pop.drop(columns="country"), on="Bus"
-    )
-    gdp_pop.fillna(0, inplace=True)
-
-    return gdp_pop
-
-
-if __name__ == "__main__":
-    if "snakemake" not in globals():
-        from _helpers import mock_snakemake
-
-        snakemake = mock_snakemake(
-            "build_gdp_pop_non_nuts3", configfiles=["config/config.osm-raw.yaml"]
-        )
-    configure_logging(snakemake)
-    set_scenario_config(snakemake)
-
-    n = pypsa.Network(snakemake.input.base_network)
-    regions = gpd.read_file(snakemake.input.regions)
-
-    gdp_non_nuts3 = snakemake.input.gdp_non_nuts3
-    pop_non_nuts3 = snakemake.input.pop_non_nuts3
-
-    subset = {"MD", "UA"}.intersection(snakemake.params.countries)
-
-    gdp_pop = pd.concat(
-        [
-            calc_gdp_pop(country, regions, gdp_non_nuts3, pop_non_nuts3)
-            for country in subset
-        ],
-        axis=0,
-    )
-
-    logger.info(
-        f"Exporting GDP and POP values for non-NUTS3 regions {snakemake.output}"
-    )
-    gdp_pop.reset_index().to_file(snakemake.output[0], driver="GeoJSON")
diff --git a/scripts/build_osm_boundaries.py b/scripts/build_osm_boundaries.py
new file mode 100755
index 000000000..923b5416b
--- /dev/null
+++ b/scripts/build_osm_boundaries.py
@@ -0,0 +1,184 @@
+# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+
+import json
+import logging
+
+import country_converter as coco
+import geopandas as gpd
+import pandas as pd
+from _helpers import configure_logging, set_scenario_config
+from build_shapes import eez
+from shapely import line_merge
+from shapely.geometry import LineString, MultiLineString, MultiPolygon, Polygon
+
+logger = logging.getLogger(__name__)
+
+cc = coco.CountryConverter()
+
+GEO_CRS = "EPSG:4326"
+EXCLUDER_LIST = [
+    3788485,  # RU version of Sevastopol
+    3795586,  # RU version of Crimea
+]
+
+
+def _create_linestring(row):
+    """
+    Create a LineString object from the given row.
+
+    Parameters
+    ----------
+        row (dict): A dictionary containing the row data.
+
+    Returns
+    -------
+        LineString: A LineString object representing the geometry.
+    """
+    coords = [(coord["lon"], coord["lat"]) for coord in row["geometry"]]
+    return LineString(coords)
+
+
+def _create_geometries(row, crs=GEO_CRS):
+    """
+    Create geometries from OSM data.
+
+    This function processes OpenStreetMap (OSM) data to create geometries
+    based on the roles of the members in the data. It supports "outer" and
+    "inner" roles to construct polygons and multipolygons.
+
+    Parameters
+    ----------
+        - row (dict): A dictionary containing OSM data with a "members" key.
+        - crs (str, optional): Coordinate reference system for the geometries.
+          Defaults to GEO_CRS.
+
+    Returns
+    -------
+        - shapely.geometry.Polygon or shapely.geometry.MultiPolygon: The resulting
+          geometry after processing the OSM data.
+    """
+    valid_roles = ["outer", "inner"]
+    df = pd.json_normalize(row["members"])
+    df = df[df["role"].isin(valid_roles) & ~df["geometry"].isna()]
+    df.loc[:, "geometry"] = df.apply(_create_linestring, axis=1)
+
+    gdf = gpd.GeoDataFrame(df, geometry="geometry", crs=crs)
+    outer = line_merge(gdf[gdf["role"] == "outer"].union_all())
+
+    if isinstance(outer, LineString):
+        outer = Polygon(outer)
+    if isinstance(outer, MultiLineString):
+        polygons = [Polygon(line) for line in outer.geoms]
+        outer = MultiPolygon(polygons)
+
+    if not gdf[gdf["role"] == "inner"].empty:
+        inner = line_merge(gdf[gdf["role"] == "inner"].union_all())
+
+        if isinstance(inner, LineString):
+            inner = Polygon(inner)
+        if isinstance(inner, MultiLineString):
+            polygons = [Polygon(line) for line in inner.geoms]
+            inner = MultiPolygon(polygons)
+
+        outer = outer.difference(inner)
+
+    return outer
+
+
+def build_osm_boundaries(country, adm1_path, offshore_shapes):
+    """
+    Build administrative boundaries from OSM data for a given country.
+
+    Parameters
+    ----------
+        - country (str): The country code (e.g., 'DE' for Germany).
+        - adm1_path (str): The file path to the administrative level 1 OSM data in JSON format.
+        - offshore_shapes (GeoDataFrame): A GeoDataFrame containing offshore shapes to clip the boundaries.
+
+    Returns
+    -------
+    GeoDataFrame: A GeoDataFrame containing the administrative boundaries with columns:
+        - id: The administrative level 1 code.
+        - country: The country code.
+        - name: The administrative level 1 name.
+        - osm_id: The OSM ID.
+        - geometry: The geometry of the boundary.
+    """
+    logger.info(
+        f"Building administrative boundaries from OSM data for country {country}."
+    )
+
+    df = json.load(open(adm1_path))
+    df = pd.DataFrame(df["elements"])
+
+    # Filter out ids in excluder list
+    df = df[~df["id"].isin(EXCLUDER_LIST)]
+
+    df.loc[:, "geometry"] = df.apply(_create_geometries, axis=1)
+
+    col_tags = [
+        "ISO3166-2",
+        "name:en",
+    ]
+
+    tags = pd.json_normalize(df["tags"]).map(lambda x: str(x) if pd.notnull(x) else x)
+
+    for ct in col_tags:
+        if ct not in tags.columns:
+            tags[ct] = pd.NA
+
+    tags = tags.loc[:, col_tags]
+
+    df = pd.concat([df, tags], axis="columns")
+    df.drop(columns=["type", "tags", "bounds", "members"], inplace=True)
+
+    # Rename columns
+    df.rename(
+        columns={
+            "id": "osm_id",
+            "ISO3166-2": "id",
+            "name:en": "name",
+        },
+        inplace=True,
+    )
+
+    df["country"] = country
+
+    # Resort columns
+    df = df[["id", "country", "name", "osm_id", "geometry"]]
+
+    # If any id is missing, sort by osm_id and number starting from 1
+    if df["id"].isna().any():
+        df.sort_values(by="osm_id", inplace=True)
+        df["id"] = country + "-" + df.index.to_series().add(1).astype(str)
+
+    gdf = gpd.GeoDataFrame(df, geometry="geometry", crs=GEO_CRS)
+
+    # Clip gdf by offshore shapes
+    gdf = gpd.overlay(gdf, offshore_shapes, how="difference")
+
+    # Check if substring in "id" is equal to country, if not, drop
+    gdf = gdf[gdf["id"].str.startswith(country)]
+
+    return gdf
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("build_osm_boundaries", country="MD")
+
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    country = snakemake.wildcards.country
+    adm1_path = snakemake.input.json
+    offshore_shapes = eez(snakemake.input.eez)
+
+    boundaries = build_osm_boundaries(country, adm1_path, offshore_shapes)
+
+    # Export
+    boundaries.to_file(snakemake.output[0], driver="GeoJSON")
diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py
index 099e77931..405ef7721 100755
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@@ -4,71 +4,11 @@
 """
 Creates GIS shape files of the countries, exclusive economic zones and `NUTS3 <
 https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics>
-`_ areas.
-
-Relevant Settings
------------------
-
-.. code:: yaml
-
-    countries:
-
-.. seealso::
-    Documentation of the configuration file ``config/config.yaml`` at
-    :ref:`toplevel_cf`
-
-Inputs
-------
-
-- ``data/bundle/naturalearth/ne_10m_admin_0_countries.shp``: World country shapes
-
-    .. image:: img/countries.png
-        :scale: 33 %
-
-- ``data/eez/World_EEZ_v12_20231025_gpkg/eez_v12.gpkg   ``: World `exclusive economic zones <https://en.wikipedia.org/wiki/Exclusive_economic_zone>`_ (EEZ)
-
-    .. image:: img/eez.png
-        :scale: 33 %
-
-- ``data/bundle/NUTS_2013_60M_SH/data/NUTS_RG_60M_2013.shp``: Europe NUTS3 regions
-
-    .. image:: img/nuts3.png
-        :scale: 33 %
-
-- ``data/bundle/nama_10r_3popgdp.tsv.gz``: Average annual population by NUTS3 region (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3popgdp&lang=en>`__)
-- ``data/bundle/nama_10r_3gdp.tsv.gz``: Gross domestic product (GDP) by NUTS 3 regions (`eurostat <http://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=nama_10r_3gdp&lang=en>`__)
-- ``data/ch_cantons.csv``: Mapping between Swiss Cantons and NUTS3 regions
-- ``data/bundle/je-e-21.03.02.xls``: Population and GDP data per Canton (`BFS - Swiss Federal Statistical Office <https://www.bfs.admin.ch/bfs/en/home/news/whats-new.assetdetail.7786557.html>`_ )
-
-Outputs
--------
-
-- ``resources/country_shapes.geojson``: country shapes out of country selection
-
-    .. image:: img/country_shapes.png
-        :scale: 33 %
-
-- ``resources/offshore_shapes.geojson``: EEZ shapes out of country selection
-
-    .. image:: img/offshore_shapes.png
-        :scale: 33 %
-
-- ``resources/europe_shape.geojson``: Shape of Europe including countries and EEZ
-
-    .. image:: img/europe_shape.png
-        :scale: 33 %
-
-- ``resources/nuts3_shapes.geojson``: NUTS3 shapes out of country selection including population and GDP data.
-
-    .. image:: img/nuts3_shapes.png
-        :scale: 33 %
-
-Description
------------
+`_ and OSM ADM1 areas (for BA, MD, UA, and XK).
 """
 
 import logging
-from functools import reduce
+import unicodedata
 from itertools import takewhile
 from operator import attrgetter
 
@@ -76,19 +16,119 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
+import rasterio
+import xarray as xr
 from _helpers import configure_logging, set_scenario_config
-from shapely.geometry import MultiPolygon, Polygon
+from rasterio.mask import mask
+from shapely.geometry import MultiPolygon, Polygon, box
 
 logger = logging.getLogger(__name__)
-
 cc = coco.CountryConverter()
 
 
-def _simplify_polys(polys, minarea=0.1, tolerance=None, filterremote=True):
+GEO_CRS = "EPSG:4326"
+DISTANCE_CRS = "EPSG:3035"
+GDP_YEAR = 2019
+POP_YEAR = 2019
+EUROPE_COUNTRIES = [
+    "AL",
+    "AT",
+    "BA",
+    "BE",
+    "BG",
+    "CH",
+    "CZ",
+    "DE",
+    "DK",
+    "EE",
+    "ES",
+    "FI",
+    "FR",
+    "GB",
+    "GR",
+    "HR",
+    "HU",
+    "IE",
+    "IT",
+    "LT",
+    "LU",
+    "LV",
+    "ME",
+    "MK",
+    "NL",
+    "NO",
+    "PL",
+    "PT",
+    "RO",
+    "RS",
+    "SE",
+    "SI",
+    "SK",
+    "XK",
+    "UA",
+    "MD",
+]
+DROP_REGIONS = [
+    "ES703",
+    "ES704",
+    "ES705",
+    "ES706",
+    "ES707",
+    "ES708",
+    "ES709",
+    "ES630",
+    "ES640",
+    "FRY10",
+    "FRY20",
+    "FRY30",
+    "FRY40",
+    "FRY50",
+    "NO0B1",
+    "NO0B2",
+    "PT200",
+    "PT300",
+]
+OTHER_GDP_TOTAL_2019 = {  # in bn. USD
+    "BA": 20.48,  # World Bank
+    "MD": 11.74,  # World Bank
+    "UA": 153.9,  # World Bank
+    "XK": 7.9,  # https://de.statista.com/statistik/daten/studie/415738/umfrage/bruttoinlandsprodukt-bip-des-kosovo/
+}
+OTHER_POP_2019 = {  # in 1000 persons
+    "BA": 3361,  # World Bank
+    "MD": 2664,  # World Bank
+    "UA": 44470,  # World Bank
+    "XK": 1782,  # World Bank
+}
+EXCHANGE_EUR_USD_2019 = 1.1
+NUTS3_INCLUDE = [
+    "DE80N",
+    "DEF08",
+    "DK014",
+    "DK050",
+    "GBH34",
+    "GBJ34",
+    "GBJ43",
+    "NL33A",
+    "NL33C",
+    "NL342",
+    "NL411",
+    "NL412",
+    "PL428",
+]
+
+
+def _simplify_polys(
+    polys, minarea=100 * 1e6, maxdistance=None, tolerance=None, filterremote=True
+):  # 100*1e6 = 100 km² if CRS is DISTANCE_CRS
     if isinstance(polys, MultiPolygon):
         polys = sorted(polys.geoms, key=attrgetter("area"), reverse=True)
         mainpoly = polys[0]
         mainlength = np.sqrt(mainpoly.area / (2.0 * np.pi))
+
+        if maxdistance is not None:
+            mainlength = maxdistance
+
         if mainpoly.area > minarea:
             polys = MultiPolygon(
                 [
@@ -104,32 +144,14 @@ def _simplify_polys(polys, minarea=0.1, tolerance=None, filterremote=True):
     return polys
 
 
-def countries(naturalearth, country_list):
-    df = gpd.read_file(naturalearth)
-
-    # Names are a hassle in naturalearth, try several fields
-    fieldnames = (
-        df[x].where(lambda s: s != "-99") for x in ("ISO_A2", "WB_A2", "ADM0_A3")
-    )
-    df["name"] = reduce(lambda x, y: x.fillna(y), fieldnames, next(fieldnames)).str[:2]
-    df.replace({"name": {"KV": "XK"}}, inplace=True)
-
-    df = df.loc[
-        df.name.isin(country_list) & ((df["scalerank"] == 0) | (df["scalerank"] == 5))
-    ]
-    s = df.set_index("name")["geometry"].map(_simplify_polys).set_crs(df.crs)
-
-    return s
-
-
-def eez(eez, country_list):
+def eez(eez, country_list=EUROPE_COUNTRIES):
     df = gpd.read_file(eez)
     iso3_list = cc.convert(country_list, src="ISO2", to="ISO3")  # noqa: F841
     pol_type = ["200NM", "Overlapping claim"]  # noqa: F841
     df = df.query("ISO_TER1 in @iso3_list and POL_TYPE in @pol_type").copy()
     df["name"] = cc.convert(df.ISO_TER1, src="ISO3", to="ISO2")
     s = df.set_index("name").geometry.map(
-        lambda s: _simplify_polys(s, filterremote=False)
+        lambda s: _simplify_polys(s, minarea=0.1, filterremote=False)
     )
     s = s.to_frame("geometry").set_crs(df.crs)
     s.index.name = "name"
@@ -146,96 +168,324 @@ def country_cover(country_shapes, eez_shapes=None):
     return Polygon(shell=europe_shape.exterior)
 
 
-def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
-    df = gpd.read_file(nuts3)
-    df["geometry"] = df["geometry"].map(_simplify_polys)
-    df = df.rename(columns={"NUTS_ID": "id"})[["id", "geometry"]].set_index("id")
+def normalise_text(text):
+    """
+    Removes diacritics from non-standard Latin letters, converts them to their
+    closest standard 26-letter Latin equivalents, and removes asterisks (*) from the text.
+
+    Args:
+        text (str): Input string to normalize.
+
+    Returns:
+        str: Normalized string with only standard Latin letters and no asterisks.
+    """
+    # Normalize Unicode to decompose characters (e.g., č -> c + ̌)
+    text = unicodedata.normalize("NFD", text)
+    # Remove diacritical marks by filtering out characters of the 'Mn' (Mark, Nonspacing) category
+    text = "".join(char for char in text if unicodedata.category(char) != "Mn")
+    # Remove asterisks
+    text = text.replace("*", "")
+    # Optionally, ensure only ASCII characters remain
+    text = "".join(char for char in text if char.isascii())
+    return text
+
+
+def simplify_europe(regions):
+    """
+    Simplifies the geometries of European regions by removing small islands and re-adding selected regions manually.
+
+    Parameters
+    ----------
+        regions (GeoDataFrame): A GeoDataFrame containing the geometries of European regions.
+
+    Returns
+    -------
+        regions (GeoDataFrame): A simplified GeoDataFrame with updated geometries and dropped entries.
+    """
+    logger.info(
+        "Simplifying geometries for Europe by removing small islands smaller than 500 km2 or further than 200 km away."
+    )
+    coverage = (
+        regions.to_crs(DISTANCE_CRS)
+        .groupby("country")["geometry"]
+        .apply(lambda x: x.union_all())
+    )
+    coverage_dk = coverage.loc[["DK"]]
+    coverage = coverage.apply(_simplify_polys, minarea=500 * 1e6, maxdistance=200 * 1e3)
+    coverage_dk = coverage_dk.apply(
+        _simplify_polys, minarea=65 * 1e6, maxdistance=200 * 1e3
+    )
+    coverage.loc["DK"] = coverage_dk.values[0]
+    coverage = gpd.GeoDataFrame(geometry=coverage, crs=DISTANCE_CRS).to_crs(GEO_CRS)
+
+    # Re-add selected regions manually
+    coverage = pd.concat([coverage, regions.loc[NUTS3_INCLUDE, ["geometry"]]])
+    shape = coverage.union_all()
+
+    regions_polygon = regions.explode()
+    regions_polygon = gpd.sjoin(
+        regions_polygon,
+        gpd.GeoDataFrame(geometry=[shape], crs=GEO_CRS),
+        how="inner",
+        predicate="intersects",
+    )
+
+    # Group by level3 and country, and aggregate by union, and name index "index"
+    regions_polygon = regions_polygon.groupby(["level3"])["geometry"].apply(
+        lambda x: x.union_all()
+    )
 
-    pop = pd.read_table(nuts3pop, na_values=[":"], delimiter=" ?\t", engine="python")
-    pop = (
-        pop.set_index(
-            pd.MultiIndex.from_tuples(pop.pop("unit,geo\\time").str.split(","))
-        )
-        .loc["THS"]
-        .map(lambda x: pd.to_numeric(x, errors="coerce"))
-        .bfill(axis=1)
-    )["2014"]
-
-    gdp = pd.read_table(nuts3gdp, na_values=[":"], delimiter=" ?\t", engine="python")
-    gdp = (
-        gdp.set_index(
-            pd.MultiIndex.from_tuples(gdp.pop("unit,geo\\time").str.split(","))
-        )
-        .loc["EUR_HAB"]
-        .map(lambda x: pd.to_numeric(x, errors="coerce"))
-        .bfill(axis=1)
-    )["2014"]
-
-    cantons = pd.read_csv(ch_cantons)
-    cantons = cantons.set_index(cantons["HASC"].str[3:])["NUTS"]
-    cantons = cantons.str.pad(5, side="right", fillchar="0")
-
-    swiss = pd.read_excel(ch_popgdp, skiprows=3, index_col=0)
-    swiss.columns = swiss.columns.to_series().map(cantons)
-
-    swiss_pop = pd.to_numeric(swiss.loc["Residents in 1000", "CH040":])
-    pop = pd.concat([pop, swiss_pop])
-    swiss_gdp = pd.to_numeric(
-        swiss.loc["Gross domestic product per capita in Swiss francs", "CH040":]
+    # Update regions
+    regions = regions.loc[regions.index.isin(regions_polygon.index)]
+    regions.loc[regions_polygon.index, "geometry"] = regions_polygon
+
+    return regions
+
+
+def create_regions(
+    country_list,
+    nuts3_path,
+    ba_adm1_path,
+    md_adm1_path,
+    ua_adm1_path,
+    xk_adm1_path,
+    offshore_shapes,
+    nuts3_gdp,
+    nuts3_pop,
+    other_gdp,
+    other_pop,
+):
+    """
+    Create regions by processing NUTS and non-NUTS geographical shapes.
+
+    Parameters
+    ----------
+        - country_list (list): List of country codes to include.
+        - nuts3_path (str): Path to the NUTS3 2021 shapefile.
+        - ba_adm1_path (str): Path to adm1 boundaries for Bosnia and  Herzegovina.
+        - md_adm1_path (str): Path to adm1 boundaries for Moldova.
+        - ua_adm1_path (str): Path to adm1 boundaries for Ukraine.
+        - xk_adm1_path (str): Path to adm1 boundaries for Kosovo.
+        - offshore_shapes (geopandas.GeoDataFrame): Geographical shapes of the exclusive economic zones.
+
+    Returns
+    -------
+        geopandas.GeoDataFrame: A GeoDataFrame containing the processed regions with columns:
+            - id: Region identifier.
+            - country: Country code.
+            - name: Region name.
+            - geometry: Geometrical shape of the region.
+            - level1: Level 1 region identifier.
+            - level2: Level 2 region identifier.
+            - level3: Level 3 region identifier.
+    """
+    # Prepare NUTS shapes
+    logger.info("Processing NUTS regions.")
+    regions = gpd.read_file(nuts3_path)
+    regions.loc[regions.CNTR_CODE == "EL", "CNTR_CODE"] = "GR"  # Rename "EL" to "GR
+    regions["NUTS_ID"] = regions["NUTS_ID"].str.replace("EL", "GR")
+    regions.loc[regions.CNTR_CODE == "UK", "CNTR_CODE"] = "GB"  # Rename "UK" to "GB"
+    regions["NUTS_ID"] = regions["NUTS_ID"].str.replace("UK", "GB")
+
+    # Create new df
+    regions = regions[["NUTS_ID", "CNTR_CODE", "NAME_LATN", "geometry"]]
+
+    # Rename columns and add level columns
+    regions = regions.rename(
+        columns={"NUTS_ID": "id", "CNTR_CODE": "country", "NAME_LATN": "name"}
+    )
+
+    # Normalise text
+    regions["id"] = regions["id"].apply(normalise_text)
+
+    regions["level1"] = regions["id"].str[:3]
+    regions["level2"] = regions["id"].str[:4]
+    regions["level3"] = regions["id"]
+
+    # Non NUTS countries
+    logger.info("Processing non-NUTS regions.")
+
+    ba_adm1 = gpd.read_file(ba_adm1_path)
+    md_adm1 = gpd.read_file(md_adm1_path)
+    ua_adm1 = gpd.read_file(ua_adm1_path)
+    xk_adm1 = gpd.read_file(xk_adm1_path)
+
+    regions_non_nuts = pd.concat([ba_adm1, md_adm1, ua_adm1, xk_adm1])
+    regions_non_nuts = regions_non_nuts.drop(columns=["osm_id"])
+
+    # Normalise text
+    regions_non_nuts["id"] = regions_non_nuts["id"].apply(normalise_text)
+    regions_non_nuts["name"] = regions_non_nuts["name"].apply(normalise_text)
+
+    # Add level columns
+    regions_non_nuts["level1"] = regions_non_nuts["id"]
+    regions_non_nuts["level2"] = regions_non_nuts["id"]
+    regions_non_nuts["level3"] = regions_non_nuts["id"]
+
+    # Clip regions by non-NUTS shapes
+    regions["geometry"] = regions["geometry"].difference(
+        regions_non_nuts.geometry.union_all()
+    )
+
+    # Concatenate NUTS and non-NUTS regions
+    logger.info("Harmonising NUTS and non-NUTS regions.")
+    regions = pd.concat([regions, regions_non_nuts])
+    regions.set_index("id", inplace=True)
+
+    # Drop regions out of geographical scope
+    regions = regions.drop(DROP_REGIONS, errors="ignore")
+
+    # Clip regions by offshore shapes
+    logger.info("Clipping regions by offshore shapes.")
+    regions["geometry"] = regions["geometry"].difference(
+        offshore_shapes.geometry.union_all()
     )
-    gdp = pd.concat([gdp, swiss_gdp])
 
-    df = df.join(pd.DataFrame(dict(pop=pop, gdp=gdp)))
+    # GDP and POP for NUTS3 regions
+    # GDP
+    logger.info(f"Importing JRC ARDECO GDP data for year {GDP_YEAR}.")
+    nuts3_gdp = pd.read_csv(nuts3_gdp, index_col=[0])
+    nuts3_gdp = nuts3_gdp.query("LEVEL_ID == 3 and UNIT == 'EUR'")
+    nuts3_gdp.index = nuts3_gdp.index.str.replace("UK", "GB").str.replace("EL", "GR")
+    nuts3_gdp = nuts3_gdp[str(GDP_YEAR)]
+    regions["gdp"] = nuts3_gdp
+
+    # Population
+    logger.info(f"Importing JRC ARDECO population data for year {POP_YEAR}.")
+    nuts3_pop = pd.read_csv(nuts3_pop, index_col=[0])
+    nuts3_pop = nuts3_pop.query("LEVEL_ID == 3")
+    nuts3_pop.index = nuts3_pop.index.str.replace("UK", "GB").str.replace("EL", "GR")
+    nuts3_pop = nuts3_pop[str(POP_YEAR)]
+    regions["pop"] = nuts3_pop.div(1e3).round(0)
+
+    # GDP and POP for non-NUTS3 regions
+    other_countries = {"BA", "MD", "UA", "XK"}
+
+    if any(country in country_list for country in other_countries):
+        gdp_pop = pd.concat(
+            [
+                calc_gdp_pop(country, regions, other_gdp, other_pop)
+                for country in other_countries
+            ],
+            axis=0,
+        )
+
+        # Merge NUTS3 and non-NUTS3 regions
+        regions.loc[gdp_pop.index, ["gdp", "pop"]] = gdp_pop[["gdp", "pop"]]
 
-    df["country"] = (
-        df.index.to_series().str[:2].replace(dict(UK="GB", EL="GR", KV="XK"))
+    # Resort columns and rename index
+    regions = regions[
+        ["name", "level1", "level2", "level3", "gdp", "pop", "country", "geometry"]
+    ]
+    regions.index.name = "index"
+
+    # Simplify geometries
+    regions = simplify_europe(regions)
+
+    # Only include countries in the config
+    regions = regions.query("country in @country_list")
+
+    return regions
+
+
+def calc_gdp_pop(country, regions, gdp_non_nuts3, pop_non_nuts3):
+    """
+    Calculate the GDP p.c. and population values for non NUTS3 regions.
+
+    Parameters
+    ----------
+        - country (str): The two-letter country code of the non-NUTS3 region.
+        - regions (GeoDataFrame): A GeoDataFrame containing the regions.
+        - gdp_non_nuts3 (str): The file path to the dataset containing the GDP p.c values
+          for non NUTS3 countries (e.g. MD, UA)
+        - pop_non_nuts3 (str): The file path to the dataset containing the POP values
+        for non NUTS3 countries (e.g. MD, UA)
+
+    Returns
+    -------
+    tuple: A tuple containing two GeoDataFrames:
+        - gdp: A GeoDataFrame with the mean GDP p.c. values mapped to each bus.
+        - pop: A GeoDataFrame with the summed POP values mapped to each bus.
+    """
+    region = regions.loc[regions.country == country, ["geometry"]]
+    # Create a bounding box for UA, MD from region shape, including a buffer of 10000 metres
+    bounding_box = (
+        gpd.GeoDataFrame(geometry=[box(*region.total_bounds)], crs=region.crs)
+        .to_crs(epsg=3857)
+        .buffer(10000)
+        .to_crs(region.crs)
     )
 
-    excludenuts = pd.Index(
-        (
-            "FRA10",
-            "FRA20",
-            "FRA30",
-            "FRA40",
-            "FRA50",
-            "PT200",
-            "PT300",
-            "ES707",
-            "ES703",
-            "ES704",
-            "ES705",
-            "ES706",
-            "ES708",
-            "ES709",
-            "FI2",
-            "FR9",
+    # GDP Mapping
+    logger.info(f"Mapping mean GDP p.c. to non-NUTS3 region: {country}")
+    with xr.open_dataset(gdp_non_nuts3) as src_gdp:
+        src_gdp = src_gdp.where(
+            (src_gdp.longitude >= bounding_box.bounds.minx.min())
+            & (src_gdp.longitude <= bounding_box.bounds.maxx.max())
+            & (src_gdp.latitude >= bounding_box.bounds.miny.min())
+            & (src_gdp.latitude <= bounding_box.bounds.maxy.max()),
+            drop=True,
         )
+        gdp = src_gdp.to_dataframe().reset_index()
+    gdp = gdp.rename(columns={"GDP_per_capita_PPP": "gdp"})
+    gdp = gdp[gdp.time == gdp.time.max()]
+    gdp_raster = gpd.GeoDataFrame(
+        gdp,
+        geometry=gpd.points_from_xy(gdp.longitude, gdp.latitude),
+        crs="EPSG:4326",
     )
-    excludecountry = pd.Index(("MT", "TR", "LI", "IS", "CY"))
-
-    df = df.loc[df.index.difference(excludenuts)]
-    df = df.loc[~df.country.isin(excludecountry)]
-
-    manual = gpd.GeoDataFrame(
-        [
-            ["BA1", "BA", 3234.0],
-            ["RS1", "RS", 6664.0],
-            ["AL1", "AL", 2778.0],
-            ["XK1", "XK", 1587.0],
-        ],
-        columns=["NUTS_ID", "country", "pop"],
-        geometry=gpd.GeoSeries(),
-        crs=df.crs,
+    gdp_mapped = gpd.sjoin(gdp_raster, region, predicate="within")
+    gdp = gdp_mapped.groupby(["id"]).agg({"gdp": "mean"})
+
+    # Population Mapping
+    logger.info(f"Mapping summed population to non-NUTS3 region: {country}")
+    with rasterio.open(pop_non_nuts3) as src_pop:
+        # Mask the raster with the bounding box
+        out_image, out_transform = mask(src_pop, bounding_box, crop=True)
+        out_meta = src_pop.meta.copy()
+        out_meta.update(
+            {
+                "driver": "GTiff",
+                "height": out_image.shape[1],
+                "width": out_image.shape[2],
+                "transform": out_transform,
+            }
+        )
+    masked_data = out_image[0]  # Use the first band (rest is empty)
+    row_indices, col_indices = np.where(masked_data != src_pop.nodata)
+    values = masked_data[row_indices, col_indices]
+
+    # Affine transformation from pixel coordinates to geo coordinates
+    x_coords, y_coords = rasterio.transform.xy(out_transform, row_indices, col_indices)
+    pop_raster = pd.DataFrame({"x": x_coords, "y": y_coords, "pop": values})
+    pop_raster = gpd.GeoDataFrame(
+        pop_raster,
+        geometry=gpd.points_from_xy(pop_raster.x, pop_raster.y),
+        crs=src_pop.crs,
     )
-    manual["geometry"] = manual["country"].map(country_shapes.to_crs(df.crs))
-    manual = manual.dropna()
-    manual = manual.set_index("NUTS_ID")
+    pop_mapped = gpd.sjoin(pop_raster, region, predicate="within")
+    pop = pop_mapped.groupby(["id"]).agg({"pop": "sum"}).reset_index().set_index("id")
+    gdp_pop = region.join(gdp).join(pop).drop(columns="geometry")
+    gdp_pop.fillna(0, inplace=True)
 
-    df = pd.concat([df, manual], sort=False)
+    # Clean and rescale data to 2019 historical values
+    gdp_pop["gdp"] = gdp_pop["gdp"].round(0)
+    gdp_pop["pop"] = gdp_pop["pop"].div(1e3).round(0)
+
+    gdp_pop["pop"] = (
+        gdp_pop["pop"].div(gdp_pop["pop"].sum()).mul(OTHER_POP_2019[country]).round(0)
+    )
 
-    df.loc["ME000", "pop"] = 617.0
+    gdp_pop["gdp"] = (
+        gdp_pop["gdp"]
+        .mul(1e9)
+        .div(gdp_pop["gdp"].sum())
+        .mul(OTHER_GDP_TOTAL_2019[country])
+        .div(EXCHANGE_EUR_USD_2019)
+        / (1e3 * gdp_pop["pop"])
+    ).round(0)
 
-    return df
+    return gdp_pop
 
 
 if __name__ == "__main__":
@@ -246,25 +496,40 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
     configure_logging(snakemake)
     set_scenario_config(snakemake)
 
-    country_shapes = countries(snakemake.input.naturalearth, snakemake.params.countries)
-
-    country_shapes.reset_index().to_file(snakemake.output.country_shapes)
-
+    # Offshore regions
     offshore_shapes = eez(snakemake.input.eez, snakemake.params.countries)
     offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes)
 
+    # Onshore regions
+    regions = create_regions(
+        snakemake.params.countries,
+        snakemake.input.nuts3_2021,
+        snakemake.input.ba_adm1,
+        snakemake.input.md_adm1,
+        snakemake.input.ua_adm1,
+        snakemake.input.xk_adm1,
+        offshore_shapes,
+        snakemake.input.nuts3_gdp,
+        snakemake.input.nuts3_pop,
+        snakemake.input.other_gdp,
+        snakemake.input.other_pop,
+    )
+
+    country_shapes = regions.groupby("country")["geometry"].apply(
+        lambda x: x.union_all()
+    )
+    country_shapes.crs = regions.crs
+    country_shapes.index.name = "name"
+    country_shapes.reset_index().to_file(snakemake.output.country_shapes)
+
     europe_shape = gpd.GeoDataFrame(
         geometry=[country_cover(country_shapes, offshore_shapes.geometry)],
         crs=country_shapes.crs,
     )
     europe_shape.reset_index().to_file(snakemake.output.europe_shape)
 
-    nuts3_shapes = nuts3(
-        country_shapes,
-        snakemake.input.nuts3,
-        snakemake.input.nuts3pop,
-        snakemake.input.nuts3gdp,
-        snakemake.input.ch_cantons,
-        snakemake.input.ch_popgdp,
+    # Export regions including GDP and POP data
+    logger.info(
+        f"Exporting NUTS3 and ADM1 shapes with GDP and POP values to {snakemake.output.nuts3_shapes}."
     )
-    nuts3_shapes.reset_index().to_file(snakemake.output.nuts3_shapes)
+    regions.reset_index().to_file(snakemake.output.nuts3_shapes)
diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py
index 1ea72d66c..3288dd0d2 100644
--- a/scripts/cluster_network.py
+++ b/scripts/cluster_network.py
@@ -102,6 +102,7 @@
 import numpy as np
 import pandas as pd
 import pypsa
+import tqdm
 import xarray as xr
 from _helpers import configure_logging, set_scenario_config
 from packaging.version import Version, parse
@@ -340,7 +341,10 @@ def cluster_regions(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
 
-        snakemake = mock_snakemake("cluster_network", clusters=60)
+        snakemake = mock_snakemake(
+            "cluster_network",
+            clusters=60,
+        )
     configure_logging(snakemake)
     set_scenario_config(snakemake)
 
@@ -378,6 +382,83 @@ def cluster_regions(
             custom_busmap.index = custom_busmap.index.astype(str)
             logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
             busmap = custom_busmap
+
+        mode = params.mode
+        if mode == "admin":
+            level_map = {
+                0: "country",
+                1: "level1",
+                2: "level2",
+                3: "level3",
+            }
+            admin_level = params.admin["level"]
+            logger.info(f"Clustering at administrative level {admin_level}.")
+            # check if BA, MD, UA, or XK are in the network
+            adm1_countries = ["BA", "MD", "UA", "XK"]
+            buses = n.buses[["x", "y", "country"]].copy()
+            # Find the intersection of adm1_countries and n.buses.country
+            adm1_countries = list(
+                set(adm1_countries).intersection(buses["country"].unique())
+            )
+            if adm1_countries:
+                logger.info(
+                    f"Note that the following countries can only be clustered at a maximum administration level of 1: {adm1_countries}."
+                )
+
+            nuts3_regions = gpd.read_file(snakemake.input.nuts3_shapes).set_index(
+                "index"
+            )
+            nuts3_regions["column"] = level_map[admin_level]
+
+            country_level = {k: v for k, v in params.admin.items() if k != "level"}
+            if country_level:
+                country_level_list = "\n".join(
+                    [f"- {k}: level {v}" for k, v in country_level.items()]
+                )
+                logger.info(
+                    f"Setting individual administrative levels for:\n{country_level_list}"
+                )
+                nuts3_regions.loc[
+                    nuts3_regions["country"].isin(country_level.keys()), "column"
+                ] = (
+                    nuts3_regions.loc[
+                        nuts3_regions["country"].isin(country_level.keys()), "country"
+                    ]
+                    .map(country_level)
+                    .map(level_map)
+                )
+            nuts3_regions["busmap"] = nuts3_regions.apply(
+                lambda row: row[row["column"]], axis=1
+            )
+
+            # Group by busmap
+            admin_shapes = nuts3_regions[["busmap", "geometry"]]
+            admin_shapes = admin_shapes.groupby("busmap")["geometry"].apply(
+                lambda x: x.union_all()
+            )
+            admin_shapes = gpd.GeoDataFrame(
+                admin_shapes, geometry="geometry", crs=nuts3_regions.crs
+            )
+            admin_shapes["country"] = admin_shapes.index.str[:2]
+
+            buses["geometry"] = gpd.points_from_xy(buses["x"], buses["y"])
+            buses = gpd.GeoDataFrame(buses, geometry="geometry", crs="EPSG:4326")
+            buses["busmap"] = ""
+
+            # Map based for each country
+            for country in tqdm.tqdm(buses["country"].unique()):
+                buses_subset = buses.loc[buses["country"] == country]
+
+                buses.loc[buses_subset.index, "busmap"] = gpd.sjoin_nearest(
+                    buses_subset.to_crs(epsg=3857),
+                    admin_shapes.loc[admin_shapes["country"] == country].to_crs(
+                        epsg=3857
+                    ),
+                    how="left",
+                )["busmap_right"]
+
+            busmap = buses["busmap"]
+
         else:
             algorithm = params.cluster_network["algorithm"]
             features = None
diff --git a/scripts/retrieve_osm_boundaries.py b/scripts/retrieve_osm_boundaries.py
new file mode 100644
index 000000000..6070a57d6
--- /dev/null
+++ b/scripts/retrieve_osm_boundaries.py
@@ -0,0 +1,124 @@
+# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: MIT
+"""
+Retrieve administrative boundaries for the specified country using the overpass API and save it
+to the specified output files.
+
+Note that overpass requests are based on a fair
+use policy. `retrieve_osm_data` is meant to be used in a way that respects this
+policy by fetching the needed data once, only.
+"""
+
+import json
+import logging
+import time
+
+import requests
+from _helpers import (  # set_scenario_config,; update_config_from_wildcards,; update_config_from_wildcards,
+    configure_logging,
+    set_scenario_config,
+)
+
+logger = logging.getLogger(__name__)
+
+
+ADM1_SPECIALS = {
+    "XK": 5,
+}
+
+
+def retrieve_osm_boundaries(
+    country,
+    adm1_specials,
+    output,
+):
+    """
+    Retrieve OSM administrative boundaries for the specified country and save it to the specified
+    output files.
+
+    Parameters
+    ----------
+    country : str
+        The country code for which the OSM data should be retrieved.
+    """
+    # Overpass API endpoint URL
+    overpass_url = "https://overpass-api.de/api/interpreter"
+
+    wait_time = 5
+
+    osm_adm_level = "4"
+    if country in adm1_specials:
+        osm_adm_level = adm1_specials[country]  # special case e.g. for Kosovo
+
+    retries = 3
+    for attempt in range(retries):
+        logger.info(
+            f" - Fetching OSM administrative boundaries for {country} (Attempt {attempt + 1})..."
+        )
+
+        # Build the overpass query
+        op_area = f'area["ISO3166-1"="{country}"]'
+        op_query = f"""
+            [out:json];
+            {op_area}->.searchArea;
+            (
+            relation["boundary"="administrative"]["admin_level"={osm_adm_level}]["name"](area.searchArea);
+            );
+            out body geom;
+        """
+        try:
+            # Send the request
+            response = requests.post(overpass_url, data=op_query)
+            response.raise_for_status()  # Raise HTTPError for bad responses
+
+            filepath = output[0]
+
+            with open(filepath, mode="w") as f:
+                json.dump(response.json(), f, indent=2)
+            logger.info(" - Done.")
+            break  # Exit the retry loop on success
+        except (json.JSONDecodeError, requests.exceptions.RequestException) as e:
+            logger.error(
+                f"Error for retrieving administrative boundaries in country {country}: {e}"
+            )
+            logger.debug(
+                f"Response text: {response.text if response else 'No response'}"
+            )
+            if attempt < retries - 1:
+                wait_time += 15
+                logger.info(f"Waiting {wait_time} seconds before retrying...")
+                time.sleep(wait_time)
+            else:
+                logger.error(
+                    f"Failed to retrieve administrative boundaries in country {country} after {retries} attempts."
+                )
+        except Exception as e:
+            # For now, catch any other exceptions and log them. Treat this
+            # the same as a RequestException and try to run again two times.
+            logger.error(
+                f"Unexpected error in retrieving administrative boundaries in country {country}: {e}"
+            )
+            if attempt < retries - 1:
+                wait_time += 10
+                logger.info(f"Waiting {wait_time} seconds before retrying...")
+                time.sleep(wait_time)
+            else:
+                logger.error(
+                    f"Failed to retrieve administrative boundaries for country {country} after {retries} attempts."
+                )
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        from _helpers import mock_snakemake
+
+        snakemake = mock_snakemake("retrieve_osm_boundaries", country="XK")
+    configure_logging(snakemake)
+    set_scenario_config(snakemake)
+
+    # Retrieve the OSM data
+    country = snakemake.wildcards.country
+    output = snakemake.output
+
+    retrieve_osm_boundaries(country, ADM1_SPECIALS, output)
diff --git a/test/test_build_shapes.py b/test/test_build_shapes.py
index 48aeac7f5..8660d0011 100644
--- a/test/test_build_shapes.py
+++ b/test/test_build_shapes.py
@@ -15,7 +15,7 @@
 
 sys.path.append("./scripts")
 
-from build_shapes import _simplify_polys, countries, country_cover, eez
+from build_shapes import _simplify_polys, eez
 
 path_cwd = pathlib.Path.cwd()
 
@@ -59,20 +59,6 @@ def test_simplify_polys(tolerance, expected_tuple, italy_shape):
     assert all([x == y for x, y in zip(output_tuple, expected_tuple)])
 
 
-@pytest.mark.parametrize(
-    "country_list",
-    [["MK"], ["IT"]],
-)
-def test_countries(config, download_natural_earth, country_list):
-    """
-    Verify what is returned by countries.
-    """
-    natural_earth = download_natural_earth
-    country_shapes_df = countries(natural_earth, country_list)
-    assert country_shapes_df.shape == (1,)
-    assert country_shapes_df.index.unique().tolist() == country_list
-
-
 @pytest.mark.parametrize(
     "country_list",
     [["DE"], ["IT"]],
@@ -85,46 +71,3 @@ def test_eez(config, country_list, download_eez):
     offshore_shapes_gdf = eez(eez_path, country_list)
     assert offshore_shapes_gdf.shape == (1, 1)
     assert offshore_shapes_gdf.index == country_list[0]
-
-
-@pytest.mark.parametrize(
-    "country_list,expected_tuple",
-    [
-        (
-            ["IT"],
-            (
-                89.49,
-                61.67,
-            ),
-        ),
-        (
-            ["DE"],
-            (
-                53.72,
-                56.46,
-            ),
-        ),
-    ],
-)
-def test_country_cover(
-    country_list, download_natural_earth, download_eez, expected_tuple
-):
-    """
-    Verify what is returned by country_cover.
-    """
-    natural_earth = download_natural_earth
-    eez_path = download_eez
-    country_shapes_gdf = countries(natural_earth, country_list).reset_index()
-    offshore_shapes_gdf = eez(eez_path, country_list).reset_index()
-    europe_shape_gdf = gpd.GeoDataFrame(
-        geometry=[country_cover(country_shapes_gdf, offshore_shapes_gdf.geometry)],
-        crs=6933,
-    )
-    europe_shape_gdf["area"] = europe_shape_gdf.area
-    europe_shape_gdf["perimeter"] = europe_shape_gdf.length
-    output_tuple = (
-        np.round(europe_shape_gdf["area"][0], 2),
-        np.round(europe_shape_gdf["perimeter"][0], 2),
-    )
-    assert len(output_tuple) == len(expected_tuple)
-    assert all([x == y for x, y in zip(output_tuple, expected_tuple)])