From cdd6bfd8c8433cf51890cf879ac665db9c45e331 Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Wed, 11 Dec 2024 11:01:47 -0500 Subject: [PATCH 1/9] storage OAR fix --- workflow/scripts/osemosys_global/storage/activity.py | 2 +- .../scripts/osemosys_global/storage/user_defined_capacity.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/osemosys_global/storage/activity.py b/workflow/scripts/osemosys_global/storage/activity.py index 6e50756e..6057a2ef 100644 --- a/workflow/scripts/osemosys_global/storage/activity.py +++ b/workflow/scripts/osemosys_global/storage/activity.py @@ -40,7 +40,7 @@ def activity_storage(storage_set, df_iar_base, df_oar_base, storage_param, for each_tech in storage_param.keys(): df_storage_oar.loc[df_storage_oar['TECHNOLOGY'].str.contains(each_tech), - 'VALUE'] = round(1 / (efficiency_dict[each_tech] / 100), 3) + 'VALUE'] = round(efficiency_dict[each_tech] / 100, 3) df_storage_oar["TECHNOLOGY"] = "PWR" + df_storage_oar["TECHNOLOGY"] df_storage_oar["FUEL"] = "ELC" + df_storage_oar["TECHNOLOGY"].str[6:13] diff --git a/workflow/scripts/osemosys_global/storage/user_defined_capacity.py b/workflow/scripts/osemosys_global/storage/user_defined_capacity.py index d2fd7109..fbb853c7 100644 --- a/workflow/scripts/osemosys_global/storage/user_defined_capacity.py +++ b/workflow/scripts/osemosys_global/storage/user_defined_capacity.py @@ -171,7 +171,7 @@ def set_user_defined_capacity_sto(tech_capacity_sto, for idx, tech_params in tech_capacity_sto.items(): df_oar.loc[df_oar['TECHNOLOGY'] == tech_params[0], - 'VALUE'] = round(1 / (efficiency_dict[idx] / 100), 3) + 'VALUE'] = round(efficiency_dict[idx] / 100, 3) # Update CapitalCostStorage with user-defined capex costs by storage technology df_cap_cost_sto = cap_cost_sto_base.copy() From 92cc09c81f3b5c208d7473b29acc9a832fc16f8b Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Fri, 20 Dec 2024 08:56:17 +0100 Subject: [PATCH 2/9] Delete legacy user_defined_capacity.py --- .../osemosys_global/user_defined_capacity.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 workflow/scripts/osemosys_global/user_defined_capacity.py diff --git a/workflow/scripts/osemosys_global/user_defined_capacity.py b/workflow/scripts/osemosys_global/user_defined_capacity.py deleted file mode 100644 index ff8d40d1..00000000 --- a/workflow/scripts/osemosys_global/user_defined_capacity.py +++ /dev/null @@ -1,70 +0,0 @@ -import os -import pandas as pd -from configuration import ConfigFile, ConfigPaths -from utils import apply_dtypes - -# LOGGING -import logging -logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) - -def main(): - '''Creates capacity limits on renewable technologies.''' - - # CONFIGURATION PARAMETERS - config_paths = ConfigPaths() - config = ConfigFile('config') - - scenario_data_dir = config_paths.scenario_data_dir - region = config.get('region') - years = range(config.get('startYear'), config.get('endYear') + 1) - tech_capacity = config.get('user_defined_capacity') - techCapacity = [] - - for tech, tech_params in tech_capacity.items(): - techCapacity.append([tech, tech_params[0], tech_params[1]]) - tech_capacity_df = pd.DataFrame(techCapacity, - columns=['TECHNOLOGY', 'VALUE', 'YEAR']) - tech_capacity_df['REGION'] = region - tech_capacity_df = tech_capacity_df[['REGION', 'TECHNOLOGY', 'YEAR', 'VALUE']] - - tech_set = 
pd.read_csv(os.path.join(scenario_data_dir, 'TECHNOLOGY.csv')) - - for each_tech in list(tech_capacity_df['TECHNOLOGY'].unique()): - if each_tech not in list(tech_set['VALUE']): - tech_capacity_df = tech_capacity_df.loc[~(tech_capacity_df['TECHNOLOGY'].isin([each_tech]))] - df_min_cap_inv = pd.read_csv(os.path.join(scenario_data_dir, 'TotalAnnualMinCapacityInvestment.csv')) - df_min_cap_inv = pd.concat([df_min_cap_inv, tech_capacity_df]).reset_index(drop=True) - df_min_cap_inv.drop_duplicates(inplace=True) - - df_max_cap_inv = pd.read_csv(os.path.join(scenario_data_dir, 'TotalAnnualMaxCapacityInvestment.csv')) - - max_cap_techs = [] - for index, row in tech_capacity_df.iterrows(): - for each_year in years: - if row['YEAR'] == each_year: - value = row['VALUE'] - else: - value = 0 - max_cap_techs.append([row['REGION'], - row['TECHNOLOGY'], - each_year, - value]) - max_cap_techs_df = pd.DataFrame(max_cap_techs, - columns=['REGION', - 'TECHNOLOGY', - 'YEAR', - 'VALUE']) - df_max_cap_inv = pd.concat([df_max_cap_inv, max_cap_techs_df]).reset_index(drop=True) - df_max_cap_inv.drop_duplicates(inplace=True) - - df_max_cap_inv = apply_dtypes(df_max_cap_inv, "TotalAnnualMaxCapacityInvestment") - df_min_cap_inv = apply_dtypes(df_min_cap_inv, "TotalAnnualMinCapacityInvestment") - - df_max_cap_inv.to_csv(os.path.join( - scenario_data_dir, "TotalAnnualMaxCapacityInvestment.csv"), index=None) - df_min_cap_inv.to_csv(os.path.join( - scenario_data_dir, "TotalAnnualMinCapacityInvestment.csv"), index=None) - -if __name__ == '__main__': - main() - logging.info('User-defined capacities sucessfully set') \ No newline at end of file From 7476a898e971774764520be736d1816345b0e0d7 Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:03:26 +0100 Subject: [PATCH 3/9] Removed ConfigPaths dependency external files retrieval --- workflow/rules/retrieve.smk | 3 ++- workflow/scripts/osemosys_global/external_files.py | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/workflow/rules/retrieve.smk b/workflow/rules/retrieve.smk index cd255d54..cfd75468 100644 --- a/workflow/rules/retrieve.smk +++ b/workflow/rules/retrieve.smk @@ -39,7 +39,8 @@ rule download_external_files: message: "Downloading external files..." 
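    # A minimal sketch (assumed usage) of how the script now consumes this
    # rule-level param instead of resolving paths through ConfigPaths,
    # mirroring the external_files.py change further below:
    #   input_data_dir = snakemake.params.input_data_dir
    #   path = os.path.join(input_data_dir, file)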
params: - files = get_external_links() + files = get_external_links(), + input_data_dir = 'resources/data' log: log = "results/logs/external_files.log" output: diff --git a/workflow/scripts/osemosys_global/external_files.py b/workflow/scripts/osemosys_global/external_files.py index 543be3fa..2cbd7452 100644 --- a/workflow/scripts/osemosys_global/external_files.py +++ b/workflow/scripts/osemosys_global/external_files.py @@ -2,7 +2,6 @@ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) from pathlib import Path -from configuration import ConfigPaths import os import requests import sys @@ -21,14 +20,11 @@ def download_file(file: str, url: Path | str) -> None: with open(path, "wb") as f: f.write(data.content) -# CONFIGURATION PARAMETERS -config_paths = ConfigPaths() -input_data_dir = config_paths.input_data_dir - if __name__ == "__main__": if "snakemake" in globals(): external_files = snakemake.params.files + input_data_dir = snakemake.params.input_data_dir else: if len(sys.argv) != 3: msg = "Usage: python {} " @@ -38,6 +34,8 @@ def download_file(file: str, url: Path | str) -> None: in_file = sys.argv[1] in_url = sys.argv[2] external_files = {in_file: in_url} + + input_data_dir = 'resources/data' for file, url in external_files.items(): path = os.path.join(input_data_dir, file) From e2dc657a3152dd7c7591dd228d3533cf37824358 Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:11:32 +0100 Subject: [PATCH 4/9] Removed ConfigPaths dependency timeslice script --- workflow/rules/preprocess.smk | 38 +- workflow/scripts/osemosys_global/TS_data.py | 962 ++++++++++---------- 2 files changed, 516 insertions(+), 484 deletions(-) diff --git a/workflow/rules/preprocess.smk b/workflow/rules/preprocess.smk index 69a2af7a..fd5b2518 100644 --- a/workflow/rules/preprocess.smk +++ b/workflow/rules/preprocess.smk @@ -287,24 +287,32 @@ rule timeslice: message: 'Generating timeslice data...' 
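    # A minimal sketch (assumed usage) of the named-input pattern introduced
    # below, which lets the script address files explicitly rather than by
    # position in a file list:
    #   demand_df = pd.read_csv(snakemake.input.plexos_demand)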
input: - 'resources/data/All_Demand_UTC_2015.csv', - 'resources/data/CSP 2015.csv', - 'resources/data/SolarPV 2015.csv', - 'resources/data/Hydro_Monthly_Profiles (15 year average).csv', - 'resources/data/Won 2015.csv', - 'resources/data/Woff 2015.csv', - 'resources/data/custom_nodes/specified_demand_profile.csv', - 'resources/data/custom_nodes/RE_profiles_CSP.csv', - 'resources/data/custom_nodes/RE_profiles_HYD.csv', - 'resources/data/custom_nodes/RE_profiles_SPV.csv', - 'resources/data/custom_nodes/RE_profiles_WOF.csv', - 'resources/data/custom_nodes/RE_profiles_WON.csv', + plexos_demand = 'resources/data/All_Demand_UTC_2015.csv', + plexos_csp_2015 = 'resources/data/CSP 2015.csv', + plexos_spv_2015 = 'resources/data/SolarPV 2015.csv', + plexos_hyd_2015 = 'resources/data/Hydro_Monthly_Profiles (15 year average).csv', + plexos_won_2015 = 'resources/data/Won 2015.csv', + plexos_wof_2015 = 'resources/data/Woff 2015.csv', + custom_specified_demand_profiles = 'resources/data/custom_nodes/specified_demand_profile.csv', + custom_csp_profiles = 'resources/data/custom_nodes/RE_profiles_CSP.csv', + custom_hyd_profiles = 'resources/data/custom_nodes/RE_profiles_HYD.csv', + custom_spv_profiles = 'resources/data/custom_nodes/RE_profiles_SPV.csv', + custom_wof_profiles = 'resources/data/custom_nodes/RE_profiles_WOF.csv', + custom_won_profiles = 'resources/data/custom_nodes/RE_profiles_WON.csv', params: start_year = config['startYear'], end_year = config['endYear'], - daytype = config['daytype'], - daypart = config['dayparts'], + region_name = 'GLOBAL', + output_data_dir = 'results/data', + input_data_dir = 'resources/data', + input_dir = 'resources', + output_dir = 'results', + custom_nodes_dir = 'resources/data/custom_nodes', + geographic_scope = config['geographic_scope'], seasons = config['seasons'], + dayparts = config['dayparts'], + daytype = config['daytype'], + timeshift = config['timeshift'], output: csv_files = expand('results/data/{output_file}.csv', output_file=timeslice_files), log: @@ -383,4 +391,4 @@ rule create_missing_csv: output: csvs = expand("results/data/{empty}.csv", empty=EMPTY_CSVS) script: - "../scripts/osemosys_global/create_missing_csvs.py" + "../scripts/osemosys_global/create_missing_csvs.py" \ No newline at end of file diff --git a/workflow/scripts/osemosys_global/TS_data.py b/workflow/scripts/osemosys_global/TS_data.py index 8be00ac3..60d7affc 100644 --- a/workflow/scripts/osemosys_global/TS_data.py +++ b/workflow/scripts/osemosys_global/TS_data.py @@ -5,506 +5,530 @@ sns.set() import os -# from osemosys_global.configuration import ConfigFile, ConfigPaths -from configuration import ConfigFile, ConfigPaths from osemosys_global.utils import apply_timeshift from utils import apply_dtypes from constants import SET_DTYPES -import time from datetime import datetime -# from OPG_configuration import ConfigFile, ConfigPaths -import logging - -logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) - -# ### Input data files and user input - -# CONFIGURATION PARAMETERS - -config_paths = ConfigPaths() -config = ConfigFile("config") - -input_dir = config_paths.input_dir -input_data_dir = config_paths.input_data_dir -output_dir = config_paths.output_dir -output_data_dir = config_paths.output_data_dir -custom_nodes_dir = config_paths.custom_nodes_dir -geographic_scope = config.get("geographic_scope") -seasons = config.get("seasons") -daytype = config.get("daytype") -dayparts = config.get("dayparts") - -# Check for custom nodes directory -try: - 
os.makedirs(custom_nodes_dir) -except FileExistsError: - pass - -region_name = config.region_name -custom_nodes = config.get("nodes_to_add") - -# Inputs PLEXOS-World 2015 data. - -demand_df = pd.read_csv( - os.path.join(input_data_dir, "All_Demand_UTC_2015.csv"), encoding="latin-1" -) - -seasons_raw = config.get("seasons") -seasonsData = [] -for s, months in seasons_raw.items(): - for month in months: - seasonsData.append([month, s]) -seasons_df = pd.DataFrame(seasonsData, columns=["month", "season"]) -seasons_df = seasons_df.sort_values(by=["month"]).reset_index(drop=True) - -dayparts_raw = config.get("dayparts") -daypartData = [] -for dp, hr in dayparts_raw.items(): - daypartData.append([dp, hr[0], hr[1]]) -dayparts_df = pd.DataFrame(daypartData, columns=["daypart", "start_hour", "end_hour"]) -timeshift = config.get("timeshift") -dayparts_df["start_hour"] = dayparts_df["start_hour"].map( - lambda x: apply_timeshift(x, timeshift) -) -dayparts_df["end_hour"] = dayparts_df["end_hour"].map( - lambda x: apply_timeshift(x, timeshift) -) - -daytype_included = config.get("daytype") -model_start_year = config.get("startYear") -model_end_year = config.get("endYear") -years = list(range(model_start_year, model_end_year + 1)) - -# Read renewable profile files -csp_df = pd.read_csv(os.path.join(input_data_dir, "CSP 2015.csv"), encoding="latin-1") - -csp_df_custom = pd.read_csv( - os.path.join(custom_nodes_dir, "RE_profiles_CSP.csv"), encoding="latin-1" -) -csp_df_custom.drop(["Datetime"], axis=1, inplace=True) -csp_df = pd.concat([csp_df, csp_df_custom], axis=1) - -csp_df.name = "CSP" - -spv_df = pd.read_csv( - os.path.join(input_data_dir, "SolarPV 2015.csv"), encoding="latin-1" -) - -spv_df_custom = pd.read_csv( - os.path.join(custom_nodes_dir, "RE_profiles_SPV.csv"), encoding="latin-1" -) -spv_df_custom.drop(["Datetime"], axis=1, inplace=True) -spv_df = pd.concat([spv_df, spv_df_custom], axis=1) - -spv_df.name = "SPV" - -nodes = ["-".join(x.split("-")[1:]) for x in spv_df.columns if x not in ["Datetime"]] -regions = [x for x in spv_df.columns if x not in ["Datetime"]] - -node_region_dict = dict(zip(nodes, regions)) - -hyd_df = pd.read_csv( - os.path.join(input_data_dir, "Hydro_Monthly_Profiles (15 year average).csv"), - encoding="latin-1", -) +def main( + demand_df: pd.DataFrame, + csp_df: pd.DataFrame, + spv_df: pd.DataFrame, + hyd_df: pd.DataFrame, + won_df: pd.DataFrame, + wof_df: pd.DataFrame, + custom_sp_demand_profile: pd.DataFrame, + csp_df_custom: pd.DataFrame, + hyd_df_custom: pd.DataFrame, + spv_df_custom: pd.DataFrame, + wof_df_custom: pd.DataFrame, + won_df_custom: pd.DataFrame, + seasons_raw: dict, + dayparts_raw: dict, + ): + + seasonsData = [] + for s, months in seasons_raw.items(): + for month in months: + seasonsData.append([month, s]) + seasons_df = pd.DataFrame(seasonsData, columns=["month", "season"]) + seasons_df = seasons_df.sort_values(by=["month"]).reset_index(drop=True) -hyd_df = hyd_df.loc[hyd_df["NAME"].str.endswith("Capacity Scaler")] -hyd_df["NAME"] = hyd_df["NAME"].str.split("_").str[0] - -hyd_df_custom = pd.read_csv( - os.path.join(custom_nodes_dir, "RE_profiles_HYD.csv"), encoding="latin-1" -) -hyd_df = pd.concat([hyd_df, hyd_df_custom]) - -# Drop Brazil transmission nodes J1, J2, J3 -brazil_j_nodes = ["BRA-J1", "BRA-J2", "BRA-J3"] -hyd_df = hyd_df.loc[~hyd_df["NAME"].isin(brazil_j_nodes)] -hyd_df = hyd_df.set_index("NAME").T.reset_index() -hyd_df.rename(columns={"index": "MONTH"}, inplace=True) - -hyd_df["MONTH"] = hyd_df["MONTH"].str.replace("M", "").astype(int) - 
-hyd_df_processed = pd.DataFrame(columns=["Datetime"]) -hyd_df_processed["Datetime"] = spv_df["Datetime"] -hyd_df_processed["MONTH"] = ( - hyd_df_processed["Datetime"].str.split("/").str[1].astype(int) -) -hyd_df_processed = pd.merge(hyd_df_processed, hyd_df, how="left", on="MONTH") -hyd_df_processed.drop(columns="MONTH", inplace=True) -hyd_df_processed.rename(columns=node_region_dict, inplace=True) -hyd_df_processed.name = "HYD" - -won_df = pd.read_csv(os.path.join(input_data_dir, "Won 2015.csv"), encoding="latin-1") + daypartData = [] + for dp, hr in dayparts_raw.items(): + daypartData.append([dp, hr[0], hr[1]]) + dayparts_df = pd.DataFrame(daypartData, columns=["daypart", "start_hour", "end_hour"]) -won_df_custom = pd.read_csv( - os.path.join(custom_nodes_dir, "RE_profiles_WON.csv"), encoding="latin-1" -) -won_df_custom.drop(["Datetime"], axis=1, inplace=True) -won_df = pd.concat([won_df, won_df_custom], axis=1) -won_df.name = "WON" - -wof_df = pd.read_csv(os.path.join(input_data_dir, "Woff 2015.csv"), encoding="latin-1") + dayparts_df["start_hour"] = dayparts_df["start_hour"].map( + lambda x: apply_timeshift(x, timeshift) + ) + dayparts_df["end_hour"] = dayparts_df["end_hour"].map( + lambda x: apply_timeshift(x, timeshift) + ) -wof_df_custom = pd.read_csv( - os.path.join(custom_nodes_dir, "RE_profiles_WOF.csv"), encoding="latin-1" -) -wof_df_custom.drop(["Datetime"], axis=1, inplace=True) -wof_df = pd.concat([wof_df, wof_df_custom], axis=1) -wof_df.name = "WOF" - -# ### Create 'output' directory if it doesn't exist -if not os.path.exists(output_data_dir): - os.makedirs(output_data_dir) + years = list(range(start_year, end_year + 1)) + + # Read renewable profile files + csp_df_custom.drop(["Datetime"], axis=1, inplace=True) + csp_df = pd.concat([csp_df, csp_df_custom], axis=1) + + csp_df.name = "CSP" + spv_df_custom.drop(["Datetime"], axis=1, inplace=True) + spv_df = pd.concat([spv_df, spv_df_custom], axis=1) + + spv_df.name = "SPV" + + nodes = ["-".join(x.split("-")[1:]) for x in spv_df.columns if x not in ["Datetime"]] + regions = [x for x in spv_df.columns if x not in ["Datetime"]] + + node_region_dict = dict(zip(nodes, regions)) + + hyd_df = hyd_df.loc[hyd_df["NAME"].str.endswith("Capacity Scaler")] + hyd_df["NAME"] = hyd_df["NAME"].str.split("_").str[0] -def correct_datetime_formatting(time_str): - """start hours are not padded with 00:00""" - try: - date_obj = datetime.strptime(time_str, "%d/%m/%Y %H:%M") - except ValueError: + hyd_df = pd.concat([hyd_df, hyd_df_custom]) + + # Drop Brazil transmission nodes J1, J2, J3 + brazil_j_nodes = ["BRA-J1", "BRA-J2", "BRA-J3"] + hyd_df = hyd_df.loc[~hyd_df["NAME"].isin(brazil_j_nodes)] + hyd_df = hyd_df.set_index("NAME").T.reset_index() + hyd_df.rename(columns={"index": "MONTH"}, inplace=True) + + hyd_df["MONTH"] = hyd_df["MONTH"].str.replace("M", "").astype(int) + + hyd_df_processed = pd.DataFrame(columns=["Datetime"]) + hyd_df_processed["Datetime"] = spv_df["Datetime"] + hyd_df_processed["MONTH"] = ( + hyd_df_processed["Datetime"].str.split("/").str[1].astype(int) + ) + hyd_df_processed = pd.merge(hyd_df_processed, hyd_df, how="left", on="MONTH") + hyd_df_processed.drop(columns="MONTH", inplace=True) + hyd_df_processed.rename(columns=node_region_dict, inplace=True) + hyd_df_processed.name = "HYD" + + won_df_custom.drop(["Datetime"], axis=1, inplace=True) + won_df = pd.concat([won_df, won_df_custom], axis=1) + won_df.name = "WON" + + wof_df_custom.drop(["Datetime"], axis=1, inplace=True) + wof_df = pd.concat([wof_df, wof_df_custom], 
axis=1) + wof_df.name = "WOF" + + # ### Create 'output' directory if it doesn't exist + if not os.path.exists(output_data_dir): + os.makedirs(output_data_dir) + + + def correct_datetime_formatting(time_str): + """start hours are not padded with 00:00""" try: - date_obj = datetime.strptime(time_str, "%d/%m/%Y") - date_obj = date_obj.replace(hour=0, minute=0) - except ValueError as ex: - print(ex) - raise ValueError - return date_obj - - -# ### Create columns for year, month, day, hour, and day type - -# Convert datetime to year, month, day, and hour -demand_df["Datetime"] = demand_df.Datetime.map(correct_datetime_formatting) -demand_df["Datetime"] = pd.to_datetime(demand_df["Datetime"], format="%d/%m/%Y %H:%M") -demand_df["Year"] = demand_df["Datetime"].dt.strftime("%Y").astype(int) -demand_df["Month"] = demand_df["Datetime"].dt.strftime("%m").astype(int) -demand_df["Day"] = demand_df["Datetime"].dt.strftime("%d").astype(int) -demand_df["Hour"] = demand_df["Datetime"].dt.strftime("%H").astype(int) - - -custom_sp_demand_profile = pd.read_csv( - os.path.join(input_data_dir, "custom_nodes", "specified_demand_profile.csv") -) - -demand_nodes = [x for x in demand_df.columns if x != "Datetime"] + [ - y for y in custom_sp_demand_profile.iloc[:,3:].columns] - -demand_df = pd.merge( - demand_df, custom_sp_demand_profile, how="left", on=["Month", "Day", "Hour"] -) - -# Create column for weekday/weekend -demand_df["Day-of-week"] = demand_df["Datetime"].dt.dayofweek -demand_df.loc[demand_df["Day-of-week"] < 5, "Day-of-week"] = "WD" -demand_df.loc[demand_df["Day-of-week"] != "WD", "Day-of-week"] = "WE" - - -# ### Create dictionaries for 'seasons' and 'dayparts' - -seasons_dict = dict(zip(list(seasons_df["month"]), list(seasons_df["season"]))) - -dayparts_dict = { - i: [j, k] - for i, j, k in zip( - list(dayparts_df["daypart"]), - list(dayparts_df["start_hour"]), - list(dayparts_df["end_hour"]), + date_obj = datetime.strptime(time_str, "%d/%m/%Y %H:%M") + except ValueError: + try: + date_obj = datetime.strptime(time_str, "%d/%m/%Y") + date_obj = date_obj.replace(hour=0, minute=0) + except ValueError as ex: + print(ex) + raise ValueError + return date_obj + + + # ### Create columns for year, month, day, hour, and day type + + # Convert datetime to year, month, day, and hour + demand_df["Datetime"] = demand_df.Datetime.map(correct_datetime_formatting) + demand_df["Datetime"] = pd.to_datetime(demand_df["Datetime"], format="%d/%m/%Y %H:%M") + demand_df["Year"] = demand_df["Datetime"].dt.strftime("%Y").astype(int) + demand_df["Month"] = demand_df["Datetime"].dt.strftime("%m").astype(int) + demand_df["Day"] = demand_df["Datetime"].dt.strftime("%d").astype(int) + demand_df["Hour"] = demand_df["Datetime"].dt.strftime("%H").astype(int) + + demand_nodes = [x for x in demand_df.columns if x != "Datetime"] + [ + y for y in custom_sp_demand_profile.iloc[:,3:].columns] + + demand_df = pd.merge( + demand_df, custom_sp_demand_profile, how="left", on=["Month", "Day", "Hour"] ) -} - - -# ### Create columns with 'seasons' and 'dayparts' - - -demand_df["Season"] = demand_df["Month"] -demand_df["Season"].replace(seasons_dict, inplace=True) - -demand_df["Hour"] = demand_df["Hour"].map(lambda x: apply_timeshift(int(x), timeshift)) -for daypart in dayparts_dict: - if dayparts_dict[daypart][0] > dayparts_dict[daypart][1]: # loops over 24hrs - demand_df.loc[ - (demand_df["Hour"] >= dayparts_dict[daypart][0]) - | (demand_df["Hour"] < dayparts_dict[daypart][1]), - "Daypart", - ] = daypart + + # Create column for weekday/weekend + 
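    # (pandas dt.dayofweek returns Monday=0 through Sunday=6, so values
    # below 5 mark weekdays)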
demand_df["Day-of-week"] = demand_df["Datetime"].dt.dayofweek + demand_df.loc[demand_df["Day-of-week"] < 5, "Day-of-week"] = "WD" + demand_df.loc[demand_df["Day-of-week"] != "WD", "Day-of-week"] = "WE" + + + # ### Create dictionaries for 'seasons' and 'dayparts' + + seasons_dict = dict(zip(list(seasons_df["month"]), list(seasons_df["season"]))) + + dayparts_dict = { + i: [j, k] + for i, j, k in zip( + list(dayparts_df["daypart"]), + list(dayparts_df["start_hour"]), + list(dayparts_df["end_hour"]), + ) + } + + + # ### Create columns with 'seasons' and 'dayparts' + + + demand_df["Season"] = demand_df["Month"] + demand_df["Season"].replace(seasons_dict, inplace=True) + + demand_df["Hour"] = demand_df["Hour"].map(lambda x: apply_timeshift(int(x), timeshift)) + for daypart in dayparts_dict: + if dayparts_dict[daypart][0] > dayparts_dict[daypart][1]: # loops over 24hrs + demand_df.loc[ + (demand_df["Hour"] >= dayparts_dict[daypart][0]) + | (demand_df["Hour"] < dayparts_dict[daypart][1]), + "Daypart", + ] = daypart + else: + demand_df.loc[ + (demand_df["Hour"] >= dayparts_dict[daypart][0]) + & (demand_df["Hour"] < dayparts_dict[daypart][1]), + "Daypart", + ] = daypart + + + # ### Create column for timeslice with and without day-type + + + if daytpe: + demand_df["TIMESLICE"] = ( + demand_df["Season"] + demand_df["Day-of-week"] + demand_df["Daypart"] + ) else: - demand_df.loc[ - (demand_df["Hour"] >= dayparts_dict[daypart][0]) - & (demand_df["Hour"] < dayparts_dict[daypart][1]), - "Daypart", - ] = daypart - - -# ### Create column for timeslice with and without day-type - - -if daytype_included: - demand_df["TIMESLICE"] = ( - demand_df["Season"] + demand_df["Day-of-week"] + demand_df["Daypart"] + demand_df["TIMESLICE"] = demand_df["Season"] + demand_df["Daypart"] + + # ### Calculate YearSplit + + + yearsplit = ( + demand_df["TIMESLICE"] + .value_counts(normalize=True) + .to_frame("VALUE") + .round(4) + .reset_index() + .rename({"index": "TIMESLICE"}, axis=1) ) -else: - demand_df["TIMESLICE"] = demand_df["Season"] + demand_df["Daypart"] - -# ### Calculate YearSplit - - -yearsplit = ( - demand_df["TIMESLICE"] - .value_counts(normalize=True) - .to_frame("VALUE") - .round(4) - .reset_index() - .rename({"index": "TIMESLICE"}, axis=1) -) - -yearsplit_final = pd.DataFrame( - list(itertools.product(yearsplit["TIMESLICE"].unique(), years)), - columns=["TIMESLICE", "YEAR"], -) -yearsplit_final = yearsplit_final.join(yearsplit.set_index("TIMESLICE"), on="TIMESLICE") -yearsplit_final = apply_dtypes(yearsplit_final, "Year Split") -yearsplit_final.to_csv(os.path.join(output_data_dir, "YearSplit.csv"), index=None) - - -# Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile -# ### Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile - -sp_demand_df = demand_df[ - [x for x in demand_df.columns if x in demand_nodes or x == "TIMESLICE"] -] - -sp_demand_df = pd.melt( - sp_demand_df, - id_vars="TIMESLICE", - value_vars=demand_nodes, - var_name="node", - value_name="demand", -) - -sp_demand_df = sp_demand_df.groupby(["TIMESLICE", "node"], as_index=False).agg(sum) - -# Calculate SpecifiedAnnualDemand -total_demand_df = ( - sp_demand_df.drop(columns="TIMESLICE").groupby("node", as_index=False).sum() -) - -total_demand_df.rename({"demand": "total_demand"}, axis=1, inplace=True) - -sp_demand_df = sp_demand_df.join(total_demand_df.set_index("node"), on="node") - -# Calculate SpecifiedDemandProfile - -sp_demand_df["VALUE"] = sp_demand_df["demand"] / sp_demand_df["total_demand"] - - -# Filter out country aggregate 
values for countries with multiple nodes -country_with_nodes = list( - (sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "node"].str[:-3].unique()) -) - -sp_demand_df = sp_demand_df.loc[~(sp_demand_df["node"].isin(country_with_nodes))] - - -# Rename COMMODITY based on naming convention. -# Add 'XX' for countries without multiple nodes -sp_demand_df.loc[sp_demand_df["node"].str.len() == 5, "FUEL"] = ( - "ELC" + sp_demand_df["node"] + "02" -) - -sp_demand_df.loc[sp_demand_df["node"].str.len() == 6, "FUEL"] = ( - "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "XX02" -) - -sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "FUEL"] = ( - "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "02" -) - -# In case custom data is provided only keep the custom data -sp_demand_df.drop_duplicates(subset=['TIMESLICE', 'FUEL'], keep = 'last', inplace = True) - -# Create master table for SpecifiedDemandProfile -sp_demand_df_final = pd.DataFrame( - list( - itertools.product( - sp_demand_df["TIMESLICE"].unique(), sp_demand_df["FUEL"].unique(), years - ) - ), - columns=["TIMESLICE", "FUEL", "YEAR"], -) -sp_demand_df_final = sp_demand_df_final.join( - sp_demand_df.set_index(["TIMESLICE", "FUEL"]), on=["TIMESLICE", "FUEL"] -) - -# Add 'REGION' column and fill 'GLOBAL' throughout -sp_demand_df_final["REGION"] = "GLOBAL" - -total_demand_df_final = ( - sp_demand_df_final.groupby(["REGION", "FUEL", "YEAR"], as_index=False)[ - "total_demand" - ] - .agg("mean") - .rename({"total_demand": "VALUE"}, axis=1) -) - -# Convert SpecifiedAnnualDemand to required units -total_demand_df_final["VALUE"] = total_demand_df_final["VALUE"].mul(3.6 * 1e-6) - -# Generate SpecifiedDemandProfile.csv file -sp_demand_df_final["VALUE"] = sp_demand_df_final["VALUE"].round(2) -sp_demand_df_final = sp_demand_df_final[ - ["REGION", "FUEL", "TIMESLICE", "YEAR", "VALUE"] -].dropna() - -# sp_demand_df_final = apply_dtypes(sp_demand_df_final, "SpecifiedDemandProfile") -sp_demand_df_final.drop_duplicates( - subset=["REGION", "TIMESLICE", "FUEL", "YEAR"], keep="last", inplace=True -) - -sp_demand_df_final.to_csv( - os.path.join(output_data_dir, "SpecifiedDemandProfile.csv"), index=None -) - -# CapacityFactor - -datetime_ts_df = demand_df[["Datetime", "TIMESLICE"]] -capfac_all_df = pd.DataFrame( - columns=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] -) - -def capacity_factor(df): - df["Datetime"] = pd.to_datetime(df["Datetime"], format="%d/%m/%Y %H:%M") - capfac_df = df.set_index("Datetime").join( - datetime_ts_df.set_index("Datetime"), on="Datetime" + + yearsplit_final = pd.DataFrame( + list(itertools.product(yearsplit["TIMESLICE"].unique(), years)), + columns=["TIMESLICE", "YEAR"], ) - capfac_nodes = [x for x in capfac_df.columns if x not in ["Datetime", "TIMESLICE"]] - capfac_df = capfac_df.reset_index().drop("Datetime", axis=1) - capfac_df = pd.melt( - capfac_df, + yearsplit_final = yearsplit_final.join(yearsplit.set_index("TIMESLICE"), on="TIMESLICE") + yearsplit_final = apply_dtypes(yearsplit_final, "Year Split") + yearsplit_final.to_csv(os.path.join(output_data_dir, "YearSplit.csv"), index=None) + + + # Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile + # ### Calculate SpecifiedAnnualDemand and SpecifiedDemandProfile + + sp_demand_df = demand_df[ + [x for x in demand_df.columns if x in demand_nodes or x == "TIMESLICE"] + ] + + sp_demand_df = pd.melt( + sp_demand_df, id_vars="TIMESLICE", - value_vars=capfac_nodes, + value_vars=demand_nodes, var_name="node", - value_name="VALUE", + 
value_name="demand", ) - capfac_df = capfac_df.groupby(["TIMESLICE", "node"], as_index=False).agg("mean") - capfac_df["VALUE"] = capfac_df["VALUE"].div(100).round(4) - - ## Filter out country aggregate values for countries with multiple nodes - capfac_df = capfac_df.loc[~(capfac_df["node"].isin(country_with_nodes))] - + + sp_demand_df = sp_demand_df.groupby(["TIMESLICE", "node"], as_index=False).agg(sum) + + # Calculate SpecifiedAnnualDemand + total_demand_df = ( + sp_demand_df.drop(columns="TIMESLICE").groupby("node", as_index=False).sum() + ) + + total_demand_df.rename({"demand": "total_demand"}, axis=1, inplace=True) + + sp_demand_df = sp_demand_df.join(total_demand_df.set_index("node"), on="node") + + # Calculate SpecifiedDemandProfile + + sp_demand_df["VALUE"] = sp_demand_df["demand"] / sp_demand_df["total_demand"] + + + # Filter out country aggregate values for countries with multiple nodes + country_with_nodes = list( + (sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "node"].str[:-3].unique()) + ) + + sp_demand_df = sp_demand_df.loc[~(sp_demand_df["node"].isin(country_with_nodes))] + + # Rename COMMODITY based on naming convention. # Add 'XX' for countries without multiple nodes - capfac_df.loc[capfac_df["node"].str.len() == 5, "TECHNOLOGY"] = ( - "PWR" + df.name + capfac_df["node"] + "01" + sp_demand_df.loc[sp_demand_df["node"].str.len() == 5, "FUEL"] = ( + "ELC" + sp_demand_df["node"] + "02" ) - capfac_df.loc[capfac_df["node"].str.len() == 6, "TECHNOLOGY"] = ( - "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "XX01" + sp_demand_df.loc[sp_demand_df["node"].str.len() == 6, "FUEL"] = ( + "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "XX02" ) - - capfac_df.loc[capfac_df["node"].str.len() > 6, "TECHNOLOGY"] = ( - "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "01" + + sp_demand_df.loc[sp_demand_df["node"].str.len() > 6, "FUEL"] = ( + "ELC" + sp_demand_df["node"].str.split("-").str[1:].str.join("") + "02" ) # In case custom data is provided only keep the custom data - capfac_df.drop_duplicates(subset=['TIMESLICE', 'TECHNOLOGY'], keep = 'last', inplace = True) - - # Create master table for CapacityFactor - capfac_df_final = pd.DataFrame( + sp_demand_df.drop_duplicates(subset=['TIMESLICE', 'FUEL'], keep = 'last', inplace = True) + + # Create master table for SpecifiedDemandProfile + sp_demand_df_final = pd.DataFrame( list( itertools.product( - capfac_df["TIMESLICE"].unique(), capfac_df["TECHNOLOGY"].unique(), years + sp_demand_df["TIMESLICE"].unique(), sp_demand_df["FUEL"].unique(), years ) ), - columns=["TIMESLICE", "TECHNOLOGY", "YEAR"], + columns=["TIMESLICE", "FUEL", "YEAR"], ) - capfac_df_final = capfac_df_final.join( - capfac_df.set_index(["TIMESLICE", "TECHNOLOGY"]), on=["TIMESLICE", "TECHNOLOGY"] + sp_demand_df_final = sp_demand_df_final.join( + sp_demand_df.set_index(["TIMESLICE", "FUEL"]), on=["TIMESLICE", "FUEL"] ) - + # Add 'REGION' column and fill 'GLOBAL' throughout - capfac_df_final["REGION"] = "GLOBAL" - - capfac_df_final = capfac_df_final[ - ["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] - ] - - return capfac_df_final - - -capfacs = [capfac_all_df] -for each in [hyd_df_processed, csp_df, spv_df, won_df, wof_df]: - capfacs.append(capacity_factor(each)) -capfac_all_df = pd.concat(capfacs).reset_index(drop=True) - -capfac_all_df.drop_duplicates( - subset=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR"], keep="last", inplace=True -) -capfac_all_df.to_csv(os.path.join(output_data_dir, 
"CapacityFactor.csv"), index=None) - -# Create csv for TIMESLICE - -# ## Create csv for TIMESLICE -time_slice_list = list(demand_df["TIMESLICE"].unique()) -time_slice_df = pd.DataFrame(time_slice_list, columns=["VALUE"]).astype( - SET_DTYPES["TIMESLICE"] -) -time_slice_df.to_csv(os.path.join(output_data_dir, "TIMESLICE.csv"), index=None) - -demand_nodes = list(set(list(sp_demand_df_final["FUEL"].str[3:8]))) - -# Create Conversionls, Conversionld, and Conversionlh - -# Conversionls -df_ls = pd.DataFrame( - list(itertools.product(time_slice_list, list(range(1, len(seasons) + 1)))), - columns=["TIMESLICE", "SEASON"], -) -df_ls.loc[df_ls["TIMESLICE"].str[1:2].astype(int) == df_ls["SEASON"], "VALUE"] = 1 -df_ls.fillna(0, inplace=True) -df_ls.to_csv(os.path.join(output_data_dir, "Conversionls.csv"), index=None) - -df_season_set = pd.DataFrame(list(range(1, len(seasons) + 1)), columns=["VALUE"]) -df_season_set.to_csv(os.path.join(output_data_dir, "SEASON.csv"), index=None) - -# Conversionld -df_ld = pd.DataFrame( - list(itertools.product(time_slice_list, [1])), columns=["TIMESLICE", "DAYTYPE"] -) -df_ld["VALUE"] = 1 -df_ld.fillna(0, inplace=True) -df_ld.to_csv(os.path.join(output_data_dir, "Conversionld.csv"), index=None) -df_daytype_set = pd.DataFrame([1], columns=["VALUE"]) -df_daytype_set.to_csv(os.path.join(output_data_dir, "DAYTYPE.csv"), index=None) - -# Conversionlh -df_lh = pd.DataFrame( - list(itertools.product(time_slice_list, list(range(1, len(dayparts) + 1)))), - columns=["TIMESLICE", "DAILYTIMEBRACKET"], -) -df_lh.loc[ - df_lh["TIMESLICE"].str[3:].astype(int) == df_lh["DAILYTIMEBRACKET"], "VALUE" -] = 1 -df_lh.fillna(0, inplace=True) -df_lh.to_csv(os.path.join(output_data_dir, "Conversionlh.csv"), index=None) -df_dayparts_set = pd.DataFrame(list(range(1, len(dayparts) + 1)), columns=["VALUE"]) -df_dayparts_set.to_csv( - os.path.join(output_data_dir, "DAILYTIMEBRACKET.csv"), index=None -) - -# Daysplit - -daysplit = {} -for dp, hr in dayparts_raw.items(): - daysplit[int(dp[1:])] = (hr[1] - hr[0]) / 8760 - -df_daysplit = pd.DataFrame( - itertools.product(list(range(1, len(dayparts) + 1)), years), - columns=["DAILYTIMEBRACKET", "YEAR"], -) -df_daysplit["VALUE"] = df_daysplit["DAILYTIMEBRACKET"].map(daysplit) -df_daysplit = df_daysplit[["DAILYTIMEBRACKET", "YEAR", "VALUE"]] -df_daysplit["VALUE"] = df_daysplit["VALUE"].round(4) -df_daysplit.to_csv(os.path.join(output_data_dir, "DaySplit.csv"), index=None) + sp_demand_df_final["REGION"] = "GLOBAL" + + total_demand_df_final = ( + sp_demand_df_final.groupby(["REGION", "FUEL", "YEAR"], as_index=False)[ + "total_demand" + ] + .agg("mean") + .rename({"total_demand": "VALUE"}, axis=1) + ) + + # Convert SpecifiedAnnualDemand to required units + total_demand_df_final["VALUE"] = total_demand_df_final["VALUE"].mul(3.6 * 1e-6) + + # Generate SpecifiedDemandProfile.csv file + sp_demand_df_final["VALUE"] = sp_demand_df_final["VALUE"].round(2) + sp_demand_df_final = sp_demand_df_final[ + ["REGION", "FUEL", "TIMESLICE", "YEAR", "VALUE"] + ].dropna() + + # sp_demand_df_final = apply_dtypes(sp_demand_df_final, "SpecifiedDemandProfile") + sp_demand_df_final.drop_duplicates( + subset=["REGION", "TIMESLICE", "FUEL", "YEAR"], keep="last", inplace=True + ) + + sp_demand_df_final.to_csv( + os.path.join(output_data_dir, "SpecifiedDemandProfile.csv"), index=None + ) + + # CapacityFactor + + datetime_ts_df = demand_df[["Datetime", "TIMESLICE"]] + capfac_all_df = pd.DataFrame( + columns=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] + ) + + def 
capacity_factor(df): + df["Datetime"] = pd.to_datetime(df["Datetime"], format="%d/%m/%Y %H:%M") + capfac_df = df.set_index("Datetime").join( + datetime_ts_df.set_index("Datetime"), on="Datetime" + ) + capfac_nodes = [x for x in capfac_df.columns if x not in ["Datetime", "TIMESLICE"]] + capfac_df = capfac_df.reset_index().drop("Datetime", axis=1) + capfac_df = pd.melt( + capfac_df, + id_vars="TIMESLICE", + value_vars=capfac_nodes, + var_name="node", + value_name="VALUE", + ) + capfac_df = capfac_df.groupby(["TIMESLICE", "node"], as_index=False).agg("mean") + capfac_df["VALUE"] = capfac_df["VALUE"].div(100).round(4) + + ## Filter out country aggregate values for countries with multiple nodes + capfac_df = capfac_df.loc[~(capfac_df["node"].isin(country_with_nodes))] + + # Rename COMMODITY based on naming convention. + # Add 'XX' for countries without multiple nodes + capfac_df.loc[capfac_df["node"].str.len() == 5, "TECHNOLOGY"] = ( + "PWR" + df.name + capfac_df["node"] + "01" + ) + + capfac_df.loc[capfac_df["node"].str.len() == 6, "TECHNOLOGY"] = ( + "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "XX01" + ) + + capfac_df.loc[capfac_df["node"].str.len() > 6, "TECHNOLOGY"] = ( + "PWR" + df.name + capfac_df["node"].str.split("-").str[1:].str.join("") + "01" + ) + + # In case custom data is provided only keep the custom data + capfac_df.drop_duplicates(subset=['TIMESLICE', 'TECHNOLOGY'], keep = 'last', inplace = True) + + # Create master table for CapacityFactor + capfac_df_final = pd.DataFrame( + list( + itertools.product( + capfac_df["TIMESLICE"].unique(), capfac_df["TECHNOLOGY"].unique(), years + ) + ), + columns=["TIMESLICE", "TECHNOLOGY", "YEAR"], + ) + capfac_df_final = capfac_df_final.join( + capfac_df.set_index(["TIMESLICE", "TECHNOLOGY"]), on=["TIMESLICE", "TECHNOLOGY"] + ) + + # Add 'REGION' column and fill 'GLOBAL' throughout + capfac_df_final["REGION"] = "GLOBAL" + + capfac_df_final = capfac_df_final[ + ["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] + ] + + return capfac_df_final + + + capfacs = [capfac_all_df] + for each in [hyd_df_processed, csp_df, spv_df, won_df, wof_df]: + capfacs.append(capacity_factor(each)) + capfac_all_df = pd.concat(capfacs).reset_index(drop=True) + + capfac_all_df.drop_duplicates( + subset=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR"], keep="last", inplace=True + ) + capfac_all_df.to_csv(os.path.join(output_data_dir, "CapacityFactor.csv"), index=None) + + # Create csv for TIMESLICE + + # ## Create csv for TIMESLICE + time_slice_list = list(demand_df["TIMESLICE"].unique()) + time_slice_df = pd.DataFrame(time_slice_list, columns=["VALUE"]).astype( + SET_DTYPES["TIMESLICE"] + ) + time_slice_df.to_csv(os.path.join(output_data_dir, "TIMESLICE.csv"), index=None) + + demand_nodes = list(set(list(sp_demand_df_final["FUEL"].str[3:8]))) + + # Create Conversionls, Conversionld, and Conversionlh + + # Conversionls + df_ls = pd.DataFrame( + list(itertools.product(time_slice_list, list(range(1, len(seasons) + 1)))), + columns=["TIMESLICE", "SEASON"], + ) + df_ls.loc[df_ls["TIMESLICE"].str[1:2].astype(int) == df_ls["SEASON"], "VALUE"] = 1 + df_ls.fillna(0, inplace=True) + df_ls.to_csv(os.path.join(output_data_dir, "Conversionls.csv"), index=None) + + df_season_set = pd.DataFrame(list(range(1, len(seasons) + 1)), columns=["VALUE"]) + df_season_set.to_csv(os.path.join(output_data_dir, "SEASON.csv"), index=None) + + # Conversionld + df_ld = pd.DataFrame( + list(itertools.product(time_slice_list, [1])), columns=["TIMESLICE", "DAYTYPE"] + ) + 
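    # Only a single DAYTYPE is modelled, so every timeslice maps to day-type 1
    # below; Conversionls and Conversionlh instead match on the season and
    # daypart digits embedded in each timeslice name.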
df_ld["VALUE"] = 1 + df_ld.fillna(0, inplace=True) + df_ld.to_csv(os.path.join(output_data_dir, "Conversionld.csv"), index=None) + df_daytype_set = pd.DataFrame([1], columns=["VALUE"]) + df_daytype_set.to_csv(os.path.join(output_data_dir, "DAYTYPE.csv"), index=None) + + # Conversionlh + df_lh = pd.DataFrame( + list(itertools.product(time_slice_list, list(range(1, len(dayparts) + 1)))), + columns=["TIMESLICE", "DAILYTIMEBRACKET"], + ) + df_lh.loc[ + df_lh["TIMESLICE"].str[3:].astype(int) == df_lh["DAILYTIMEBRACKET"], "VALUE" + ] = 1 + df_lh.fillna(0, inplace=True) + df_lh.to_csv(os.path.join(output_data_dir, "Conversionlh.csv"), index=None) + df_dayparts_set = pd.DataFrame(list(range(1, len(dayparts) + 1)), columns=["VALUE"]) + df_dayparts_set.to_csv( + os.path.join(output_data_dir, "DAILYTIMEBRACKET.csv"), index=None + ) + + # Daysplit + + daysplit = {} + for dp, hr in dayparts_raw.items(): + daysplit[int(dp[1:])] = (hr[1] - hr[0]) / 8760 + + df_daysplit = pd.DataFrame( + itertools.product(list(range(1, len(dayparts) + 1)), years), + columns=["DAILYTIMEBRACKET", "YEAR"], + ) + df_daysplit["VALUE"] = df_daysplit["DAILYTIMEBRACKET"].map(daysplit) + df_daysplit = df_daysplit[["DAILYTIMEBRACKET", "YEAR", "VALUE"]] + df_daysplit["VALUE"] = df_daysplit["VALUE"].round(4) + df_daysplit.to_csv(os.path.join(output_data_dir, "DaySplit.csv"), index=None) -logging.info("Time Slicing Completed") \ No newline at end of file +if __name__ == "__main__": + + if "snakemake" in globals(): + start_year = snakemake.params.start_year + end_year = snakemake.params.end_year + region_name = snakemake.params.region_name + geographic_scope = snakemake.params.geographic_scope + custom_nodes = snakemake.params.custom_nodes + output_data_dir = snakemake.params.output_data_dir + input_data_dir = snakemake.params.input_data_dir + output_dir = snakemake.params.output_dir + input_dir = snakemake.params.input_dir + custom_nodes_dir = snakemake.params.input_data_dir + daytype = snakemake.params.daytype + seasons = snakemake.params.seasons + dayparts = snakemake.params.dayparts + timeshift = snakemake.params.timeshift + + plexos_demand = snakemake.input.plexos_demand + plexos_csp_2015 = snakemake.input.plexos_csp_2015 + plexos_spv_2015 = snakemake.input.plexos_spv_2015 + plexos_hyd_2015 = snakemake.input.plexos_hyd_2015 + plexos_won_2015 = snakemake.input.plexos_won_2015 + plexos_wof_2015 = snakemake.input.plexos_wof_2015 + custom_specified_demand_profiles = snakemake.input.custom_specified_demand_profiles + custom_csp_profiles = snakemake.input.custom_csp_profiles + custom_hyd_profiles = snakemake.input.custom_hyd_profiles + custom_spv_profiles = snakemake.input.custom_spv_profiles + custom_wof_profiles = snakemake.input.custom_wof_profiles + custom_won_profiles = snakemake.input.custom_won_profiles + + # The below else statement defines variables if the 'powerplant/main' script is to be run locally + # outside the snakemake workflow. This is relevant for testing purposes only! User inputs when running + # the full workflow need to be defined in the config file. 
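    # A minimal usage sketch (assuming the default folder layout): running
    #   python workflow/scripts/osemosys_global/TS_data.py
    # from the repository root exercises the fallback values below.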
    else:
        start_year = 2021
        end_year = 2050
        region_name = 'GLOBAL'
        geographic_scope = ['BTN', 'IND']
        custom_nodes = []
        output_data_dir = 'results/data'
        input_data_dir = 'resources/data'
        output_dir = 'results'
        input_dir = 'resources'
        custom_nodes_dir = 'resources/data/custom_nodes'
        daytpe = False
        seasons = {'S1': [1, 2, 3, 4, 5, 6],
                   'S2': [7, 8, 9, 10, 11, 12]}
        dayparts = {'D1': [1, 7],
                    'D2': [7, 13],
                    'D3': [13, 19],
                    'D4': [19, 25]}
        timeshift = 0

        plexos_demand = pd.read_csv(os.path.join(input_data_dir, 'All_Demand_UTC_2015.csv'), encoding="latin-1")
        plexos_csp_2015 = pd.read_csv(os.path.join(input_data_dir, 'CSP 2015.csv'), encoding="latin-1")
        plexos_spv_2015 = pd.read_csv(os.path.join(input_data_dir, 'SolarPV 2015.csv'), encoding="latin-1")
        plexos_hyd_2015 = pd.read_csv(os.path.join(input_data_dir, 'Hydro_Monthly_Profiles (15 year average).csv'), encoding="latin-1")
        plexos_won_2015 = pd.read_csv(os.path.join(input_data_dir, 'Won 2015.csv'), encoding="latin-1")
        plexos_wof_2015 = pd.read_csv(os.path.join(input_data_dir, 'Woff 2015.csv'), encoding="latin-1")
        custom_specified_demand_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'specified_demand_profile.csv'))
        custom_csp_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'RE_profiles_CSP.csv'), encoding="latin-1")
        custom_hyd_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'RE_profiles_HYD.csv'), encoding="latin-1")
        custom_spv_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'RE_profiles_SPV.csv'), encoding="latin-1")
        custom_wof_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'RE_profiles_WOF.csv'), encoding="latin-1")
        custom_won_profiles = pd.read_csv(os.path.join(custom_nodes_dir, 'RE_profiles_WON.csv'), encoding="latin-1")

    # SET INPUT DATA
    input_data = {
        "demand_df" : plexos_demand,
        "csp_df" : plexos_csp_2015,
        "spv_df" : plexos_spv_2015,
        "hyd_df" : plexos_hyd_2015,
        "won_df" : plexos_won_2015,
        "wof_df" : plexos_wof_2015,
        "custom_sp_demand_profile" : custom_specified_demand_profiles,
        "csp_df_custom" : custom_csp_profiles,
        "hyd_df_custom" : custom_hyd_profiles,
        "spv_df_custom" : custom_spv_profiles,
        "wof_df_custom" : custom_wof_profiles,
        "won_df_custom" : custom_won_profiles,
        "seasons_raw" : seasons,
        "dayparts_raw": dayparts,
    }

    # CALL MAIN
    main(**input_data)
\ No newline at end of file

From f8d408238c3894d9eb635907d0d5913414438ec8 Mon Sep 17 00:00:00 2001
From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com>
Date: Fri, 20 Dec 2024 12:23:34 +0100
Subject: [PATCH 5/9] Delete legacy configuration.py

---
 .../scripts/osemosys_global/configuration.py  | 67 ------------------
 1 file changed, 67 deletions(-)
 delete mode 100644 workflow/scripts/osemosys_global/configuration.py

diff --git a/workflow/scripts/osemosys_global/configuration.py b/workflow/scripts/osemosys_global/configuration.py
deleted file mode 100644
index 95d7ebc6..00000000
--- a/workflow/scripts/osemosys_global/configuration.py
+++ /dev/null
@@ -1,67 +0,0 @@
-"""Functionality to interface with configuration files. 
""" - -from pathlib import Path -import yaml - - -class ConfigFile: - """Class to hold yaml configuration file data - - Args: - config_file_name = yaml file name in the config/ folder - - Example: - config = ConfigFile('settings') - config.get('geographic_scope') - -> ['IND','NPL'] - """ - - # non changing parameters - region_name = "GLOBAL" - - def __init__(self, config_file_name): - self.file_path = Path( - Path(__file__).resolve().parent, - "../../../config", - f"{config_file_name}.yaml", - ) - - def get(self, name): - with open(self.file_path, encoding="utf-8") as yaml_file: - parsed_yaml_file = yaml.load(yaml_file, Loader=yaml.FullLoader).get(name) - return parsed_yaml_file - - def get_years(self): - start_year = self.get("startYear") - end_year = self.get("endYear") - return list(range(start_year, end_year + 1)) - - -class ConfigPaths: - """Class to hold relative paths from file called from.""" - - # Hard coded file structure - input_dir_name = "resources" - output_dir_name = "results" - py_file_dir = Path(__file__).resolve().parent # folder of this module - - def __init__(self): - self.input_dir = Path(self.py_file_dir, "../../../", self.input_dir_name) - self.input_data_dir = Path(self.input_dir, "data") - - self.output_dir = Path(self.py_file_dir, "../../../", self.output_dir_name) - self.output_data_dir = Path(self.output_dir, "data") - - self.scenario_dir = Path(self.output_dir, self.get_scenario_name()) - self.scenario_data_dir = Path(self.scenario_dir, "data") - self.scenario_figs_dir = Path(self.scenario_dir, "figures") - self.scenario_results_dir = Path(self.scenario_dir, "results") - self.scenario_result_summaries_dir = Path(self.scenario_dir, "result_summaries") - - self.otoole = Path(self.input_dir, "otoole") - - self.custom_nodes_dir = Path(self.input_dir, "data/custom_nodes") - - def get_scenario_name(self): - config = ConfigFile("config") - return config.get("scenario") From e8a7b6180d2215d288c06f24297981d1f3d44ddc Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Fri, 20 Dec 2024 13:59:27 +0100 Subject: [PATCH 6/9] snakemake ts data fix --- workflow/rules/preprocess.smk | 4 +-- workflow/scripts/osemosys_global/TS_data.py | 30 ++++++++++----------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/workflow/rules/preprocess.smk b/workflow/rules/preprocess.smk index fd5b2518..8695dcd4 100644 --- a/workflow/rules/preprocess.smk +++ b/workflow/rules/preprocess.smk @@ -317,8 +317,8 @@ rule timeslice: csv_files = expand('results/data/{output_file}.csv', output_file=timeslice_files), log: log = 'results/logs/timeslice.log' - shell: - 'python workflow/scripts/osemosys_global/TS_data.py 2> {log}' + script: + "../scripts/osemosys_global/TS_data.py" rule reserves: message: diff --git a/workflow/scripts/osemosys_global/TS_data.py b/workflow/scripts/osemosys_global/TS_data.py index 60d7affc..1f7e1eac 100644 --- a/workflow/scripts/osemosys_global/TS_data.py +++ b/workflow/scripts/osemosys_global/TS_data.py @@ -176,7 +176,7 @@ def correct_datetime_formatting(time_str): # ### Create column for timeslice with and without day-type - if daytpe: + if daytype: demand_df["TIMESLICE"] = ( demand_df["Season"] + demand_df["Day-of-week"] + demand_df["Daypart"] ) @@ -451,7 +451,6 @@ def capacity_factor(df): end_year = snakemake.params.end_year region_name = snakemake.params.region_name geographic_scope = snakemake.params.geographic_scope - custom_nodes = snakemake.params.custom_nodes output_data_dir = 
snakemake.params.output_data_dir input_data_dir = snakemake.params.input_data_dir output_dir = snakemake.params.output_dir @@ -462,18 +461,18 @@ def capacity_factor(df): dayparts = snakemake.params.dayparts timeshift = snakemake.params.timeshift - plexos_demand = snakemake.input.plexos_demand - plexos_csp_2015 = snakemake.input.plexos_csp_2015 - plexos_spv_2015 = snakemake.input.plexos_spv_2015 - plexos_hyd_2015 = snakemake.input.plexos_hyd_2015 - plexos_won_2015 = snakemake.input.plexos_won_2015 - plexos_wof_2015 = snakemake.input.plexos_wof_2015 - custom_specified_demand_profiles = snakemake.input.custom_specified_demand_profiles - custom_csp_profiles = snakemake.input.custom_csp_profiles - custom_hyd_profiles = snakemake.input.custom_hyd_profiles - custom_spv_profiles = snakemake.input.custom_spv_profiles - custom_wof_profiles = snakemake.input.custom_wof_profiles - custom_won_profiles = snakemake.input.custom_won_profiles + plexos_demand = pd.read_csv(snakemake.input.plexos_demand) + plexos_csp_2015 = pd.read_csv(snakemake.input.plexos_csp_2015) + plexos_spv_2015 = pd.read_csv(snakemake.input.plexos_spv_2015) + plexos_hyd_2015 = pd.read_csv(snakemake.input.plexos_hyd_2015) + plexos_won_2015 = pd.read_csv(snakemake.input.plexos_won_2015) + plexos_wof_2015 = pd.read_csv(snakemake.input.plexos_wof_2015) + custom_specified_demand_profiles = pd.read_csv(snakemake.input.custom_specified_demand_profiles) + custom_csp_profiles = pd.read_csv(snakemake.input.custom_csp_profiles) + custom_hyd_profiles = pd.read_csv(snakemake.input.custom_hyd_profiles) + custom_spv_profiles = pd.read_csv(snakemake.input.custom_spv_profiles) + custom_wof_profiles = pd.read_csv(snakemake.input.custom_wof_profiles) + custom_won_profiles = pd.read_csv(snakemake.input.custom_won_profiles) # The below else statement defines variables if the 'powerplant/main' script is to be run locally # outside the snakemake workflow. This is relevant for testing purposes only! 
User inputs when running @@ -484,13 +483,12 @@ def capacity_factor(df): end_year = 2050 region_name = 'GLOBAL' geographic_scope = ['BTN', 'IND'] - custom_nodes = [] output_data_dir = 'results/data' input_data_dir = 'resources/data' output_dir = 'results' input_dir = 'resources' custom_nodes_dir = 'resources/data/custom_nodes' - daytpe = False + daytype = False seasons = {'S1': [1, 2, 3, 4, 5, 6], 'S2': [7, 8, 9, 10, 11, 12]} dayparts = {'D1': [1, 7], From 8d04482587478a0438372c8f7c87867b95cd3c90 Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:35:55 +0100 Subject: [PATCH 7/9] ts data warning fixes --- workflow/scripts/osemosys_global/TS_data.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/workflow/scripts/osemosys_global/TS_data.py b/workflow/scripts/osemosys_global/TS_data.py index 1f7e1eac..f4232021 100644 --- a/workflow/scripts/osemosys_global/TS_data.py +++ b/workflow/scripts/osemosys_global/TS_data.py @@ -65,7 +65,7 @@ def main( node_region_dict = dict(zip(nodes, regions)) hyd_df = hyd_df.loc[hyd_df["NAME"].str.endswith("Capacity Scaler")] - hyd_df["NAME"] = hyd_df["NAME"].str.split("_").str[0] + hyd_df.loc[:,"NAME"] = hyd_df["NAME"].str.split("_").str[0] hyd_df = pd.concat([hyd_df, hyd_df_custom]) @@ -132,11 +132,10 @@ def correct_datetime_formatting(time_str): ) # Create column for weekday/weekend - demand_df["Day-of-week"] = demand_df["Datetime"].dt.dayofweek - demand_df.loc[demand_df["Day-of-week"] < 5, "Day-of-week"] = "WD" + demand_df["Day-of-week"] = demand_df["Datetime"].dt.dayofweek.astype(str) + demand_df.loc[demand_df["Day-of-week"].isin([0,1,2,3,4]), "Day-of-week"] = "WD" demand_df.loc[demand_df["Day-of-week"] != "WD", "Day-of-week"] = "WE" - # ### Create dictionaries for 'seasons' and 'dayparts' seasons_dict = dict(zip(list(seasons_df["month"]), list(seasons_df["season"]))) @@ -155,7 +154,7 @@ def correct_datetime_formatting(time_str): demand_df["Season"] = demand_df["Month"] - demand_df["Season"].replace(seasons_dict, inplace=True) + demand_df["Season"] = demand_df["Season"].replace(seasons_dict) demand_df["Hour"] = demand_df["Hour"].map(lambda x: apply_timeshift(int(x), timeshift)) for daypart in dayparts_dict: @@ -219,7 +218,7 @@ def correct_datetime_formatting(time_str): value_name="demand", ) - sp_demand_df = sp_demand_df.groupby(["TIMESLICE", "node"], as_index=False).agg(sum) + sp_demand_df = sp_demand_df.groupby(["TIMESLICE", "node"], as_index=False).sum() # Calculate SpecifiedAnnualDemand total_demand_df = ( @@ -305,9 +304,6 @@ def correct_datetime_formatting(time_str): # CapacityFactor datetime_ts_df = demand_df[["Datetime", "TIMESLICE"]] - capfac_all_df = pd.DataFrame( - columns=["REGION", "TECHNOLOGY", "TIMESLICE", "YEAR", "VALUE"] - ) def capacity_factor(df): df["Datetime"] = pd.to_datetime(df["Datetime"], format="%d/%m/%Y %H:%M") @@ -368,10 +364,10 @@ def capacity_factor(df): return capfac_df_final - - capfacs = [capfac_all_df] + capfacs = [] for each in [hyd_df_processed, csp_df, spv_df, won_df, wof_df]: capfacs.append(capacity_factor(each)) + capfac_all_df = pd.concat(capfacs).reset_index(drop=True) capfac_all_df.drop_duplicates( From e482967ba734303be1dd1a3e706b9f0cfb23e98d Mon Sep 17 00:00:00 2001 From: maartenbrinkerink <65602545+maartenbrinkerink@users.noreply.github.com> Date: Mon, 30 Dec 2024 09:07:50 -0500 Subject: [PATCH 8/9] Update to LINEAR emission limit calculations Previously historical emission data from EMBER was only 
pulled if the historical year was within the model horizon. However, this meant that in certain instances no LINEAR interpolation could be applied. Changes have been made to pull the latest historical data year, perform the interpolation, and drop historical years that are out of the model horizon. --- .../osemosys_global/emissions/emission_limit.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/osemosys_global/emissions/emission_limit.py b/workflow/scripts/osemosys_global/emissions/emission_limit.py index 0a690d73..ab87ad40 100644 --- a/workflow/scripts/osemosys_global/emissions/emission_limit.py +++ b/workflow/scripts/osemosys_global/emissions/emission_limit.py @@ -52,7 +52,7 @@ def add_emission_limits(emissions, emission_limit, ember, # Filter template df for given country and emission data = template.copy().loc[template["EMISSION"].isin( - [emission + country])] + [emission + country])].reset_index(drop = True) # Loop through limits per country for idx, row in limits.iterrows(): @@ -68,12 +68,25 @@ def add_emission_limits(emissions, emission_limit, ember, if data_type == 'LINEAR': ember_data = ember.copy().loc[ (ember["EMISSION"].isin([emission + country]))] + # Check if year for country specific EMBER data exists in horizon. for year in ember_data["YEAR"].unique(): if year in data["YEAR"].values: # Set baseline year data for LINEAR data.loc[(data["YEAR"] == year),"VALUE"] = ember_data.loc[ (ember_data["YEAR"] == year)]['VALUE'].iloc[0] + + # If none of EMBER data years exist in horizon, pull latest EMBER year + # to be able to set linear interpolation values. + if ember_data["YEAR"].unique()[-1] < data["YEAR"].values[0]: + data.loc[len(data)] = [data['EMISSION'].iloc[-1], + ember_data["YEAR"].unique()[-1]] + + data = data.sort_values(by = ['YEAR']).reset_index(drop = True) + + data.loc[(data["YEAR"] == ember_data["YEAR"].unique()[-1]),"VALUE" + ] = ember_data.loc[ + (ember_data["YEAR"] == year)]['VALUE'].iloc[0] # Set baseline year data for POINT data.loc[(data["YEAR"] == data_year),"VALUE"] = data_value @@ -104,6 +117,7 @@ def add_emission_limits(emissions, emission_limit, ember, # Format df data.dropna(axis=0, inplace=True) data["REGION"] = region_name + data = data.loc[data["YEAR"].between(start_year, end_year)] data = data[["REGION", "EMISSION", "YEAR", "VALUE"]] if annual_emission_limit.empty: From 4d42f82e92c6627b8d129aa1bb06ffd39b87fae8 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Tue, 7 Jan 2025 12:56:56 -0800 Subject: [PATCH 9/9] remove manual total cost calculation --- resources/osemosys_fast_preprocessed.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/resources/osemosys_fast_preprocessed.txt b/resources/osemosys_fast_preprocessed.txt index fb19e7f9..a2ca0761 100644 --- a/resources/osemosys_fast_preprocessed.txt +++ b/resources/osemosys_fast_preprocessed.txt @@ -504,7 +504,6 @@ s.t. OC3_OperatingCostsTotalAnnual{r in REGION, t in TECHNOLOGY, y in YEAR}: ((( # #s.t. TDC1_TotalDiscountedCostByTechnology{r in REGION, t in TECHNOLOGY, y in YEAR}: ((((sum{yy in YEAR: y-yy < OperationalLife[r,t] && y-yy>=0} NewCapacity[r,t,yy])+ ResidualCapacity[r,t,y])*FixedCost[r,t,y] + sum{m in MODEperTECHNOLOGY[t], l in TIMESLICE} RateOfActivity[r,l,t,m,y]*YearSplit[l,y]*VariableCost[r,t,m,y])/DiscountFactorMid[r,y]+CapitalCost[r,t,y] * NewCapacity[r,t,y]/DiscountFactor[r,y]+DiscountedTechnologyEmissionsPenalty[r,t,y]-DiscountedSalvageValue[r,t,y]) = TotalDiscountedCostByTechnology[r,t,y]; #s.t. 
TDC2_TotalDiscountedCost{r in REGION, y in YEAR}: sum{t in TECHNOLOGY}((((sum{yy in YEAR: y-yy < OperationalLife[r,t] && y-yy>=0} NewCapacity[r,t,yy])+ ResidualCapacity[r,t,y])*FixedCost[r,t,y] + sum{m in MODEperTECHNOLOGY[t], l in TIMESLICE} RateOfActivity[r,l,t,m,y]*YearSplit[l,y]*VariableCost[r,t,m,y])/DiscountFactorMid[r,y]+CapitalCost[r,t,y] * NewCapacity[r,t,y]/DiscountFactor[r,y]+DiscountedTechnologyEmissionsPenalty[r,t,y]-DiscountedSalvageValue[r,t,y]) + sum{s in STORAGE} (CapitalCostStorage[r,s,y] * NewStorageCapacity[r,s,y]/DiscountFactor[r,y]-CapitalCostStorage[r,s,y] * NewStorageCapacity[r,s,y]/DiscountFactor[r,y]) = TotalDiscountedCost[r,y]; -s.t. TDC2_TotalDiscountedCost{r in REGION, y in YEAR}: sum{t in TECHNOLOGY}(OperatingCost[r,t,y] + CapitalInvestment[r,t,y] + AnnualTechnologyEmissionsPenalty[r,t,y]) + sum{s in STORAGE} (CapitalInvestmentStorage[r,s,y]) = TotalDiscountedCost[r,y]; # ######### Total Capacity Constraints ############## #
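
---
A hedged numeric sketch of the PATCH 1/9 storage OAR fix (illustrative only;
the technology key "SDS" and the 85 % round-trip efficiency are hypothetical,
and efficiency_dict is assumed to hold efficiencies in percent):

    efficiency_dict = {"SDS": 85}

    old_oar = round(1 / (efficiency_dict["SDS"] / 100), 3)  # 1.176: storage would
                                                            # return more energy than stored
    new_oar = round(efficiency_dict["SDS"] / 100, 3)        # 0.85: 15 % loss on discharge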
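A hedged pandas sketch of the PATCH 8/9 interpolation behaviour (assumed
values: latest EMBER year 2022, a POINT limit of 40 in 2030, model horizon
2025-2030); it mimics the intended effect, not the script's exact code:

    import pandas as pd

    years = list(range(2022, 2031))
    data = pd.DataFrame({"YEAR": years, "VALUE": [100.0] + [None] * 7 + [40.0]})
    data["VALUE"] = data["VALUE"].interpolate()        # LINEAR fill from the 2022 anchor
    data = data.loc[data["YEAR"].between(2025, 2030)]  # drop years outside the horizon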