Skip to content

Commit

Permalink
Add script assets-data-revamp
Browse files Browse the repository at this point in the history
  • Loading branch information
abelsiqueira committed Nov 20, 2024
1 parent e7a6000 commit 5e581f4
Show file tree
Hide file tree
Showing 2 changed files with 364 additions and 0 deletions.
18 changes: 18 additions & 0 deletions utils/csv-modifications.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ function add_column(
add_column(tulipa_csv, unit, colname, content, position)
end

"""
    unit, content = get_column(tulipa_csv, colname)
    unit, content = get_column(tulipa_csv, position)

Return the unit and the contents of column `colname`, or of the column at `position`.
"""
function get_column(tulipa_csv::TulipaCSV, position::Int)
    # The units vector and the CSV table are kept in lockstep, one entry per column.
    return tulipa_csv.units[position], tulipa_csv.csv[:, position]
end

function get_column(tulipa_csv::TulipaCSV, colname)
    # Resolve the column name to its numeric index, then delegate to the Int method.
    return get_column(tulipa_csv, columnindex(tulipa_csv.csv, Symbol(colname)))
end

"""
unit, content = remove_column(tulipa_csv, colname, position)
unit, content = remove_column(tulipa_csv, colname)
Expand Down
346 changes: 346 additions & 0 deletions utils/scripts/assets-data-revamp.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
# BUGFIX: the original `using` line listed TulipaEnergyModel twice; deduplicated.
using DataFrames, DuckDB, TulipaIO, TulipaEnergyModel

include("../csv-modifications.jl")

# Local cleanup to ensure clean files: remove any previously generated output
# CSVs, then restore the tracked inputs to their committed state.
for filename in (
    "asset.csv",
    "asset_milestone.csv",
    "asset_commission.csv",
    "asset_both.csv",
    "flow.csv",
    "flow_milestone.csv",
    "flow_commission.csv",
    "flow_both.csv",
)
    apply_to_files_named(rm, filename)
end
run(`git restore test/inputs/ benchmark/EU/`)

# FIXME: Two files have fixed_cost_storage_energy and investment_cost_storage_energy
# FIXME: Check definitions of WHERE. Some files will be missing rows if year=commission_year is the only condition
# FIXME: Definition of decommissionable?

#=
TABLE asset <- graph-assets-data
name -> asset
type
group
capacity
min_operating_point -> ANY FROM assets_data
investment_method
technical_lifetime
economic_lifetime
discount_rate
consumer_balance_sense -> ANY FROM assets_data
capacity_storage_energy
is_seasonal -> ANY FROM assets_data
use_binary_storage_method -> ANY FROM assets_data
unit_commitment -> ANY FROM assets_data
unit_commitment_method -> ANY FROM assets_data
unit_commitment_integer -> ANY FROM assets_data
ramping -> ANY FROM assets_data
=#
# Create asset.csv in every input folder: one row per asset holding the static
# (year-independent) columns.  Static columns come from graph_assets_data;
# columns still stored per-year in assets_data are collapsed with ANY_VALUE,
# matching the "ANY FROM assets_data" entries of the plan comment above.
apply_to_files_named("asset.csv"; include_missing = true) do path
    # include_missing = true also visits folders without an asset.csv; create it
    # so change_file has something to open.
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        # Fresh in-memory DuckDB per folder; read_csv_folder registers each CSV
        # in `dirpath` as a table, typed via the model's schemas.
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): `group` is a SQL keyword — confirm DuckDB accepts the
        # unquoted alias `AS group`.  The trailing comma before FROM relies on
        # DuckDB's lenient SELECT-list parsing.
        tcsv.csv =
            DuckDB.query(
                con,
                "SELECT
gad.name as asset,
ANY_VALUE(type) AS type,
ANY_VALUE(gad.group) AS group,
ANY_VALUE(gad.capacity) AS capacity,
ANY_VALUE(ad.min_operating_point) AS min_operating_point,
ANY_VALUE(investment_method) AS investment_method,
ANY_VALUE(technical_lifetime) AS technical_lifetime,
ANY_VALUE(economic_lifetime) AS economic_lifetime,
ANY_VALUE(discount_rate) AS discount_rate,
ANY_VALUE(consumer_balance_sense) AS consumer_balance_sense,
ANY_VALUE(capacity_storage_energy) AS capacity_storage_energy,
ANY_VALUE(is_seasonal) AS is_seasonal,
ANY_VALUE(use_binary_storage_method) AS use_binary_storage_method,
ANY_VALUE(unit_commitment) AS unit_commitment,
ANY_VALUE(unit_commitment_method) AS unit_commitment_method,
ANY_VALUE(unit_commitment_integer) AS unit_commitment_integer,
ANY_VALUE(ramping) AS ramping,
FROM graph_assets_data AS gad
LEFT JOIN assets_data AS ad
ON gad.name = ad.name
GROUP BY asset
ORDER BY asset
",
            ) |> DataFrame

        # The new files carry no unit header row: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow <- graph-flows-data
=#
# The new flow table is an exact copy of graph-flows-data.csv.
apply_to_files_named("flow.csv"; include_missing = true) do path
    # force = true makes the copy idempotent: without it, cp throws if flow.csv
    # already exists (e.g. when the script is re-run without the cleanup step).
    cp(joinpath(dirname(path), "graph-flows-data.csv"), path; force = true)
end

#=
TABLE asset_milestone
asset -> ?
milestone_year -> ?
peak_demand -> assets_data WHERE year=commission_year
storage_inflows -> assets_data WHERE year=commission_year
initial_storage_level -> assets_data WHERE year=commission_year
max_energy_timeframe_partition -> assets_data WHERE year=commission_year
min_energy_timeframe_partition -> assets_data WHERE year=commission_year
units_on_cost -> assets_data WHERE year=commission_year
max_ramp_up -> assets_data WHERE year=commission_year
max_ramp_down -> assets_data WHERE year=commission_year
=#
# asset_milestone: per-(asset, milestone year) columns, taken from the
# assets_data rows where the milestone year coincides with the commission year.
apply_to_files_named("asset_milestone.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        # Load every CSV of this folder into a throwaway in-memory DuckDB.
        folder = dirname(path)
        db = DBInterface.connect(DuckDB.DB)
        read_csv_folder(db, folder; schemas = TulipaEnergyModel.schema_per_table_name)

        query = "SELECT
name as asset,
year as milestone_year,
peak_demand,
storage_inflows,
initial_storage_level,
max_energy_timeframe_partition,
min_energy_timeframe_partition,
units_on_cost,
max_ramp_up,
max_ramp_down,
FROM assets_data AS ad
WHERE year=commission_year
"
        tcsv.csv = DataFrame(DuckDB.query(db, query))
        # No unit header row in the new files: one empty unit per column.
        tcsv.units = fill("", size(tcsv.csv, 2))
    end
end

#=
TABLE flow_milestone
from_asset
to_asset
milestone_year
variable_cost
efficiency
=#
# Create flow_milestone.csv: per-(flow, milestone year) columns taken from the
# flows_data rows where the milestone year equals the commission year.
apply_to_files_named("flow_milestone.csv"; include_missing = true) do path
    # include_missing = true also visits folders without the file; create it.
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        # Fresh in-memory DuckDB loaded with every CSV of this folder.
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): per the FIXME at the top of the script, any flow whose
        # rows never satisfy year == commission_year is dropped here — confirm
        # that this cannot happen in the input data.
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
from_asset,
to_asset,
year AS milestone_year,
variable_cost,
efficiency,
FROM flows_data AS fd
WHERE year=commission_year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE asset_commission <- vintage-assets-data
keep:
name -> asset
commission_year
fixed_cost
investment_cost
fixed_cost_storage_energy
investment_cost_storage_energy
storage_method_energy -> assets_data WHERE year=commission_year
energy_to_power_ratio -> assets_data WHERE year=commission_year
=#
# Create asset_commission.csv from vintage-assets-data, pulling the two columns
# that live in assets_data (storage_method_energy, energy_to_power_ratio) from
# the row where year == commission_year.
apply_to_files_named("asset_commission.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): the *_storage_energy columns are vad-qualified,
        # presumably to disambiguate the two files that (per the FIXME at the
        # top) have them in both tables — verify.  The LEFT JOIN keeps every
        # vintage row even when assets_data has no year == commission_year
        # match; the ad-sourced columns are then NULL.
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
vad.name as asset,
vad.commission_year,
fixed_cost,
investment_cost,
vad.fixed_cost_storage_energy,
vad.investment_cost_storage_energy,
storage_method_energy,
energy_to_power_ratio,
FROM vintage_assets_data AS vad
LEFT JOIN assets_data AS ad
ON vad.name = ad.name
AND vad.commission_year = ad.commission_year
AND vad.commission_year = ad.year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow_commission
from_asset
to_asset
commission_year
fixed_cost
investment_cost
=#
# Create flow_commission.csv from vintage-flows-data.  Mirrors the
# asset_commission query: each vintage row is joined to the flows_data row of
# the same flow whose milestone year equals this commission year.
apply_to_files_named("flow_commission.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # BUGFIX: the join previously used `fd.year = fd.commission_year`, a
        # self-comparison on fd that never ties fd to the vintage row's
        # commission_year — a flow with several such milestone rows would have
        # its vintage rows duplicated.  The condition now mirrors the
        # asset_commission join (vintage commission_year == fd commission_year
        # == fd year).
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
vfd.from_asset,
vfd.to_asset,
vfd.commission_year,
fixed_cost,
investment_cost,
FROM vintage_flows_data AS vfd
LEFT JOIN flows_data AS fd
ON vfd.from_asset = fd.from_asset
AND vfd.to_asset = fd.to_asset
AND vfd.commission_year = fd.commission_year
AND vfd.commission_year = fd.year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE asset_both <- assets-data
keep:
name -> asset
year -> milestone_year
commission_year
active
investable
decommissionable -> ?
investment_integer
investment_limit
initial_units
investment_integer_storage_energy
investment_limit_storage_energy
initial_storage_units
=#
# Create asset_both.csv: per-(asset, milestone year, commission year) columns,
# taken directly from assets_data.
apply_to_files_named("asset_both.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # BUGFIX: select initial_storage_units, as listed in the plan comment
        # above — not initial_storage_level, which is a milestone-year column
        # and is already emitted into asset_milestone.csv.
        # `investable AS decommissionable` is a placeholder (see the FIXME at
        # the top of the script about the definition of decommissionable).
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
name as asset,
year as milestone_year,
commission_year,
active,
investable,
investable AS decommissionable,
investment_integer,
investment_limit,
initial_units,
investment_integer_storage_energy,
investment_limit_storage_energy,
initial_storage_units,
FROM assets_data AS ad
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow_both
from_asset
to_asset
milestone_year
commission_year
active
investable
decommissionable
investment_integer
investment_limit
initial_export_units
initial_import_units
=#
# flow_both: per-(flow, milestone year, commission year) columns, taken
# directly from flows_data.  `investable AS decommissionable` is a placeholder.
apply_to_files_named("flow_both.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        # Load every CSV of this folder into a throwaway in-memory DuckDB.
        folder = dirname(path)
        db = DBInterface.connect(DuckDB.DB)
        read_csv_folder(db, folder; schemas = TulipaEnergyModel.schema_per_table_name)

        query = "SELECT
from_asset,
to_asset,
year AS milestone_year,
commission_year,
active,
investable,
investable AS decommissionable,
investment_integer,
investment_limit,
initial_export_units,
initial_import_units,
FROM flows_data AS fd
"
        tcsv.csv = DataFrame(DuckDB.query(db, query))
        # No unit header row in the new files: one empty unit per column.
        tcsv.units = fill("", size(tcsv.csv, 2))
    end
end

# Drop the superseded wide-format input files now that the new tables exist.
# NOTE(review): these names carry no .csv extension, unlike the calls at the
# top of the script — confirm apply_to_files_named matches them as intended.
for kind in ("assets", "flows")
    for prefix in ("", "graph-", "vintage-")
        apply_to_files_named(rm, "$(prefix)$(kind)-data")
    end
end

0 comments on commit 5e581f4

Please sign in to comment.