diff --git a/utils/csv-modifications.jl b/utils/csv-modifications.jl
index 1280b0dd..654a16c6 100644
--- a/utils/csv-modifications.jl
+++ b/utils/csv-modifications.jl
@@ -72,6 +72,24 @@ function add_column(
     add_column(tulipa_csv, unit, colname, content, position)
 end
 
+"""
+    unit, content = get_column(tulipa_csv, colname)
+    unit, content = get_column(tulipa_csv, position)
+
+Returns column `colname` or column at position `position`.
+"""
+function get_column(tulipa_csv::TulipaCSV, position::Int)
+    unit = tulipa_csv.units[position]
+    content = tulipa_csv.csv[:, position]
+
+    return unit, content
+end
+
+function get_column(tulipa_csv::TulipaCSV, colname)
+    position = columnindex(tulipa_csv.csv, Symbol(colname))
+    return get_column(tulipa_csv, position)
+end
+
 """
     unit, content = remove_column(tulipa_csv, colname, position)
     unit, content = remove_column(tulipa_csv, colname)
diff --git a/utils/scripts/assets-data-revamp.jl b/utils/scripts/assets-data-revamp.jl
new file mode 100644
index 00000000..27d2c48e
--- /dev/null
+++ b/utils/scripts/assets-data-revamp.jl
@@ -0,0 +1,346 @@
+# NOTE(review): duplicate `TulipaEnergyModel` removed from the original `using` list
+using DataFrames, DuckDB, TulipaIO, TulipaEnergyModel
+
+include("../csv-modifications.jl")
+
+# Local cleanup to ensure clean files
+apply_to_files_named(rm, "asset.csv")
+apply_to_files_named(rm, "asset_milestone.csv")
+apply_to_files_named(rm, "asset_commission.csv")
+apply_to_files_named(rm, "asset_both.csv")
+apply_to_files_named(rm, "flow.csv")
+apply_to_files_named(rm, "flow_milestone.csv")
+apply_to_files_named(rm, "flow_commission.csv")
+apply_to_files_named(rm, "flow_both.csv")
+run(`git restore test/inputs/ benchmark/EU/`)
+
+# FIXME: Two files have fixed_cost_storage_energy and investment_cost_storage_energy
+# FIXME: Check definitions of WHERE. Some files will be missing rows if year=commission_year is the only condition
+# FIXME: Definition of decommissionable?
+
+#=
+    TABLE asset <- graph-assets-data
+
+    name -> asset
+    type
+    group
+    capacity
+    min_operating_point -> ANY FROM assets_data
+    investment_method
+    technical_lifetime
+    economic_lifetime
+    discount_rate
+    consumer_balance_sense -> ANY FROM assets_data
+    capacity_storage_energy
+    is_seasonal -> ANY FROM assets_data
+    use_binary_storage_method -> ANY FROM assets_data
+    unit_commitment -> ANY FROM assets_data
+    unit_commitment_method -> ANY FROM assets_data
+    unit_commitment_integer -> ANY FROM assets_data
+    ramping -> ANY FROM assets_data
+=#
+apply_to_files_named("asset.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        # NOTE(review): `group` is a reserved keyword in DuckDB, so the
+        # identifier is quoted (escaped quotes inside the Julia string).
+        tcsv.csv =
+            DuckDB.query(
+                con,
+                "SELECT
+                    gad.name as asset,
+                    ANY_VALUE(type) AS type,
+                    ANY_VALUE(gad.\"group\") AS \"group\",
+                    ANY_VALUE(gad.capacity) AS capacity,
+                    ANY_VALUE(ad.min_operating_point) AS min_operating_point,
+                    ANY_VALUE(investment_method) AS investment_method,
+                    ANY_VALUE(technical_lifetime) AS technical_lifetime,
+                    ANY_VALUE(economic_lifetime) AS economic_lifetime,
+                    ANY_VALUE(discount_rate) AS discount_rate,
+                    ANY_VALUE(consumer_balance_sense) AS consumer_balance_sense,
+                    ANY_VALUE(capacity_storage_energy) AS capacity_storage_energy,
+                    ANY_VALUE(is_seasonal) AS is_seasonal,
+                    ANY_VALUE(use_binary_storage_method) AS use_binary_storage_method,
+                    ANY_VALUE(unit_commitment) AS unit_commitment,
+                    ANY_VALUE(unit_commitment_method) AS unit_commitment_method,
+                    ANY_VALUE(unit_commitment_integer) AS unit_commitment_integer,
+                    ANY_VALUE(ramping) AS ramping,
+                FROM graph_assets_data AS gad
+                LEFT JOIN assets_data AS ad
+                    ON gad.name = ad.name
+                GROUP BY asset
+                ORDER BY asset
+                ",
+            ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE flow = graph-flows-data
+=#
+apply_to_files_named("flow.csv"; include_missing = true) do path
+    cp(joinpath(dirname(path), "graph-flows-data.csv"), path)
+end
+
+#=
+    TABLE asset_milestone
+
+    asset -> ?
+    milestone_year -> ?
+    peak_demand -> assets_data WHERE year=commission_year
+    storage_inflows -> assets_data WHERE year=commission_year
+    initial_storage_level -> assets_data WHERE year=commission_year
+    max_energy_timeframe_partition -> assets_data WHERE year=commission_year
+    min_energy_timeframe_partition -> assets_data WHERE year=commission_year
+    units_on_cost -> assets_data WHERE year=commission_year
+    max_ramp_up -> assets_data WHERE year=commission_year
+    max_ramp_down -> assets_data WHERE year=commission_year
+=#
+apply_to_files_named("asset_milestone.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                name as asset,
+                year as milestone_year,
+                peak_demand,
+                storage_inflows,
+                initial_storage_level,
+                max_energy_timeframe_partition,
+                min_energy_timeframe_partition,
+                units_on_cost,
+                max_ramp_up,
+                max_ramp_down,
+            FROM assets_data AS ad
+            WHERE year=commission_year
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE flow_milestone
+
+    from_asset
+    to_asset
+    milestone_year
+    variable_cost
+    efficiency
+=#
+apply_to_files_named("flow_milestone.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                from_asset,
+                to_asset,
+                year AS milestone_year,
+                variable_cost,
+                efficiency,
+            FROM flows_data AS fd
+            WHERE year=commission_year
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE asset_commission <- vintage-assets-data
+
+    keep:
+    name -> asset
+    commission_year
+    fixed_cost
+    investment_cost
+    fixed_cost_storage_energy
+    investment_cost_storage_energy
+    storage_method_energy -> assets_data WHERE year=commission_year
+    energy_to_power_ratio -> assets_data WHERE year=commission_year
+=#
+apply_to_files_named("asset_commission.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                vad.name as asset,
+                vad.commission_year,
+                fixed_cost,
+                investment_cost,
+                vad.fixed_cost_storage_energy,
+                vad.investment_cost_storage_energy,
+                storage_method_energy,
+                energy_to_power_ratio,
+            FROM vintage_assets_data AS vad
+            LEFT JOIN assets_data AS ad
+                ON vad.name = ad.name
+                AND vad.commission_year = ad.commission_year
+                AND vad.commission_year = ad.year
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE flow_commission
+
+    from_asset
+    to_asset
+    commission_year
+    fixed_cost
+    investment_cost
+=#
+apply_to_files_named("flow_commission.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        # NOTE(review): join condition fixed to link vfd.commission_year to fd,
+        # mirroring the asset_commission join above; the original compared
+        # fd.year to fd.commission_year only, duplicating rows per vintage.
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                vfd.from_asset,
+                vfd.to_asset,
+                vfd.commission_year,
+                fixed_cost,
+                investment_cost,
+            FROM vintage_flows_data AS vfd
+            LEFT JOIN flows_data AS fd
+                ON vfd.from_asset = fd.from_asset
+                AND vfd.to_asset = fd.to_asset
+                AND vfd.commission_year = fd.commission_year AND vfd.commission_year = fd.year
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE asset_both <- assets-data
+
+    keep:
+    name -> asset
+    year -> milestone_year
+    commission_year
+    active
+    investable
+    decommissionable -> ?
+    investment_integer
+    investment_limit
+    initial_units
+    investment_integer_storage_energy
+    investment_limit_storage_energy
+    initial_storage_units
+=#
+apply_to_files_named("asset_both.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        # NOTE(review): selects initial_storage_units (per keep-list above);
+        # initial_storage_level is already migrated into asset_milestone.
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                name as asset,
+                year as milestone_year,
+                commission_year,
+                active,
+                investable,
+                investable AS decommissionable,
+                investment_integer,
+                investment_limit,
+                initial_units,
+                investment_integer_storage_energy,
+                investment_limit_storage_energy,
+                initial_storage_units,
+            FROM assets_data AS ad
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+#=
+    TABLE flow_both
+
+    from_asset
+    to_asset
+    milestone_year
+    commission_year
+    active
+    investable
+    decommissionable
+    investment_integer
+    investment_limit
+    initial_export_units
+    initial_import_units
+=#
+apply_to_files_named("flow_both.csv"; include_missing = true) do path
+    touch(path)
+    change_file(path) do tcsv
+        dirpath = dirname(path)
+        con = DBInterface.connect(DuckDB.DB)
+        schemas = TulipaEnergyModel.schema_per_table_name
+        read_csv_folder(con, dirpath; schemas)
+
+        tcsv.csv = DuckDB.query(
+            con,
+            "SELECT
+                from_asset,
+                to_asset,
+                year AS milestone_year,
+                commission_year,
+                active,
+                investable,
+                investable AS decommissionable,
+                investment_integer,
+                investment_limit,
+                initial_export_units,
+                initial_import_units,
+            FROM flows_data AS fd
+            ",
+        ) |> DataFrame
+
+        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
+    end
+end
+
+# Remove old files
+for a_or_f in ("assets", "flows"),
+    filename in ("$a_or_f-data", "graph-$a_or_f-data", "vintage-$a_or_f-data")
+
+    apply_to_files_named(rm, filename)
+end