Skip to content

Commit

Permalink
Add script assets-data-revamp
Browse files Browse the repository at this point in the history
  • Loading branch information
abelsiqueira committed Nov 20, 2024
1 parent e7a6000 commit 5e581f4
Show file tree
Hide file tree
Showing 2 changed files with 364 additions and 0 deletions.
18 changes: 18 additions & 0 deletions utils/csv-modifications.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ function add_column(
add_column(tulipa_csv, unit, colname, content, position)
end

"""
    unit, content = get_column(tulipa_csv, colname)
    unit, content = get_column(tulipa_csv, position)

Return the unit and the contents of column `colname`, or of the column at `position`.
"""
function get_column(tulipa_csv::TulipaCSV, position::Int)
    # The units vector and the CSV table are kept in lockstep, one entry per column.
    return tulipa_csv.units[position], tulipa_csv.csv[:, position]
end

function get_column(tulipa_csv::TulipaCSV, colname)
    # Resolve the column name to its numeric index, then delegate to the Int method.
    return get_column(tulipa_csv, columnindex(tulipa_csv.csv, Symbol(colname)))
end

"""
unit, content = remove_column(tulipa_csv, colname, position)
unit, content = remove_column(tulipa_csv, colname)
Expand Down
346 changes: 346 additions & 0 deletions utils/scripts/assets-data-revamp.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
# BUGFIX: the original `using` line listed TulipaEnergyModel twice; deduplicated.
using DataFrames, DuckDB, TulipaIO, TulipaEnergyModel

include("../csv-modifications.jl")

# Local cleanup to ensure clean files: remove any previously generated output
# CSVs, then restore the tracked inputs to their committed state.
for filename in (
    "asset.csv",
    "asset_milestone.csv",
    "asset_commission.csv",
    "asset_both.csv",
    "flow.csv",
    "flow_milestone.csv",
    "flow_commission.csv",
    "flow_both.csv",
)
    apply_to_files_named(rm, filename)
end
run(`git restore test/inputs/ benchmark/EU/`)

# FIXME: Two files have fixed_cost_storage_energy and investment_cost_storage_energy
# FIXME: Check definitions of WHERE. Some files will be missing rows if year=commission_year is the only condition
# FIXME: Definition of decommissionable?

#=
TABLE asset <- graph-assets-data
name -> asset
type
group
capacity
min_operating_point -> ANY FROM assets_data
investment_method
technical_lifetime
economic_lifetime
discount_rate
consumer_balance_sense -> ANY FROM assets_data
capacity_storage_energy
is_seasonal -> ANY FROM assets_data
use_binary_storage_method -> ANY FROM assets_data
unit_commitment -> ANY FROM assets_data
unit_commitment_method -> ANY FROM assets_data
unit_commitment_integer -> ANY FROM assets_data
ramping -> ANY FROM assets_data
=#
# Create asset.csv in every input folder: one row per asset holding the static
# (year-independent) columns.  Static columns come from graph_assets_data;
# columns still stored per-year in assets_data are collapsed with ANY_VALUE,
# matching the "ANY FROM assets_data" entries of the plan comment above.
apply_to_files_named("asset.csv"; include_missing = true) do path
    # include_missing = true also visits folders without an asset.csv; create it
    # so change_file has something to open.
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        # Fresh in-memory DuckDB per folder; read_csv_folder registers each CSV
        # in `dirpath` as a table, typed via the model's schemas.
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): `group` is a SQL keyword — confirm DuckDB accepts the
        # unquoted alias `AS group`.  The trailing comma before FROM relies on
        # DuckDB's lenient SELECT-list parsing.
        tcsv.csv =
            DuckDB.query(
                con,
                "SELECT
gad.name as asset,
ANY_VALUE(type) AS type,
ANY_VALUE(gad.group) AS group,
ANY_VALUE(gad.capacity) AS capacity,
ANY_VALUE(ad.min_operating_point) AS min_operating_point,
ANY_VALUE(investment_method) AS investment_method,
ANY_VALUE(technical_lifetime) AS technical_lifetime,
ANY_VALUE(economic_lifetime) AS economic_lifetime,
ANY_VALUE(discount_rate) AS discount_rate,
ANY_VALUE(consumer_balance_sense) AS consumer_balance_sense,
ANY_VALUE(capacity_storage_energy) AS capacity_storage_energy,
ANY_VALUE(is_seasonal) AS is_seasonal,
ANY_VALUE(use_binary_storage_method) AS use_binary_storage_method,
ANY_VALUE(unit_commitment) AS unit_commitment,
ANY_VALUE(unit_commitment_method) AS unit_commitment_method,
ANY_VALUE(unit_commitment_integer) AS unit_commitment_integer,
ANY_VALUE(ramping) AS ramping,
FROM graph_assets_data AS gad
LEFT JOIN assets_data AS ad
ON gad.name = ad.name
GROUP BY asset
ORDER BY asset
",
            ) |> DataFrame

        # The new files carry no unit header row: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow <- graph-flows-data
=#
# The new flow table is an exact copy of graph-flows-data.csv.
apply_to_files_named("flow.csv"; include_missing = true) do path
    # force = true makes the copy idempotent: without it, cp throws if flow.csv
    # already exists (e.g. when the script is re-run without the cleanup step).
    cp(joinpath(dirname(path), "graph-flows-data.csv"), path; force = true)
end

#=
TABLE asset_milestone
asset -> ?
milestone_year -> ?
peak_demand -> assets_data WHERE year=commission_year
storage_inflows -> assets_data WHERE year=commission_year
initial_storage_level -> assets_data WHERE year=commission_year
max_energy_timeframe_partition -> assets_data WHERE year=commission_year
min_energy_timeframe_partition -> assets_data WHERE year=commission_year
units_on_cost -> assets_data WHERE year=commission_year
max_ramp_up -> assets_data WHERE year=commission_year
max_ramp_down -> assets_data WHERE year=commission_year
=#
# asset_milestone: per-(asset, milestone year) columns, taken from the
# assets_data rows where the milestone year coincides with the commission year.
apply_to_files_named("asset_milestone.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        # Load every CSV of this folder into a throwaway in-memory DuckDB.
        folder = dirname(path)
        db = DBInterface.connect(DuckDB.DB)
        read_csv_folder(db, folder; schemas = TulipaEnergyModel.schema_per_table_name)

        query = "SELECT
name as asset,
year as milestone_year,
peak_demand,
storage_inflows,
initial_storage_level,
max_energy_timeframe_partition,
min_energy_timeframe_partition,
units_on_cost,
max_ramp_up,
max_ramp_down,
FROM assets_data AS ad
WHERE year=commission_year
"
        tcsv.csv = DataFrame(DuckDB.query(db, query))
        # No unit header row in the new files: one empty unit per column.
        tcsv.units = fill("", size(tcsv.csv, 2))
    end
end

#=
TABLE flow_milestone
from_asset
to_asset
milestone_year
variable_cost
efficiency
=#
# Create flow_milestone.csv: per-(flow, milestone year) columns taken from the
# flows_data rows where the milestone year equals the commission year.
apply_to_files_named("flow_milestone.csv"; include_missing = true) do path
    # include_missing = true also visits folders without the file; create it.
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        # Fresh in-memory DuckDB loaded with every CSV of this folder.
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): per the FIXME at the top of the script, any flow whose
        # rows never satisfy year == commission_year is dropped here — confirm
        # that this cannot happen in the input data.
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
from_asset,
to_asset,
year AS milestone_year,
variable_cost,
efficiency,
FROM flows_data AS fd
WHERE year=commission_year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE asset_commission <- vintage-assets-data
keep:
name -> asset
commission_year
fixed_cost
investment_cost
fixed_cost_storage_energy
investment_cost_storage_energy
storage_method_energy -> assets_data WHERE year=commission_year
energy_to_power_ratio -> assets_data WHERE year=commission_year
=#
# Create asset_commission.csv from vintage-assets-data, pulling the two columns
# that live in assets_data (storage_method_energy, energy_to_power_ratio) from
# the row where year == commission_year.
apply_to_files_named("asset_commission.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # NOTE(review): the *_storage_energy columns are vad-qualified,
        # presumably to disambiguate the two files that (per the FIXME at the
        # top) have them in both tables — verify.  The LEFT JOIN keeps every
        # vintage row even when assets_data has no year == commission_year
        # match; the ad-sourced columns are then NULL.
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
vad.name as asset,
vad.commission_year,
fixed_cost,
investment_cost,
vad.fixed_cost_storage_energy,
vad.investment_cost_storage_energy,
storage_method_energy,
energy_to_power_ratio,
FROM vintage_assets_data AS vad
LEFT JOIN assets_data AS ad
ON vad.name = ad.name
AND vad.commission_year = ad.commission_year
AND vad.commission_year = ad.year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow_commission
from_asset
to_asset
commission_year
fixed_cost
investment_cost
=#
# Create flow_commission.csv from vintage-flows-data.  Mirrors the
# asset_commission query: each vintage row is joined to the flows_data row of
# the same flow whose milestone year equals this commission year.
apply_to_files_named("flow_commission.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # BUGFIX: the join previously used `fd.year = fd.commission_year`, a
        # self-comparison on fd that never ties fd to the vintage row's
        # commission_year — a flow with several such milestone rows would have
        # its vintage rows duplicated.  The condition now mirrors the
        # asset_commission join (vintage commission_year == fd commission_year
        # == fd year).
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
vfd.from_asset,
vfd.to_asset,
vfd.commission_year,
fixed_cost,
investment_cost,
FROM vintage_flows_data AS vfd
LEFT JOIN flows_data AS fd
ON vfd.from_asset = fd.from_asset
AND vfd.to_asset = fd.to_asset
AND vfd.commission_year = fd.commission_year
AND vfd.commission_year = fd.year
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE asset_both <- assets-data
keep:
name -> asset
year -> milestone_year
commission_year
active
investable
decommissionable -> ?
investment_integer
investment_limit
initial_units
investment_integer_storage_energy
investment_limit_storage_energy
initial_storage_units
=#
# Create asset_both.csv: per-(asset, milestone year, commission year) columns,
# taken directly from assets_data.
apply_to_files_named("asset_both.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        dirpath = dirname(path)
        con = DBInterface.connect(DuckDB.DB)
        schemas = TulipaEnergyModel.schema_per_table_name
        read_csv_folder(con, dirpath; schemas)

        # BUGFIX: select initial_storage_units, as listed in the plan comment
        # above — not initial_storage_level, which is a milestone-year column
        # and is already emitted into asset_milestone.csv.
        # `investable AS decommissionable` is a placeholder (see the FIXME at
        # the top of the script about the definition of decommissionable).
        tcsv.csv = DuckDB.query(
            con,
            "SELECT
name as asset,
year as milestone_year,
commission_year,
active,
investable,
investable AS decommissionable,
investment_integer,
investment_limit,
initial_units,
investment_integer_storage_energy,
investment_limit_storage_energy,
initial_storage_units,
FROM assets_data AS ad
",
        ) |> DataFrame

        # No unit header row in the new files: one empty unit per column.
        tcsv.units = ["" for _ in 1:size(tcsv.csv, 2)]
    end
end

#=
TABLE flow_both
from_asset
to_asset
milestone_year
commission_year
active
investable
decommissionable
investment_integer
investment_limit
initial_export_units
initial_import_units
=#
# flow_both: per-(flow, milestone year, commission year) columns, taken
# directly from flows_data.  `investable AS decommissionable` is a placeholder.
apply_to_files_named("flow_both.csv"; include_missing = true) do path
    touch(path)
    change_file(path) do tcsv
        # Load every CSV of this folder into a throwaway in-memory DuckDB.
        folder = dirname(path)
        db = DBInterface.connect(DuckDB.DB)
        read_csv_folder(db, folder; schemas = TulipaEnergyModel.schema_per_table_name)

        query = "SELECT
from_asset,
to_asset,
year AS milestone_year,
commission_year,
active,
investable,
investable AS decommissionable,
investment_integer,
investment_limit,
initial_export_units,
initial_import_units,
FROM flows_data AS fd
"
        tcsv.csv = DataFrame(DuckDB.query(db, query))
        # No unit header row in the new files: one empty unit per column.
        tcsv.units = fill("", size(tcsv.csv, 2))
    end
end

# Drop the superseded wide-format input files now that the new tables exist.
# NOTE(review): these names carry no .csv extension, unlike the calls at the
# top of the script — confirm apply_to_files_named matches them as intended.
for kind in ("assets", "flows")
    for prefix in ("", "graph-", "vintage-")
        apply_to_files_named(rm, "$(prefix)$(kind)-data")
    end
end

0 comments on commit 5e581f4

Please sign in to comment.