electricity demand: remove powerstatistics flag, merge sources in retrieve_electricity_demand

This commit is contained in:
Fabian 2023-08-17 10:17:12 +02:00
parent 672d7b9538
commit 9d4ce430cc
6 changed files with 85 additions and 110 deletions

View File

@ -1,5 +1,4 @@
,Unit,Values,Description ,Unit,Values,Description
power_statistics,bool,"{true, false}",Whether to load the electricity consumption data of the ENTSOE power statistics (only for files from 2019 and before) or from the ENTSOE transparency data (only has load data from 2015 onwards).
interpolate_limit,hours,integer,"Maximum gap size (consecutive nans) which interpolated linearly." interpolate_limit,hours,integer,"Maximum gap size (consecutive nans) which interpolated linearly."
time_shift_for_large_gaps,string,string,"Periods which are used for copying time-slices in order to fill large gaps of nans. Have to be valid ``pandas`` period strings." time_shift_for_large_gaps,string,string,"Periods which are used for copying time-slices in order to fill large gaps of nans. Have to be valid ``pandas`` period strings."
manual_adjustments,bool,"{true, false}","Whether to adjust the load data manually according to the function in :func:`manual_adjustment`." manual_adjustments,bool,"{true, false}","Whether to adjust the load data manually according to the function in :func:`manual_adjustment`."

1 Unit Values Description
power_statistics bool {true, false} Whether to load the electricity consumption data of the ENTSOE power statistics (only for files from 2019 and before) or from the ENTSOE transparency data (only has load data from 2015 onwards).
2 interpolate_limit hours integer Maximum gap size (consecutive nans) which interpolated linearly.
3 time_shift_for_large_gaps string string Periods which are used for copying time-slices in order to fill large gaps of nans. Have to be valid ``pandas`` period strings.
4 manual_adjustments bool {true, false} Whether to adjust the load data manually according to the function in :func:`manual_adjustment`.

View File

@ -91,7 +91,7 @@ None.
**Outputs** **Outputs**
- ``data/load_raw.csv`` - ``data/electricity_demand.csv``
Rule ``retrieve_cost_data`` Rule ``retrieve_cost_data``

View File

@ -24,7 +24,7 @@ rule build_electricity_demand:
countries=config_provider("countries"), countries=config_provider("countries"),
load=config_provider("load"), load=config_provider("load"),
input: input:
ancient("data/load_raw.csv"), ancient("data/electricity_demand.csv"),
output: output:
RESOURCES + "load.csv", RESOURCES + "load.csv",
log: log:

View File

@ -159,7 +159,7 @@ if config["enable"]["retrieve"]:
params: params:
versions=["2019-06-05", "2020-10-06"], versions=["2019-06-05", "2020-10-06"],
output: output:
"data/load_raw.csv", "data/electricity_demand.csv",
log: log:
"logs/retrieve_electricity_demand.log", "logs/retrieve_electricity_demand.log",
resources: resources:

View File

@ -31,7 +31,7 @@ Relevant Settings
Inputs Inputs
------ ------
- ``data/load_raw.csv``: - ``data/electricity_demand.csv``:
Outputs Outputs
------- -------
@ -49,7 +49,7 @@ from _helpers import configure_logging, set_scenario_config
from pandas import Timedelta as Delta from pandas import Timedelta as Delta
def load_timeseries(fn, years, countries, powerstatistics=True): def load_timeseries(fn, years, countries):
""" """
Read load data from OPSD time-series package version 2020-10-06. Read load data from OPSD time-series package version 2020-10-06.
@ -62,10 +62,6 @@ def load_timeseries(fn, years, countries, powerstatistics=True):
File name or url location (file format .csv) File name or url location (file format .csv)
countries : listlike countries : listlike
Countries for which to read load data. Countries for which to read load data.
powerstatistics: bool
Whether the electricity consumption data of the ENTSOE power
statistics (if true) or of the ENTSOE transparency map (if false)
should be parsed.
Returns Returns
------- -------
@ -74,17 +70,9 @@ def load_timeseries(fn, years, countries, powerstatistics=True):
""" """
logger.info(f"Retrieving load data from '{fn}'.") logger.info(f"Retrieving load data from '{fn}'.")
pattern = "power_statistics" if powerstatistics else "transparency"
pattern = f"_load_actual_entsoe_{pattern}"
def rename(s):
return s[: -len(pattern)]
return ( return (
pd.read_csv(fn, index_col=0, parse_dates=[0]) pd.read_csv(fn, index_col=0, parse_dates=[0])
.tz_localize(None) .tz_localize(None)
.filter(like=pattern)
.rename(columns=rename)
.dropna(how="all", axis=0) .dropna(how="all", axis=0)
.rename(columns={"GB_UKM": "GB"}) .rename(columns={"GB_UKM": "GB"})
.filter(items=countries) .filter(items=countries)
@ -149,17 +137,18 @@ def copy_timeslice(load, cntry, start, stop, delta, fn_load=None):
].values ].values
elif fn_load is not None: elif fn_load is not None:
duration = pd.date_range(freq="h", start=start - delta, end=stop - delta) duration = pd.date_range(freq="h", start=start - delta, end=stop - delta)
load_raw = load_timeseries(fn_load, duration, [cntry], powerstatistics) load_raw = load_timeseries(fn_load, duration, [cntry])
load.loc[start:stop, cntry] = load_raw.loc[ load.loc[start:stop, cntry] = load_raw.loc[
start - delta : stop - delta, cntry start - delta : stop - delta, cntry
].values ].values
def manual_adjustment(load, fn_load, powerstatistics): def manual_adjustment(load, fn_load):
""" """
Adjust gaps manual for load data from OPSD time-series package. Adjust gaps manual for load data from OPSD time-series package.
1. For the ENTSOE power statistics load data (if powerstatistics is True) 1. For years later than 2015 for which the load data is mainly taken from the
ENTSOE power statistics
Kosovo (KV) and Albania (AL) do not exist in the data set. Kosovo gets the Kosovo (KV) and Albania (AL) do not exist in the data set. Kosovo gets the
same load curve as Serbia and Albania the same as Macdedonia, both scaled same load curve as Serbia and Albania the same as Macdedonia, both scaled
@ -167,7 +156,8 @@ def manual_adjustment(load, fn_load, powerstatistics):
IEA Data browser [0] for the year 2013. IEA Data browser [0] for the year 2013.
2. For the ENTSOE transparency load data (if powerstatistics is False) 2. For years earlier than 2015 for which the load data is mainly taken from the
ENTSOE transparency platforms
Albania (AL) and Macedonia (MK) do not exist in the data set. Both get the Albania (AL) and Macedonia (MK) do not exist in the data set. Both get the
same load curve as Montenegro, scaled by the corresponding ratio of total energy same load curve as Montenegro, scaled by the corresponding ratio of total energy
@ -183,9 +173,6 @@ def manual_adjustment(load, fn_load, powerstatistics):
---------- ----------
load : pd.DataFrame load : pd.DataFrame
Load time-series with UTC timestamps x ISO-2 countries Load time-series with UTC timestamps x ISO-2 countries
powerstatistics: bool
Whether argument load comprises the electricity consumption data of
the ENTSOE power statistics or of the ENTSOE transparency map
load_fn: str load_fn: str
File name or url location (file format .csv) File name or url location (file format .csv)
@ -195,88 +182,66 @@ def manual_adjustment(load, fn_load, powerstatistics):
Manual adjusted and interpolated load time-series with UTC Manual adjusted and interpolated load time-series with UTC
timestamps x ISO-2 countries timestamps x ISO-2 countries
""" """
if powerstatistics: if "MK" in load:
if "MK" in load.columns: if "AL" not in load or load.AL.isnull().values.all():
if "AL" not in load.columns or load.AL.isnull().values.all(): load["AL"] = load["MK"] * (4.1 / 7.4)
load["AL"] = load["MK"] * (4.1 / 7.4) if "RS" in load:
if "RS" in load.columns: if "KV" not in load or load.KV.isnull().values.all():
if "KV" not in load.columns or load.KV.isnull().values.all(): load["KV"] = load["RS"] * (4.8 / 27.0)
load["KV"] = load["RS"] * (4.8 / 27.0) if "ME" in load:
if "AL" not in load and "AL" in countries:
load["AL"] = load.ME * (5.7 / 2.9)
if "MK" not in load and "MK" in countries:
load["MK"] = load.ME * (6.7 / 2.9)
if "BA" not in load and "BA" in countries:
load["BA"] = load.HR * (11.0 / 16.2)
copy_timeslice( copy_timeslice(load, "GR", "2015-08-11 21:00", "2015-08-15 20:00", Delta(weeks=1))
load, "GR", "2015-08-11 21:00", "2015-08-15 20:00", Delta(weeks=1) copy_timeslice(load, "AT", "2018-12-31 22:00", "2019-01-01 22:00", Delta(days=2))
) copy_timeslice(load, "CH", "2010-01-19 07:00", "2010-01-19 22:00", Delta(days=1))
copy_timeslice( copy_timeslice(load, "CH", "2010-03-28 00:00", "2010-03-28 21:00", Delta(days=1))
load, "AT", "2018-12-31 22:00", "2019-01-01 22:00", Delta(days=2) # is a WE, so take WE before
) copy_timeslice(load, "CH", "2010-10-08 13:00", "2010-10-10 21:00", Delta(weeks=1))
copy_timeslice( copy_timeslice(load, "CH", "2010-11-04 04:00", "2010-11-04 22:00", Delta(days=1))
load, "CH", "2010-01-19 07:00", "2010-01-19 22:00", Delta(days=1) copy_timeslice(load, "NO", "2010-12-09 11:00", "2010-12-09 18:00", Delta(days=1))
) # whole january missing
copy_timeslice( copy_timeslice(
load, "CH", "2010-03-28 00:00", "2010-03-28 21:00", Delta(days=1) load,
) "GB",
# is a WE, so take WE before "2010-01-01 00:00",
copy_timeslice( "2010-01-31 23:00",
load, "CH", "2010-10-08 13:00", "2010-10-10 21:00", Delta(weeks=1) Delta(days=-365),
) fn_load,
copy_timeslice( )
load, "CH", "2010-11-04 04:00", "2010-11-04 22:00", Delta(days=1) # 1.1. at midnight gets special treatment
) copy_timeslice(
copy_timeslice( load,
load, "NO", "2010-12-09 11:00", "2010-12-09 18:00", Delta(days=1) "IE",
) "2016-01-01 00:00",
# whole january missing "2016-01-01 01:00",
copy_timeslice( Delta(days=-366),
load, fn_load,
"GB", )
"2010-01-01 00:00", copy_timeslice(
"2010-01-31 23:00", load,
Delta(days=-365), "PT",
fn_load, "2016-01-01 00:00",
) "2016-01-01 01:00",
# 1.1. at midnight gets special treatment Delta(days=-366),
copy_timeslice( fn_load,
load, )
"IE", copy_timeslice(
"2016-01-01 00:00", load,
"2016-01-01 01:00", "GB",
Delta(days=-366), "2016-01-01 00:00",
fn_load, "2016-01-01 01:00",
) Delta(days=-366),
copy_timeslice( fn_load,
load, )
"PT",
"2016-01-01 00:00",
"2016-01-01 01:00",
Delta(days=-366),
fn_load,
)
copy_timeslice(
load,
"GB",
"2016-01-01 00:00",
"2016-01-01 01:00",
Delta(days=-366),
fn_load,
)
else: copy_timeslice(load, "BG", "2018-10-27 21:00", "2018-10-28 22:00", Delta(weeks=1))
if "ME" in load: copy_timeslice(load, "LU", "2019-01-02 11:00", "2019-01-05 05:00", Delta(weeks=-1))
if "AL" not in load and "AL" in countries: copy_timeslice(load, "LU", "2019-02-05 20:00", "2019-02-06 19:00", Delta(weeks=-1))
load["AL"] = load.ME * (5.7 / 2.9)
if "MK" not in load and "MK" in countries:
load["MK"] = load.ME * (6.7 / 2.9)
if "BA" not in load and "BA" in countries:
load["BA"] = load.HR * (11.0 / 16.2)
copy_timeslice(
load, "BG", "2018-10-27 21:00", "2018-10-28 22:00", Delta(weeks=1)
)
copy_timeslice(
load, "LU", "2019-01-02 11:00", "2019-01-05 05:00", Delta(weeks=-1)
)
copy_timeslice(
load, "LU", "2019-02-05 20:00", "2019-02-06 19:00", Delta(weeks=-1)
)
return load return load
@ -290,17 +255,16 @@ if __name__ == "__main__":
configure_logging(snakemake) configure_logging(snakemake)
set_scenario_config(snakemake) set_scenario_config(snakemake)
powerstatistics = snakemake.params.load["power_statistics"]
interpolate_limit = snakemake.params.load["interpolate_limit"] interpolate_limit = snakemake.params.load["interpolate_limit"]
countries = snakemake.params.countries countries = snakemake.params.countries
snapshots = pd.date_range(freq="h", **snakemake.params.snapshots) snapshots = pd.date_range(freq="h", **snakemake.params.snapshots)
years = slice(snapshots[0], snapshots[-1]) years = slice(snapshots[0], snapshots[-1])
time_shift = snakemake.params.load["time_shift_for_large_gaps"] time_shift = snakemake.params.load["time_shift_for_large_gaps"]
load = load_timeseries(snakemake.input[0], years, countries, powerstatistics) load = load_timeseries(snakemake.input[0], years, countries)
if snakemake.params.load["manual_adjustments"]: if snakemake.params.load["manual_adjustments"]:
load = manual_adjustment(load, snakemake.input[0], powerstatistics) load = manual_adjustment(load, snakemake.input[0])
if load.empty: if load.empty:
logger.warning("Build electricity demand time series is empty.") logger.warning("Build electricity demand time series is empty.")

View File

@ -20,7 +20,7 @@ if __name__ == "__main__":
if "snakemake" not in globals(): if "snakemake" not in globals():
from _helpers import mock_snakemake from _helpers import mock_snakemake
snakemake = mock_snakemake("retrieve_eletricity_demand") snakemake = mock_snakemake("retrieve_electricity_demand")
rootpath = ".." rootpath = ".."
else: else:
rootpath = "." rootpath = "."
@ -33,5 +33,17 @@ if __name__ == "__main__":
pd.read_csv(url.format(version=version), index_col=0) pd.read_csv(url.format(version=version), index_col=0)
for version in snakemake.params.versions for version in snakemake.params.versions
] ]
res = pd.concat([df1, df2[df2.index > df1.index[-1]]], join="inner") combined = pd.concat([df1, df2[df2.index > df1.index[-1]]])
pattern = "_load_actual_entsoe_transparency"
transparency = combined.filter(like=pattern).rename(
columns=lambda x: x.replace(pattern, "")
)
pattern = "_load_actual_entsoe_power_statistics"
powerstatistics = combined.filter(like=pattern).rename(
columns=lambda x: x.replace(pattern, "")
)
res = transparency.fillna(powerstatistics)
res.to_csv(snakemake.output[0]) res.to_csv(snakemake.output[0])