From 9d4ce430cc6abc73941ab9acebb35d9b0db919c4 Mon Sep 17 00:00:00 2001
From: Fabian <fab.hof@gmx.de>
Date: Thu, 17 Aug 2023 10:17:12 +0200
Subject: [PATCH] electricity demand: remove powerstatistics flag, merge sources
 in retrieve_electricity_demand

---
 doc/configtables/load.csv              |   1 -
 doc/retrieve.rst                       |   2 +-
 rules/build_electricity.smk            |   2 +-
 rules/retrieve.smk                     |   2 +-
 scripts/build_electricity_demand.py    | 172 ++++++++++---------------
 scripts/retrieve_electricity_demand.py |  16 ++-
 6 files changed, 85 insertions(+), 110 deletions(-)

diff --git a/doc/configtables/load.csv b/doc/configtables/load.csv
index 6e98f881..ac666947 100644
--- a/doc/configtables/load.csv
+++ b/doc/configtables/load.csv
@@ -1,5 +1,4 @@
 ,Unit,Values,Description
-power_statistics,bool,"{true, false}",Whether to load the electricity consumption data of the ENTSOE power statistics (only for files from 2019 and before) or from the ENTSOE transparency data (only has load data from 2015 onwards).
 interpolate_limit,hours,integer,"Maximum gap size (consecutive nans) which interpolated linearly."
 time_shift_for_large_gaps,string,string,"Periods which are used for copying time-slices in order to fill large gaps of nans. Have to be valid ``pandas`` period strings."
 manual_adjustments,bool,"{true, false}","Whether to adjust the load data manually according to the function in :func:`manual_adjustment`."
diff --git a/doc/retrieve.rst b/doc/retrieve.rst
index 4786581e..66c996f5 100644
--- a/doc/retrieve.rst
+++ b/doc/retrieve.rst
@@ -91,7 +91,7 @@ None.
 
 **Outputs**
 
-- ``data/load_raw.csv``
+- ``data/electricity_demand.csv``
 
 
 Rule ``retrieve_cost_data``
diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk
index b359868f..2e7a0c30 100644
--- a/rules/build_electricity.smk
+++ b/rules/build_electricity.smk
@@ -24,7 +24,7 @@ rule build_electricity_demand:
         countries=config_provider("countries"),
         load=config_provider("load"),
     input:
-        ancient("data/load_raw.csv"),
+        ancient("data/electricity_demand.csv"),
     output:
         RESOURCES + "load.csv",
     log:
diff --git a/rules/retrieve.smk b/rules/retrieve.smk
index 5af0ffb5..34e2eb7c 100644
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -159,7 +159,7 @@ if config["enable"]["retrieve"]:
         params:
             versions=["2019-06-05", "2020-10-06"],
         output:
-            "data/load_raw.csv",
+            "data/electricity_demand.csv",
         log:
             "logs/retrieve_electricity_demand.log",
         resources:
diff --git a/scripts/build_electricity_demand.py b/scripts/build_electricity_demand.py
index 60d40e1e..3fd9d605 100755
--- a/scripts/build_electricity_demand.py
+++ b/scripts/build_electricity_demand.py
@@ -31,7 +31,7 @@ Relevant Settings
 Inputs
 ------
 
-- ``data/load_raw.csv``:
+- ``data/electricity_demand.csv``:
 
 Outputs
 -------
@@ -49,7 +49,7 @@ from _helpers import configure_logging, set_scenario_config
 from pandas import Timedelta as Delta
 
 
-def load_timeseries(fn, years, countries, powerstatistics=True):
+def load_timeseries(fn, years, countries):
     """
     Read load data from OPSD time-series package version 2020-10-06.
 
@@ -62,10 +62,6 @@ def load_timeseries(fn, years, countries, powerstatistics=True):
         File name or url location (file format .csv)
     countries : listlike
         Countries for which to read load data.
-    powerstatistics: bool
-        Whether the electricity consumption data of the ENTSOE power
-        statistics (if true) or of the ENTSOE transparency map (if false)
-        should be parsed.
 
     Returns
     -------
@@ -74,17 +70,9 @@ def load_timeseries(fn, years, countries, powerstatistics=True):
     """
     logger.info(f"Retrieving load data from '{fn}'.")
 
-    pattern = "power_statistics" if powerstatistics else "transparency"
-    pattern = f"_load_actual_entsoe_{pattern}"
-
-    def rename(s):
-        return s[: -len(pattern)]
-
     return (
         pd.read_csv(fn, index_col=0, parse_dates=[0])
         .tz_localize(None)
-        .filter(like=pattern)
-        .rename(columns=rename)
         .dropna(how="all", axis=0)
         .rename(columns={"GB_UKM": "GB"})
         .filter(items=countries)
@@ -149,17 +137,18 @@ def copy_timeslice(load, cntry, start, stop, delta, fn_load=None):
             ].values
         elif fn_load is not None:
             duration = pd.date_range(freq="h", start=start - delta, end=stop - delta)
-            load_raw = load_timeseries(fn_load, duration, [cntry], powerstatistics)
+            load_raw = load_timeseries(fn_load, duration, [cntry])
             load.loc[start:stop, cntry] = load_raw.loc[
                 start - delta : stop - delta, cntry
             ].values
 
 
-def manual_adjustment(load, fn_load, powerstatistics):
+def manual_adjustment(load, fn_load):
     """
     Adjust gaps manual for load data from OPSD time-series package.
 
-     1. For the ENTSOE power statistics load data (if powerstatistics is True)
+     1. For years before 2015, for which the load data is mainly taken from the
+        ENTSOE power statistics
 
      Kosovo (KV) and Albania (AL) do not exist in the data set. Kosovo gets the
      same load curve as Serbia and Albania the same as Macdedonia, both scaled
@@ -167,7 +156,8 @@ def manual_adjustment(load, fn_load, powerstatistics):
      IEA Data browser [0] for the year 2013.
 
 
-     2. For the ENTSOE transparency load data (if powerstatistics is False)
+     2. For years from 2015 onwards, for which the load data is mainly taken from the
+        ENTSOE transparency platform
 
      Albania (AL) and Macedonia (MK) do not exist in the data set. Both get the
      same load curve as Montenegro,  scaled by the corresponding ratio of total energy
@@ -183,9 +173,6 @@ def manual_adjustment(load, fn_load, powerstatistics):
      ----------
      load : pd.DataFrame
          Load time-series with UTC timestamps x ISO-2 countries
-     powerstatistics: bool
-         Whether argument load comprises the electricity consumption data of
-         the ENTSOE power statistics or of the ENTSOE transparency map
     load_fn: str
          File name or url location (file format .csv)
 
@@ -195,88 +182,66 @@ def manual_adjustment(load, fn_load, powerstatistics):
          Manual adjusted and interpolated load time-series with UTC
          timestamps x ISO-2 countries
     """
-    if powerstatistics:
-        if "MK" in load.columns:
-            if "AL" not in load.columns or load.AL.isnull().values.all():
-                load["AL"] = load["MK"] * (4.1 / 7.4)
-        if "RS" in load.columns:
-            if "KV" not in load.columns or load.KV.isnull().values.all():
-                load["KV"] = load["RS"] * (4.8 / 27.0)
+    if "MK" in load:
+        if "AL" not in load or load.AL.isnull().values.all():
+            load["AL"] = load["MK"] * (4.1 / 7.4)
+    if "RS" in load:
+        if "KV" not in load or load.KV.isnull().values.all():
+            load["KV"] = load["RS"] * (4.8 / 27.0)
+    if "ME" in load:
+        if "AL" not in load and "AL" in countries:
+            load["AL"] = load.ME * (5.7 / 2.9)
+        if "MK" not in load and "MK" in countries:
+            load["MK"] = load.ME * (6.7 / 2.9)
+        if "BA" not in load and "BA" in countries:
+            load["BA"] = load.HR * (11.0 / 16.2)
 
-        copy_timeslice(
-            load, "GR", "2015-08-11 21:00", "2015-08-15 20:00", Delta(weeks=1)
-        )
-        copy_timeslice(
-            load, "AT", "2018-12-31 22:00", "2019-01-01 22:00", Delta(days=2)
-        )
-        copy_timeslice(
-            load, "CH", "2010-01-19 07:00", "2010-01-19 22:00", Delta(days=1)
-        )
-        copy_timeslice(
-            load, "CH", "2010-03-28 00:00", "2010-03-28 21:00", Delta(days=1)
-        )
-        # is a WE, so take WE before
-        copy_timeslice(
-            load, "CH", "2010-10-08 13:00", "2010-10-10 21:00", Delta(weeks=1)
-        )
-        copy_timeslice(
-            load, "CH", "2010-11-04 04:00", "2010-11-04 22:00", Delta(days=1)
-        )
-        copy_timeslice(
-            load, "NO", "2010-12-09 11:00", "2010-12-09 18:00", Delta(days=1)
-        )
-        # whole january missing
-        copy_timeslice(
-            load,
-            "GB",
-            "2010-01-01 00:00",
-            "2010-01-31 23:00",
-            Delta(days=-365),
-            fn_load,
-        )
-        # 1.1. at midnight gets special treatment
-        copy_timeslice(
-            load,
-            "IE",
-            "2016-01-01 00:00",
-            "2016-01-01 01:00",
-            Delta(days=-366),
-            fn_load,
-        )
-        copy_timeslice(
-            load,
-            "PT",
-            "2016-01-01 00:00",
-            "2016-01-01 01:00",
-            Delta(days=-366),
-            fn_load,
-        )
-        copy_timeslice(
-            load,
-            "GB",
-            "2016-01-01 00:00",
-            "2016-01-01 01:00",
-            Delta(days=-366),
-            fn_load,
-        )
+    copy_timeslice(load, "GR", "2015-08-11 21:00", "2015-08-15 20:00", Delta(weeks=1))
+    copy_timeslice(load, "AT", "2018-12-31 22:00", "2019-01-01 22:00", Delta(days=2))
+    copy_timeslice(load, "CH", "2010-01-19 07:00", "2010-01-19 22:00", Delta(days=1))
+    copy_timeslice(load, "CH", "2010-03-28 00:00", "2010-03-28 21:00", Delta(days=1))
+    # gap falls on a weekend, so copy the weekend one week earlier
+    copy_timeslice(load, "CH", "2010-10-08 13:00", "2010-10-10 21:00", Delta(weeks=1))
+    copy_timeslice(load, "CH", "2010-11-04 04:00", "2010-11-04 22:00", Delta(days=1))
+    copy_timeslice(load, "NO", "2010-12-09 11:00", "2010-12-09 18:00", Delta(days=1))
+    # whole january missing
+    copy_timeslice(
+        load,
+        "GB",
+        "2010-01-01 00:00",
+        "2010-01-31 23:00",
+        Delta(days=-365),
+        fn_load,
+    )
+    # 1.1. at midnight gets special treatment
+    copy_timeslice(
+        load,
+        "IE",
+        "2016-01-01 00:00",
+        "2016-01-01 01:00",
+        Delta(days=-366),
+        fn_load,
+    )
+    copy_timeslice(
+        load,
+        "PT",
+        "2016-01-01 00:00",
+        "2016-01-01 01:00",
+        Delta(days=-366),
+        fn_load,
+    )
+    copy_timeslice(
+        load,
+        "GB",
+        "2016-01-01 00:00",
+        "2016-01-01 01:00",
+        Delta(days=-366),
+        fn_load,
+    )
 
-    else:
-        if "ME" in load:
-            if "AL" not in load and "AL" in countries:
-                load["AL"] = load.ME * (5.7 / 2.9)
-            if "MK" not in load and "MK" in countries:
-                load["MK"] = load.ME * (6.7 / 2.9)
-            if "BA" not in load and "BA" in countries:
-                load["BA"] = load.HR * (11.0 / 16.2)
-        copy_timeslice(
-            load, "BG", "2018-10-27 21:00", "2018-10-28 22:00", Delta(weeks=1)
-        )
-        copy_timeslice(
-            load, "LU", "2019-01-02 11:00", "2019-01-05 05:00", Delta(weeks=-1)
-        )
-        copy_timeslice(
-            load, "LU", "2019-02-05 20:00", "2019-02-06 19:00", Delta(weeks=-1)
-        )
+    copy_timeslice(load, "BG", "2018-10-27 21:00", "2018-10-28 22:00", Delta(weeks=1))
+    copy_timeslice(load, "LU", "2019-01-02 11:00", "2019-01-05 05:00", Delta(weeks=-1))
+    copy_timeslice(load, "LU", "2019-02-05 20:00", "2019-02-06 19:00", Delta(weeks=-1))
 
     return load
 
@@ -290,17 +255,16 @@ if __name__ == "__main__":
     configure_logging(snakemake)
     set_scenario_config(snakemake)
 
-    powerstatistics = snakemake.params.load["power_statistics"]
     interpolate_limit = snakemake.params.load["interpolate_limit"]
     countries = snakemake.params.countries
     snapshots = pd.date_range(freq="h", **snakemake.params.snapshots)
     years = slice(snapshots[0], snapshots[-1])
     time_shift = snakemake.params.load["time_shift_for_large_gaps"]
 
-    load = load_timeseries(snakemake.input[0], years, countries, powerstatistics)
+    load = load_timeseries(snakemake.input[0], years, countries)
 
     if snakemake.params.load["manual_adjustments"]:
-        load = manual_adjustment(load, snakemake.input[0], powerstatistics)
+        load = manual_adjustment(load, snakemake.input[0])
 
     if load.empty:
         logger.warning("Build electricity demand time series is empty.")
diff --git a/scripts/retrieve_electricity_demand.py b/scripts/retrieve_electricity_demand.py
index 58511857..01dc4aa8 100644
--- a/scripts/retrieve_electricity_demand.py
+++ b/scripts/retrieve_electricity_demand.py
@@ -20,7 +20,7 @@ if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
 
-        snakemake = mock_snakemake("retrieve_eletricity_demand")
+        snakemake = mock_snakemake("retrieve_electricity_demand")
         rootpath = ".."
     else:
         rootpath = "."
@@ -33,5 +33,17 @@ if __name__ == "__main__":
         pd.read_csv(url.format(version=version), index_col=0)
         for version in snakemake.params.versions
     ]
-    res = pd.concat([df1, df2[df2.index > df1.index[-1]]], join="inner")
+    combined = pd.concat([df1, df2[df2.index > df1.index[-1]]])
+
+    pattern = "_load_actual_entsoe_transparency"
+    transparency = combined.filter(like=pattern).rename(
+        columns=lambda x: x.replace(pattern, "")
+    )
+    pattern = "_load_actual_entsoe_power_statistics"
+    powerstatistics = combined.filter(like=pattern).rename(
+        columns=lambda x: x.replace(pattern, "")
+    )
+
+    res = transparency.fillna(powerstatistics)
+
     res.to_csv(snakemake.output[0])