From 84edde8f63d7968926d83f36fc3e3194eaee0fc1 Mon Sep 17 00:00:00 2001
From: Fabian Neumann
Date: Thu, 3 Dec 2020 16:02:21 +0100
Subject: [PATCH] add time series segmentation with tsam package (#186)

* add time segmentation with tsam

* cut off SEG

* Snakefile: adjust memory function

* untangle memory reservation calculation

* prepare: document segmentation option

* correct typo
---
 Snakefile                  |  7 ++++-
 doc/configtables/opts.csv  |  1 +
 doc/release_notes.rst      |  6 +++++
 environment.docs.yaml      |  1 +
 environment.yaml           |  1 +
 scripts/prepare_network.py | 54 +++++++++++++++++++++++++++++++++++---
 6 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index 6b92999e..a82fdae3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -279,7 +279,7 @@ rule prepare_network:
     output: 'networks/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc'
     log: "logs/prepare_network/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}.log"
     threads: 1
-    resources: mem=1000
+    resources: mem=4000
     # benchmark: "benchmarks/prepare_network/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}"
     script: "scripts/prepare_network.py"
 
@@ -290,6 +290,11 @@ def memory(w):
         if m is not None:
             factor /= int(m.group(1))
             break
+    for o in w.opts.split('-'):
+        m = re.match(r'^(\d+)seg$', o, re.IGNORECASE)
+        if m is not None:
+            factor *= int(m.group(1)) / 8760
+            break
     if w.clusters.endswith('m'):
         return int(factor * (18000 + 180 * int(w.clusters[:-1])))
     else:
diff --git a/doc/configtables/opts.csv b/doc/configtables/opts.csv
index 55a9c471..43d299d4 100644
--- a/doc/configtables/opts.csv
+++ b/doc/configtables/opts.csv
@@ -1,5 +1,6 @@
 Trigger, Description, Definition, Status
 ``nH``; e.g. ``2H``-``6H``, Resample the time-resolution by averaging over every ``n`` snapshots, ``prepare_network``: `average_every_nhours() `_ and its `caller `_, In active use
+``nSEG``; e.g. ``4380SEG``, "Apply time series segmentation with the `tsam <https://tsam.readthedocs.io/en/latest/>`_ package to reduce the time series to ``n`` adjacent snapshots of varying lengths, based on the capacity factors of variable renewables, hydro inflow and load.", ``prepare_network``: ``apply_time_segmentation()``, In active use
 ``Co2L``, Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit``. If a float is appended an overall emission limit relative to the emission level given in ``electricity: co2base`` is added (e.g. ``Co2L0.05`` limits emissions to 5% of what is given in ``electricity: co2base``), ``prepare_network``: `add_co2limit() `_ and its `caller `_, In active use
 ``Ep``, Add cost for a carbon-dioxide price configured in ``costs: emission_prices: co2`` to ``marginal_cost`` of generators (other emission types listed in ``network.carriers`` possible as well), ``prepare_network``: `add_emission_prices() `_ and its `caller `_, In active use
 ``CCL``, Add minimum and maximum levels of generator nominal capacity per carrier for individual countries. These can be specified in the file linked at ``electricity: agg_p_nom_limits`` in the configuration. File defaults to ``data/agg_p_nom_minmax.csv``., ``solve_network``, In active use
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index bec532e6..6e581705 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -56,6 +56,12 @@ Upcoming Release
 * Electricity consumption data is now directly retrieved from the `OPSD website `_ using the rule ``build_load_data``. The user can decide whether to take the ENTSOE power statistics data (default) or the ENTSOE transparency data.
 
+* Added an option to the ``{opts}`` wildcard that applies a time series segmentation algorithm based on renewables, hydro inflow and load time series
+  to produce a given total number of adjacent snapshots of varying lengths.
+  This feature is an alternative to downsampling the temporal resolution by simply averaging, and
+  it uses the `tsam <https://tsam.readthedocs.io/en/latest/>`_ package
+  (`#186 <https://github.com/PyPSA/pypsa-eur/pull/186>`_).
+
 
 PyPSA-Eur 0.2.0 (8th June 2020)
 ==================================
diff --git a/environment.docs.yaml b/environment.docs.yaml
index 3c50f2f2..762e89af 100755
--- a/environment.docs.yaml
+++ b/environment.docs.yaml
@@ -49,6 +49,7 @@ dependencies:
   # The FRESNA/KIT stuff is not packaged for conda yet
   - pip:
     - vresutils==0.3.1
+    - tsam>=1.1.0
     - git+https://github.com/PyPSA/glaes.git#egg=glaes
     - git+https://github.com/PyPSA/geokit.git#egg=geokit
     - cdsapi
diff --git a/environment.yaml b/environment.yaml
index cce0050b..cb322632 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -57,6 +57,7 @@ dependencies:
 
   - pip:
     - vresutils==0.3.1
+    - tsam>=1.1.0
     - git+https://github.com/PyPSA/glaes.git#egg=glaes
     - git+https://github.com/PyPSA/geokit.git#egg=geokit
     - cdsapi
diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py
index fe88f457..c2092980 100755
--- a/scripts/prepare_network.py
+++ b/scripts/prepare_network.py
@@ -11,7 +11,8 @@ Prepare PyPSA network for solving according to :ref:`opts` and :ref:`ll`, such a
 - setting an **N-1 security margin** factor for transmission line capacities,
 - specifying an expansion limit on the **cost** of transmission expansion,
 - specifying an expansion limit on the **volume** of transmission expansion, and
-- reducing the **temporal** resolution by averaging over multiple hours.
+- reducing the **temporal** resolution by averaging over multiple hours
+  or segmenting time series into chunks of varying lengths using ``tsam``.
 
 Relevant Settings
 -----------------
@@ -133,7 +134,7 @@ def set_transmission_limit(n, ll_type, factor, Nyears=1):
 
 
 def average_every_nhours(n, offset):
-    logger.info('Resampling the network to {}'.format(offset))
+    logger.info(f"Resampling the network to {offset}")
     m = n.copy(with_time=False)
 
     snapshot_weightings = n.snapshot_weightings.resample(offset).sum()
@@ -148,6 +149,47 @@
     return m
 
 
+def apply_time_segmentation(n, segments):
+    logger.info(f"Aggregating time series to {segments} segments.")
+    try:
+        import tsam.timeseriesaggregation as tsam
+    except ImportError:
+        raise ModuleNotFoundError("Optional dependency 'tsam' not found. "
+ "Install via 'pip install tsam'") + + p_max_pu_norm = n.generators_t.p_max_pu.max() + p_max_pu = n.generators_t.p_max_pu / p_max_pu_norm + + load_norm = n.loads_t.p_set.max() + load = n.loads_t.p_set / load_norm + + inflow_norm = n.storage_units_t.inflow.max() + inflow = n.storage_units_t.inflow / inflow_norm + + raw = pd.concat([p_max_pu, load, inflow], axis=1, sort=False) + + solver_name = snakemake.config["solving"]["solver"]["name"] + + agg = tsam.TimeSeriesAggregation(raw, hoursPerPeriod=len(raw), + noTypicalPeriods=1, noSegments=int(segments), + segmentation=True, solver=solver_name) + + segmented = agg.createTypicalPeriods() + + weightings = segmented.index.get_level_values("Segment Duration") + offsets = np.insert(np.cumsum(weightings[:-1]), 0, 0) + snapshots = [n.snapshots[0] + pd.Timedelta(f"{offset}h") for offset in offsets] + + n.set_snapshots(pd.DatetimeIndex(snapshots, name='name')) + n.snapshot_weightings = pd.Series(weightings, index=snapshots, name="weightings", dtype="float64") + + segmented.index = snapshots + n.generators_t.p_max_pu = segmented[n.generators_t.p_max_pu.columns] * p_max_pu_norm + n.loads_t.p_set = segmented[n.loads_t.p_set.columns] * load_norm + n.storage_units_t.inflow = segmented[n.storage_units_t.inflow.columns] * inflow_norm + + return n + def enforce_autarky(n, only_crossborder=False): if only_crossborder: lines_rm = n.lines.loc[ @@ -189,8 +231,12 @@ if __name__ == "__main__": if m is not None: n = average_every_nhours(n, m.group(0)) break - else: - logger.info("No resampling") + + for o in opts: + m = re.match(r'^\d+seg$', o, re.IGNORECASE) + if m is not None: + n = apply_time_segmentation(n, m.group(0)[:-3]) + break for o in opts: if "Co2L" in o: