add time series segmentation with tsam package (#186)

* add time segmentation with tsam

* cut off SEG

* Snakefile: adjust memory function

* untangle memory reservation calculation

* prepare: document segmentation option

* correct typo
Fabian Neumann 2020-12-03 16:02:21 +01:00 committed by GitHub
parent cfb979a0d0
commit 84edde8f63
6 changed files with 65 additions and 5 deletions

Snakefile

@@ -279,7 +279,7 @@ rule prepare_network:
     output: 'networks/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc'
     log: "logs/prepare_network/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}.log"
     threads: 1
-    resources: mem=1000
+    resources: mem=4000
     # benchmark: "benchmarks/prepare_network/{network}_s{simpl}_{clusters}_ec_l{ll}_{opts}"
     script: "scripts/prepare_network.py"
@@ -290,6 +290,11 @@ def memory(w):
         if m is not None:
             factor /= int(m.group(1))
             break
+    for o in w.opts.split('-'):
+        m = re.match(r'^(\d+)seg$', o, re.IGNORECASE)
+        if m is not None:
+            factor *= int(m.group(1)) / 8760
+            break
     if w.clusters.endswith('m'):
         return int(factor * (18000 + 180 * int(w.clusters[:-1])))
     else:
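Worked through for a hypothetical run: segmenting the year into 4380 chunks halves the memory estimate relative to hourly resolution. A minimal sketch; the starting factor and the base constants sit outside this hunk, so the values below are assumptions:

```python
# Hypothetical inputs: 37 clusters, opts 'Co2L-4380SEG'; starting factor and
# base constants are illustrative stand-ins for values outside this hunk.
factor = 3.0             # assumed starting factor
factor *= 4380 / 8760    # the new branch: n segments relative to 8760 hourly snapshots
print(int(factor * (10000 + 195 * 37)))  # -> 25822 (MB), half the hourly estimate
```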

doc/configtables/opts.csv

@@ -1,5 +1,6 @@
 Trigger, Description, Definition, Status
 ``nH``; e.g. ``2H``-``6H``, Resample the time-resolution by averaging over every ``n`` snapshots, ``prepare_network``: `average_every_nhours() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L110>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L146>`_, In active use
+``nSEG``; e.g. ``4380SEG``, "Apply time series segmentation with the `tsam <https://tsam.readthedocs.io/en/latest/index.html>`_ package to ``n`` adjacent snapshots of varying lengths based on capacity factors of variable renewables, hydro inflow and load.", ``prepare_network``: ``apply_time_segmentation()``, In active use
 ``Co2L``, Add an overall absolute carbon-dioxide emissions limit configured in ``electricity: co2limit``. If a float is appended an overall emission limit relative to the emission level given in ``electricity: co2base`` is added (e.g. ``Co2L0.05`` limits emissions to 5% of what is given in ``electricity: co2base``), ``prepare_network``: `add_co2limit() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L19>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L154>`_, In active use
 ``Ep``, Add cost for a carbon-dioxide price configured in ``costs: emission_prices: co2`` to ``marginal_cost`` of generators (other emission types listed in ``network.carriers`` possible as well), ``prepare_network``: `add_emission_prices() <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L24>`_ and its `caller <https://github.com/PyPSA/pypsa-eur/blob/6b964540ed39d44079cdabddee8333f486d0cd63/scripts/prepare_network.py#L158>`_, In active use
 ``CCL``, Add minimum and maximum levels of generator nominal capacity per carrier for individual countries. These can be specified in the file linked at ``electricity: agg_p_nom_limits`` in the configuration. File defaults to ``data/agg_p_nom_minmax.csv``., ``solve_network``, In active use
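For orientation, triggers are parsed case-insensitively from the ``{opts}`` wildcard after splitting on ``-``; a small sketch with a hypothetical opts value, using the same pattern as the ``scripts/prepare_network.py`` diff further below:

```python
import re

opts = "Co2L-4380SEG"  # hypothetical {opts} wildcard value
for o in opts.split('-'):
    m = re.match(r'^(\d+)seg$', o, re.IGNORECASE)
    if m is not None:
        print(f"segmenting year into {m.group(1)} chunks")  # -> 4380
```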


doc/release_notes.rst

@@ -56,6 +56,12 @@ Upcoming Release
 * Electricity consumption data is now directly retrieved from the `OPSD website <https://data.open-power-system-data.org/time_series/2019-06-05>`_ using the rule ``build_load_data``. The user can decide whether to take the ENTSOE power statistics data (default) or the ENTSOE transparency data.
+* Added an option to the ``{opts}`` wildcard that applies a time series segmentation algorithm based on renewables, hydro inflow and load time series
+  to produce a given total number of adjacent snapshots of varying lengths.
+  This feature is an alternative to downsampling the temporal resolution by simply averaging and
+  uses the `tsam <https://tsam.readthedocs.io/en/latest/index.html>`_ package
+  (`#186 <https://github.com/PyPSA/pypsa-eur/pull/186>`_).
 
 PyPSA-Eur 0.2.0 (8th June 2020)
 ==================================
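To see why segmentation differs from plain ``nH`` averaging: fixed-length averaging smears short peaks, while varying-length segments can isolate them. A toy comparison with invented data and segment bounds:

```python
import pandas as pd

idx = pd.date_range("2013-01-01", periods=6, freq="H")
s = pd.Series([1., 1., 1., 5., 1., 1.], index=idx)

# 'nH'-style downsampling: fixed 2-hour averages smear the peak
print(s.resample("2H").mean().tolist())          # [1.0, 3.0, 1.0]

# 'nSEG'-style segmentation: varying-length segments can isolate the peak
bounds = [(0, 3), (3, 4), (4, 6)]                # invented segment bounds
print([s.iloc[a:b].mean() for a, b in bounds])   # [1.0, 5.0, 1.0]
```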

environment.docs.yaml

@@ -49,6 +49,7 @@ dependencies:
   # The FRESNA/KIT stuff is not packaged for conda yet
   - pip:
     - vresutils==0.3.1
+    - tsam>=1.1.0
     - git+https://github.com/PyPSA/glaes.git#egg=glaes
     - git+https://github.com/PyPSA/geokit.git#egg=geokit
     - cdsapi

environment.yaml

@@ -57,6 +57,7 @@ dependencies:
   - pip:
     - vresutils==0.3.1
+    - tsam>=1.1.0
     - git+https://github.com/PyPSA/glaes.git#egg=glaes
     - git+https://github.com/PyPSA/geokit.git#egg=geokit
     - cdsapi

scripts/prepare_network.py

@@ -11,7 +11,8 @@ Prepare PyPSA network for solving according to :ref:`opts` and :ref:`ll`, such as
 - setting an **N-1 security margin** factor for transmission line capacities,
 - specifying an expansion limit on the **cost** of transmission expansion,
 - specifying an expansion limit on the **volume** of transmission expansion, and
-- reducing the **temporal** resolution by averaging over multiple hours.
+- reducing the **temporal** resolution by averaging over multiple hours
+  or segmenting time series into chunks of varying lengths using ``tsam``.
 
 Relevant Settings
 -----------------
@@ -133,7 +134,7 @@ def set_transmission_limit(n, ll_type, factor, Nyears=1):
 
 def average_every_nhours(n, offset):
-    logger.info('Resampling the network to {}'.format(offset))
+    logger.info(f"Resampling the network to {offset}")
 
     m = n.copy(with_time=False)
     snapshot_weightings = n.snapshot_weightings.resample(offset).sum()
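For intuition on the resampling above: summing hourly snapshot weightings over each window yields how many hours every coarse snapshot represents. A toy check:

```python
import pandas as pd

idx = pd.date_range("2013-01-01", periods=8, freq="H")
weightings = pd.Series(1.0, index=idx)      # hourly snapshots, weight 1 each
coarse = weightings.resample("4H").sum()    # as in average_every_nhours()
print(coarse.tolist())                      # [4.0, 4.0]: each coarse snapshot
                                            # now stands for 4 hours
```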
@@ -148,6 +149,47 @@ def average_every_nhours(n, offset):
 
     return m
 
+
+def apply_time_segmentation(n, segments):
+    logger.info(f"Aggregating time series to {segments} segments.")
+    try:
+        import tsam.timeseriesaggregation as tsam
+    except ImportError:
+        raise ModuleNotFoundError("Optional dependency 'tsam' not found. "
+                                  "Install via 'pip install tsam'.")
+
+    # normalise all profiles to [0, 1] so tsam clusters comparable series
+    p_max_pu_norm = n.generators_t.p_max_pu.max()
+    p_max_pu = n.generators_t.p_max_pu / p_max_pu_norm
+
+    load_norm = n.loads_t.p_set.max()
+    load = n.loads_t.p_set / load_norm
+
+    inflow_norm = n.storage_units_t.inflow.max()
+    inflow = n.storage_units_t.inflow / inflow_norm
+
+    raw = pd.concat([p_max_pu, load, inflow], axis=1, sort=False)
+
+    solver_name = snakemake.config["solving"]["solver"]["name"]
+
+    agg = tsam.TimeSeriesAggregation(raw, hoursPerPeriod=len(raw),
+                                     noTypicalPeriods=1, noSegments=int(segments),
+                                     segmentation=True, solver=solver_name)
+
+    segmented = agg.createTypicalPeriods()
+
+    # segment durations (in hours) become the new snapshot weightings
+    weightings = segmented.index.get_level_values("Segment Duration")
+    offsets = np.insert(np.cumsum(weightings[:-1]), 0, 0)
+    snapshots = [n.snapshots[0] + pd.Timedelta(f"{offset}h") for offset in offsets]
+
+    n.set_snapshots(pd.DatetimeIndex(snapshots, name='name'))
+    n.snapshot_weightings = pd.Series(weightings, index=snapshots, name="weightings", dtype="float64")
+
+    # de-normalise the segmented profiles before writing them back
+    segmented.index = snapshots
+    n.generators_t.p_max_pu = segmented[n.generators_t.p_max_pu.columns] * p_max_pu_norm
+    n.loads_t.p_set = segmented[n.loads_t.p_set.columns] * load_norm
+    n.storage_units_t.inflow = segmented[n.storage_units_t.inflow.columns] * inflow_norm
+
+    return n
 
 def enforce_autarky(n, only_crossborder=False):
     if only_crossborder:
         lines_rm = n.lines.loc[
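A self-contained sketch of the ``tsam`` call above on synthetic data; the input series, segment count and solver name are illustrative assumptions, while the keyword arguments and the ``Segment Duration`` index level mirror the function above:

```python
import numpy as np
import pandas as pd
import tsam.timeseriesaggregation as tsam

# synthetic hourly profiles for one week (illustrative only)
idx = pd.date_range("2013-01-01", periods=168, freq="H")
raw = pd.DataFrame({"load": np.random.rand(168),
                    "wind": np.random.rand(168)}, index=idx)

agg = tsam.TimeSeriesAggregation(raw, hoursPerPeriod=len(raw),
                                 noTypicalPeriods=1, noSegments=24,
                                 segmentation=True, solver="cbc")  # assumed solver
segmented = agg.createTypicalPeriods()

durations = segmented.index.get_level_values("Segment Duration")
print(len(durations), durations.sum())  # 24 segments covering all 168 hours
```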
@@ -189,8 +231,12 @@ if __name__ == "__main__":
         if m is not None:
             n = average_every_nhours(n, m.group(0))
             break
-    else:
-        logger.info("No resampling")
+
+    for o in opts:
+        m = re.match(r'^\d+seg$', o, re.IGNORECASE)
+        if m is not None:
+            n = apply_time_segmentation(n, m.group(0)[:-3])
+            break
 
     for o in opts:
         if "Co2L" in o: