Merge branch 'eu-energy-security' into eu-energy-security-conventional-attrs

This commit is contained in:
Fabian Hofmann 2022-06-28 13:22:39 +02:00 committed by GitHub
commit e175d1aa0e
24 changed files with 482 additions and 162 deletions

View File

@ -13,12 +13,13 @@ on:
branches:
- master
pull_request:
branches:
- master
schedule:
- cron: "0 5 * * TUE"
env:
CONDA_CACHE_NUMBER: 1 # Change this value to manually reset the environment cache
DATA_CACHE_NUMBER: 1
CACHE_NUMBER: 1 # Change this value to manually reset the environment cache
jobs:
build:
@ -65,26 +66,16 @@ jobs:
miniforge-version: latest
activate-environment: pypsa-eur
use-mamba: true
- name: Set cache dates
run: |
echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
echo "WEEK=$(date +'%Y%U')" >> $GITHUB_ENV
- name: Cache data and cutouts folders
uses: actions/cache@v3
with:
path: |
data
cutouts
key: data-cutouts-${{ env.WEEK }}-${{ env.DATA_CACHE_NUMBER }}
- name: Set cache date
run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
- name: Create environment cache
uses: actions/cache@v2
id: cache
with:
path: ${{ matrix.prefix }}
key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CONDA_CACHE_NUMBER }}
key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
- name: Update environment due to outdated or unavailable cache
run: mamba env update -n pypsa-eur -f envs/environment.yaml

View File

@ -66,12 +66,6 @@ if config['enable'].get('retrieve_databundle', True):
script: 'scripts/retrieve_databundle.py'
rule retrieve_natura_data:
input: HTTP.remote("sdi.eea.europa.eu/datashare/s/H6QGCybMdLLnywo/download", additional_request_string="?path=%2FNatura2000_end2020_gpkg&files=Natura2000_end2020.gpkg", static=True)
output: "data/Natura2000_end2020.gpkg"
run: move(input[0], output[0])
rule retrieve_load_data:
input: HTTP.remote("data.open-power-system-data.org/time_series/2019-06-05/time_series_60min_singleindex.csv", keep_local=True, static=True)
output: "data/load_raw.csv"
@ -170,11 +164,28 @@ if config['enable'].get('retrieve_cutout', True):
run: move(input[0], output[0])
if config['enable'].get('build_natura_raster', False):
rule build_natura_raster:
input:
natura="data/bundle/natura/Natura2000_end2015.shp",
cutouts=expand("cutouts/{cutouts}.nc", **config['atlite'])
output: "resources/natura.tiff"
log: "logs/build_natura_raster.log"
script: "scripts/build_natura_raster.py"
if config['enable'].get('retrieve_natura_raster', True):
rule retrieve_natura_raster:
input: HTTP.remote("zenodo.org/record/4706686/files/natura.tiff", keep_local=True, static=True)
output: "resources/natura.tiff"
run: move(input[0], output[0])
rule build_renewable_profiles:
input:
base_network="networks/base.nc",
corine="data/bundle/corine/g250_clc06_V18_5.tif",
natura=lambda w: ("data/Natura2000_end2020.gpkg"
natura=lambda w: ("resources/natura.tiff"
if config["renewable"][w.technology]["natura"]
else []),
gebco=lambda w: ("data/bundle/GEBCO_2014_2D.nc"

View File

@ -18,8 +18,23 @@ scenario:
countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions
p_nom_min: sum
p_min_pu: mean
marginal_cost: mean
committable: any
ramp_limit_up: max
ramp_limit_down: max
efficiency: mean
snapshots:
start: "2013-01-01"
@ -31,6 +46,8 @@ enable:
retrieve_databundle: true
build_cutout: false
retrieve_cutout: true
build_natura_raster: false
retrieve_natura_raster: true
custom_busmap: false
electricity:

View File

@ -19,8 +19,23 @@ scenario:
countries: ['BE']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions
p_nom_min: sum
p_min_pu: mean
marginal_cost: mean
committable: any
ramp_limit_up: max
ramp_limit_down: max
efficiency: mean
snapshots:
start: "2013-03-01"
@ -32,6 +47,8 @@ enable:
retrieve_databundle: true
build_cutout: false
retrieve_cutout: true
build_natura_raster: false
retrieve_natura_raster: true
custom_busmap: false
electricity:
@ -76,7 +93,7 @@ renewable:
24, 25, 26, 27, 28, 29, 31, 32]
distance: 1000
distance_grid_codes: [1, 2, 3, 4, 5, 6]
natura: false
natura: true
potential: simple # or conservative
clip_p_max_pu: 1.e-2
offwind-ac:
@ -87,7 +104,7 @@ renewable:
capacity_per_sqkm: 3
# correction_factor: 0.93
corine: [44, 255]
natura: false
natura: true
max_shore_distance: 30000
potential: simple # or conservative
clip_p_max_pu: 1.e-2
@ -100,7 +117,7 @@ renewable:
capacity_per_sqkm: 3
# correction_factor: 0.93
corine: [44, 255]
natura: false
natura: true
min_shore_distance: 30000
potential: simple # or conservative
clip_p_max_pu: 1.e-2
@ -122,7 +139,7 @@ renewable:
# correction_factor: 0.854337
corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 26, 31, 32]
natura: false
natura: true
potential: simple # or conservative
clip_p_max_pu: 1.e-2

View File

@ -1,3 +1,13 @@
,Unit,Values,Description
simplify,,,
simplify_network,,,
-- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to the electrically closest ones"
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where carrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",
cluster_network,,,
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where carrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",
aggregation_strategies,,,
-- generators,,,
-- -- {key},str,"{key} can be any attribute of the generator component (str). Its value is the name of any function that can be looked up on pandas.Series via getattr(), e.g. one of {min, max, sum}.","Aggregates the attribute according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator."
-- buses,,,
-- -- {key},str,"{key} can be any attribute of the bus component (str). Its value is the name of any function that can be looked up on pandas.Series via getattr(), e.g. one of {min, max, sum}.","Aggregates the attribute according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus."
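
As a sketch of how these string entries are used (mirroring the translation step in ``cluster_network.py``), each value is resolved to the ``pandas.Series`` method of the same name via ``getattr``:

import pandas as pd

# config excerpt as parsed from config.yaml
aggregation_strategies = {"generators": {"p_nom_max": "sum", "efficiency": "mean"}}

# resolve each strategy string to the corresponding pandas.Series method
resolved = {
    comp: {key: getattr(pd.Series, func) for key, func in strategies.items()}
    for comp, strategies in aggregation_strategies.items()
}

assert resolved["generators"]["p_nom_max"](pd.Series([1, 2, 3])) == 6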


View File

@ -12,4 +12,6 @@ enable,,,
-- retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zenodo via the rule :mod:`retrieve_databundle` or whether to keep a custom databundle located in the corresponding folder."
-- build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`."
-- retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`."
-- build_natura_raster,bool,"{true, false}","Switch to enable the creation of the raster ``natura.tiff`` via the rule :mod:`build_natura_raster`."
-- retrieve_natura_raster,bool,"{true, false}","Switch to enable the retrieval of ``natura.tiff`` from zenodo with :mod:`retrieve_natura_raster`."
-- custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``."


View File

@ -27,6 +27,7 @@ With these and the externally extracted ENTSO-E online map topology
Then the process continues by calculating conventional power plant capacities, potentials, and per-unit availability time series for variable renewable energy carriers and hydro power plants with the following rules:
- :mod:`build_powerplants` for today's thermal power plant capacities using `powerplantmatching <https://github.com/FRESNA/powerplantmatching>`_, allocating each power plant to the closest substation,
- :mod:`build_natura_raster` for rasterising NATURA2000 natural protection areas,
- :mod:`build_renewable_profiles` for the hourly capacity factors and installation potentials constrained by land-use in each substation's Voronoi cell for PV, onshore and offshore wind, and
- :mod:`build_hydro_profile` for the hourly per-unit hydro power availability time series.
@ -40,6 +41,7 @@ together into a detailed PyPSA network stored in ``networks/elec.nc``.
preparation/build_shapes
preparation/build_load_data
preparation/build_cutout
preparation/build_natura_raster
preparation/prepare_links_p_nom
preparation/base_network
preparation/build_bus_regions

View File

@ -0,0 +1,39 @@
..
SPDX-FileCopyrightText: 2019-2020 The PyPSA-Eur Authors
SPDX-License-Identifier: CC-BY-4.0
.. _natura:
Rule ``build_natura_raster``
===============================
.. graphviz::
:align: center
digraph snakemake_dag {
graph [bgcolor=white,
margin=0,
size="8,5"
];
node [fontname=sans,
fontsize=10,
penwidth=2,
shape=box,
style=rounded
];
edge [color=grey,
penwidth=2
];
9 [color="0.22 0.6 0.85",
label=build_renewable_profiles];
12 [color="0.31 0.6 0.85",
fillcolor=gray,
label=build_natura_raster,
style=filled];
12 -> 9;
}
|
.. automodule:: build_natura_raster

View File

@ -41,6 +41,9 @@ Rule ``build_renewable_profiles``
8 [color="0.00 0.6 0.85",
label=build_shapes];
8 -> 9;
12 [color="0.31 0.6 0.85",
label=build_natura_raster];
12 -> 9;
13 [color="0.56 0.6 0.85",
label=build_cutout];
13 -> 9;

View File

@ -50,3 +50,30 @@ The :ref:`tutorial` uses a smaller cutout than required for the full model (30 M
.. seealso::
For details see :mod:`build_cutout` and read the `atlite documentation <https://atlite.readthedocs.io>`_.
Rule ``retrieve_natura_raster``
-------------------------------
.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4706686.svg
:target: https://doi.org/10.5281/zenodo.4706686
This rule, as a substitute for :mod:`build_natura_raster`, downloads an already rasterized version (`natura.tiff <https://zenodo.org/record/4706686/files/natura.tiff>`_) of `Natura 2000 <https://en.wikipedia.org/wiki/Natura_2000>`_ natural protection areas to reduce computation times. The file is placed into the ``resources`` sub-directory.
**Relevant Settings**
.. code:: yaml
enable:
retrieve_natura_raster:
.. seealso::
Documentation of the configuration file ``config.yaml`` at
:ref:`toplevel_cf`
**Outputs**
- ``resources/natura.tiff``: Rasterized version of `Natura 2000 <https://en.wikipedia.org/wiki/Natura_2000>`_ natural protection areas to reduce computation times.
.. seealso::
For details see :mod:`build_natura_raster`.
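
A quick sanity check of the downloaded raster (a sketch; assumes ``rasterio`` is installed and the rule has run):

.. code:: python

    import rasterio as rio

    with rio.open("resources/natura.tiff") as src:
        # written in EPSG:3035 at 100 m resolution by build_natura_raster;
        # cells with value 1 mark Natura 2000 protection areas
        print(src.crs, src.res, src.shape)
        print("share of protected cells:", src.read(1).mean())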

View File

@ -88,6 +88,11 @@ Upcoming Release
* Add option to alter marginal costs of a carrier through `{opts}` wildcard: `<carrier>+m<factor>`, e.g. `gas+m2.5`, will multiply the default marginal cost for gas by factor 2.5.
* Clustering strategies for generators and buses have moved from distinct scripts to configuration entries, unifying the process and making it more transparent.
* Hierarchical clustering was introduced. The distance metric is calculated from renewable potentials on hourly (feature entry ends with `-time`) or annual (feature entry ends with `-cap`) values.
Synchronisation Release - Ukraine and Moldova (17th March 2022)
===============================================================

View File

@ -35,8 +35,8 @@ To run the tutorial, use this as your configuration file ``config.yaml``.
.../pypsa-eur % cp config.tutorial.yaml config.yaml
This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle`
and :mod:`retrieve_cutout` totalling at less than 250 MB.
This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle`,
:mod:`retrieve_natura_raster` and :mod:`retrieve_cutout`, totalling less than 250 MB.
The full set of data dependencies would consume 5.3 GB.
For more information on the data dependencies of PyPSA-Eur, continue reading :ref:`data`.

View File

@ -10,7 +10,7 @@ dependencies:
- python>=3.8
- pip
- pypsa>=0.18.1
- pypsa>=0.19.1
- atlite>=0.2.6
- dask
@ -27,7 +27,7 @@ dependencies:
- powerplantmatching>=0.5.3
- numpy
- pandas
- geopandas
- geopandas>=0.11.0
- xarray
- netcdf4
- networkx
@ -46,7 +46,6 @@ dependencies:
# GIS dependencies:
- cartopy
- descartes
- fiona # explicit for Windows
- rasterio<=1.2.9 # 1.2.10 creates error https://github.com/PyPSA/atlite/issues/238
# PyPSA-Eur-Sec Dependencies

View File

@ -4,7 +4,9 @@
import pandas as pd
from pathlib import Path
from collections import OrderedDict
REGION_COLS = ['geometry', 'name', 'x', 'y', 'country']
def configure_logging(snakemake, skip_handlers=False):
"""
@ -210,6 +212,22 @@ def progress_retrieve(url, file):
urllib.request.urlretrieve(url, file, reporthook=dlProgress)
def get_aggregation_strategies(aggregation_strategies):
# default aggregation strategies that cannot be defined in .yaml format must be specified
# within the function; if they were passed as defaults in the function's signature, they
# would get lost whenever custom values are specified in the config.
import numpy as np
from pypsa.networkclustering import _make_consense
bus_strategies = dict(country=_make_consense("Bus", "country"))
bus_strategies.update(aggregation_strategies.get("buses", {}))
generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf}
generator_strategies.update(aggregation_strategies.get("generators", {}))
return bus_strategies, generator_strategies
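
# usage sketch: strategies from config.yaml arrive here already translated to
# pandas.Series functions and are layered on top of the hard-coded defaults, e.g.
#   bus_strategies, generator_strategies = get_aggregation_strategies(
#       {"generators": {"p_nom_max": pd.Series.sum}}
#   )
#   generator_strategies["p_nom_max"]  # -> pd.Series.sum (from config)
#   generator_strategies["lifetime"]([])  # -> inf (default kept)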
def mock_snakemake(rulename, **wildcards):
"""

View File

@ -391,7 +391,9 @@ def _set_countries_and_substations(n, config, country_shapes, offshore_shapes):
countries = config['countries']
country_shapes = gpd.read_file(country_shapes).set_index('name')['geometry']
offshore_shapes = gpd.read_file(offshore_shapes).set_index('name')['geometry']
# reindexing necessary for supporting empty geo-dataframes
offshore_shapes = gpd.read_file(offshore_shapes)
offshore_shapes = offshore_shapes.reindex(columns=['name', 'geometry']).set_index('name')['geometry']
substation_b = buses['symbol'].str.contains('substation|converter station', case=False)
def prefer_voltage(x, which):

View File

@ -42,7 +42,7 @@ Description
"""
import logging
from _helpers import configure_logging
from _helpers import configure_logging, REGION_COLS
import pypsa
import os
@ -55,13 +55,6 @@ from scipy.spatial import Voronoi
logger = logging.getLogger(__name__)
def save_to_geojson(s, fn):
if os.path.exists(fn):
os.unlink(fn)
schema = {**gpd.io.file.infer_schema(s), 'geometry': 'Unknown'}
s.to_file(fn, driver='GeoJSON', schema=schema)
def voronoi_partition_pts(points, outline):
"""
Compute the polygons of a voronoi partition of `points` within the
@ -120,7 +113,8 @@ if __name__ == "__main__":
n = pypsa.Network(snakemake.input.base_network)
country_shapes = gpd.read_file(snakemake.input.country_shapes).set_index('name')['geometry']
offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes).set_index('name')['geometry']
offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes)
offshore_shapes = offshore_shapes.reindex(columns=REGION_COLS).set_index('name')['geometry']
onshore_regions = []
offshore_regions = []
@ -151,6 +145,8 @@ if __name__ == "__main__":
offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2]
offshore_regions.append(offshore_regions_c)
save_to_geojson(pd.concat(onshore_regions, ignore_index=True), snakemake.output.regions_onshore)
save_to_geojson(pd.concat(offshore_regions, ignore_index=True), snakemake.output.regions_offshore)
pd.concat(onshore_regions, ignore_index=True).to_file(snakemake.output.regions_onshore)
if offshore_regions:
pd.concat(offshore_regions, ignore_index=True).to_file(snakemake.output.regions_offshore)
else:
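# landlocked selection: no offshore regions were built, but downstream rules
# still expect the file, so write the (possibly empty) offshore shapes instead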
offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore)

View File

@ -116,7 +116,7 @@ if __name__ == "__main__":
# Determine the bounds from bus regions with a buffer of two grid cells
onshore = gpd.read_file(snakemake.input.regions_onshore)
offshore = gpd.read_file(snakemake.input.regions_offshore)
regions = onshore.append(offshore)
regions = pd.concat([onshore, offshore])
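# DataFrame.append is deprecated since pandas 1.4; pd.concat is the drop-in replacement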
d = max(cutout_params.get('dx', 0.25), cutout_params.get('dy', 0.25))*2
cutout_params['bounds'] = regions.total_bounds + [-d, -d, d, d]
elif {'x', 'y'}.issubset(cutout_params):

View File

@ -0,0 +1,89 @@
# SPDX-FileCopyrightText: : 2017-2020 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
"""
Rasterises the vector data of the `Natura 2000 <https://en.wikipedia.org/wiki/Natura_2000>`_ natural protection areas onto all cutout regions.
Relevant Settings
-----------------
.. code:: yaml
renewable:
{technology}:
cutout:
.. seealso::
Documentation of the configuration file ``config.yaml`` at
:ref:`renewable_cf`
Inputs
------
- ``data/bundle/natura/Natura2000_end2015.shp``: `Natura 2000 <https://en.wikipedia.org/wiki/Natura_2000>`_ natural protection areas.
.. image:: ../img/natura.png
:scale: 33 %
Outputs
-------
- ``resources/natura.tiff``: Rasterized version of `Natura 2000 <https://en.wikipedia.org/wiki/Natura_2000>`_ natural protection areas to reduce computation times.
.. image:: ../img/natura.png
:scale: 33 %
Description
-----------
"""
import logging
from _helpers import configure_logging
import atlite
import geopandas as gpd
import rasterio as rio
from rasterio.features import geometry_mask
from rasterio.warp import transform_bounds
logger = logging.getLogger(__name__)
def determine_cutout_xXyY(cutout_name):
cutout = atlite.Cutout(cutout_name)
assert cutout.crs.to_epsg() == 4326
x, X, y, Y = cutout.extent
dx, dy = cutout.dx, cutout.dy
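# the extent refers to grid-cell centres, so pad by half a cell in each
# direction to make the raster cover the outermost cells completely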
return [x - dx/2., X + dx/2., y - dy/2., Y + dy/2.]
def get_transform_and_shape(bounds, res):
left, bottom = [(b // res) * res for b in bounds[:2]]
right, top = [(b // res + 1) * res for b in bounds[2:]]
shape = int((top - bottom) // res), int((right - left) // res)
transform = rio.Affine(res, 0, left, 0, -res, top)
return transform, shape
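# worked example with assumed numbers: bounds = (105, 203, 407, 512) and res = 100
# snap to left=100, bottom=200, right=500, top=600, giving shape = (4, 4) and
# transform = rio.Affine(100, 0, 100, 0, -100, 600), i.e. origin at the top-left corner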
if __name__ == "__main__":
if 'snakemake' not in globals():
from _helpers import mock_snakemake
snakemake = mock_snakemake('build_natura_raster')
configure_logging(snakemake)
cutouts = snakemake.input.cutouts
xs, Xs, ys, Ys = zip(*(determine_cutout_xXyY(cutout) for cutout in cutouts))
bounds = transform_bounds(4326, 3035, min(xs), min(ys), max(Xs), max(Ys))
transform, out_shape = get_transform_and_shape(bounds, res=100)
# adjusted boundaries
shapes = gpd.read_file(snakemake.input.natura).to_crs(3035)
raster = ~geometry_mask(shapes.geometry, out_shape[::-1], transform)
raster = raster.astype(rio.uint8)
with rio.open(snakemake.output[0], 'w', driver='GTiff', dtype=rio.uint8,
count=1, transform=transform, crs=3035, compress='lzw',
width=raster.shape[1], height=raster.shape[0]) as dst:
dst.write(raster, indexes=1)

View File

@ -221,15 +221,17 @@ if __name__ == '__main__':
client = Client(cluster, asynchronous=True)
cutout = atlite.Cutout(snakemake.input['cutout'])
regions = gpd.read_file(snakemake.input.regions).set_index('name').rename_axis('bus')
regions = gpd.read_file(snakemake.input.regions)
assert not regions.empty, (f"List of regions in {snakemake.input.regions} is empty, please "
"disable the corresponding renewable technology")
# do not move this up; set_index does not work if the geo dataframe is empty
regions = regions.set_index('name').rename_axis('bus')
buses = regions.index
excluder = atlite.ExclusionContainer(crs=3035, res=100)
if config['natura']:
mask = regions.to_crs(3035).buffer(0) # buffer to avoid invalid geometry
natura = gpd.read_file(snakemake.input.natura, mask=mask)
excluder.add_geometry(natura.geometry)
excluder.add_raster(snakemake.input.natura, nodata=0, allow_no_overlap=True)
corine = config.get("corine", {})
if "grid_codes" in corine:

View File

@ -129,14 +129,15 @@ def eez(country_shapes, eez, country_list):
df['name'] = df['ISO_3digit'].map(lambda c: _get_country('alpha_2', alpha_3=c))
s = df.set_index('name').geometry.map(lambda s: _simplify_polys(s, filterremote=False))
s = gpd.GeoSeries({k:v for k,v in s.iteritems() if v.distance(country_shapes[k]) < 1e-3})
s = s.to_frame("geometry")
s.index.name = "name"
return s
def country_cover(country_shapes, eez_shapes=None):
shapes = list(country_shapes)
shapes = country_shapes
if eez_shapes is not None:
shapes += list(eez_shapes)
shapes = pd.concat([shapes, eez_shapes])
europe_shape = unary_union(shapes)
if isinstance(europe_shape, MultiPolygon):
@ -203,16 +204,6 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp):
return df
def save_to_geojson(df, fn):
if os.path.exists(fn):
os.unlink(fn)
if not isinstance(df, gpd.GeoDataFrame):
df = gpd.GeoDataFrame(dict(geometry=df))
df = df.reset_index()
schema = {**gpd.io.file.infer_schema(df), 'geometry': 'Unknown'}
df.to_file(fn, driver='GeoJSON', schema=schema)
if __name__ == "__main__":
if 'snakemake' not in globals():
from _helpers import mock_snakemake
@ -220,15 +211,14 @@ if __name__ == "__main__":
configure_logging(snakemake)
country_shapes = countries(snakemake.input.naturalearth, snakemake.config['countries'])
save_to_geojson(country_shapes, snakemake.output.country_shapes)
country_shapes.reset_index().to_file(snakemake.output.country_shapes)
offshore_shapes = eez(country_shapes, snakemake.input.eez, snakemake.config['countries'])
save_to_geojson(offshore_shapes, snakemake.output.offshore_shapes)
offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes)
europe_shape = country_cover(country_shapes, offshore_shapes)
save_to_geojson(gpd.GeoSeries(europe_shape), snakemake.output.europe_shape)
europe_shape = gpd.GeoDataFrame(geometry=[country_cover(country_shapes, offshore_shapes.geometry)])
europe_shape.reset_index().to_file(snakemake.output.europe_shape)
nuts3_shapes = nuts3(country_shapes, snakemake.input.nuts3, snakemake.input.nuts3pop,
snakemake.input.nuts3gdp, snakemake.input.ch_cantons, snakemake.input.ch_popgdp)
save_to_geojson(nuts3_shapes, snakemake.output.nuts3_shapes)
nuts3_shapes.reset_index().to_file(snakemake.output.nuts3_shapes)

View File

@ -11,11 +11,11 @@ Relevant Settings
.. code:: yaml
focus_weights:
clustering:
cluster_network:
aggregation_strategies:
renewable: (keys)
{technology}:
potential:
focus_weights:
solving:
solver:
@ -122,7 +122,7 @@ Exemplary unsolved network clustered to 37 nodes:
"""
import logging
from _helpers import configure_logging, update_p_nom_max
from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies, REGION_COLS
import pypsa
import os
@ -138,7 +138,7 @@ import seaborn as sns
from functools import reduce
from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering,
_make_consense, get_clustering_from_busmap)
busmap_by_hac, _make_consense, get_clustering_from_busmap)
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
@ -173,6 +173,42 @@ def weighting_for_country(n, x):
return (w * (100. / w.max())).clip(lower=1.).astype(int)
def get_feature_for_hac(n, buses_i=None, feature=None):
if buses_i is None:
buses_i = n.buses.index
if feature is None:
feature = "solar+onwind-time"
carriers = feature.split('-')[0].split('+')
if "offwind" in carriers:
carriers.remove("offwind")
carriers = np.append(carriers, n.generators.carrier.filter(like='offwind').unique())
if feature.split('-')[1] == 'cap':
feature_data = pd.DataFrame(index=buses_i, columns=carriers)
for carrier in carriers:
gen_i = n.generators.query("carrier == @carrier").index
attach = n.generators_t.p_max_pu[gen_i].mean().rename(index=n.generators.loc[gen_i].bus)
feature_data[carrier] = attach
if feature.split('-')[1] == 'time':
feature_data = pd.DataFrame(columns=buses_i)
for carrier in carriers:
gen_i = n.generators.query("carrier == @carrier").index
attach = n.generators_t.p_max_pu[gen_i].rename(columns=n.generators.loc[gen_i].bus)
feature_data = pd.concat([feature_data, attach], axis=0)[buses_i]
feature_data = feature_data.T
# timestamp raises error in sklearn >= v1.2:
feature_data.columns = feature_data.columns.astype(str)
feature_data = feature_data.fillna(0)
return feature_data
def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
"""Determine the number of clusters per country"""
@ -221,13 +257,50 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
return pd.Series(m.n.get_values(), index=L.index).round().astype(int)
def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds):
def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds):
if algorithm == "kmeans":
algorithm_kwds.setdefault('n_init', 1000)
algorithm_kwds.setdefault('max_iter', 30000)
algorithm_kwds.setdefault('tol', 1e-6)
algorithm_kwds.setdefault('random_state', 0)
def fix_country_assignment_for_hac(n):
from scipy.sparse import csgraph
# overwrite country of nodes that are disconnected from their country-topology
for country in n.buses.country.unique():
m = n[n.buses.country == country].copy()
_, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False)
component = pd.Series(labels, index=m.buses.index)
component_sizes = component.value_counts()
if len(component_sizes) > 1:
disconnected_bus = component[component == component_sizes.index[-1]].index[0]
neighbor_bus = (
n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus")
.iloc[0][['bus0', 'bus1']]
)
new_country = list(set(n.buses.loc[neighbor_bus].country) - set([country]))[0]
logger.info(
f"overwriting country `{country}` of bus `{disconnected_bus}` "
f"to new country `{new_country}`, because it is disconnected "
"from its initial inter-country transmission grid."
)
)
n.buses.at[disconnected_bus, "country"] = new_country
return n
if algorithm == "hac":
feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature)
n = fix_country_assignment_for_hac(n)
if (algorithm != "hac") and (feature is not None):
logger.warning(f"Keyword argument feature is only valid for algorithm `hac`. "
f"Given feature `{feature}` will be ignored.")
n.determine_network_topology()
n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name)
@ -251,47 +324,34 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "louvain":
return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "hac":
return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index])
else:
raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.")
raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.")
return (n.buses.groupby(['country', 'sub_network'], group_keys=False)
.apply(busmap_for_country).squeeze().rename('busmap'))
def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None,
line_length_factor=1.25, potential_mode='simple', solver_name="cbc",
algorithm="kmeans", extended_link_costs=0, focus_weights=None):
line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc",
algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None):
if potential_mode == 'simple':
p_nom_max_strategy = pd.Series.sum
elif potential_mode == 'conservative':
p_nom_max_strategy = pd.Series.min
else:
raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'")
bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies)
if not isinstance(custom_busmap, pd.Series):
busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm)
busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature)
else:
busmap = custom_busmap
clustering = get_clustering_from_busmap(
n, busmap,
bus_strategies=dict(country=_make_consense("Bus", "country")),
bus_strategies=bus_strategies,
aggregate_generators_weighted=True,
aggregate_generators_carriers=aggregate_carriers,
aggregate_one_ports=["Load", "StorageUnit"],
line_length_factor=line_length_factor,
generator_strategies={'p_nom_max': p_nom_max_strategy,
'p_nom_min': pd.Series.sum,
'p_min_pu': pd.Series.mean,
'marginal_cost': pd.Series.mean,
'committable': np.any,
'ramp_limit_up': pd.Series.max,
'ramp_limit_down': pd.Series.max,
'build_year': lambda x: 0,
'lifetime': lambda x: np.inf,
'efficiency': np.mean,
},
generator_strategies=generator_strategies,
scale_link_capital_costs=False)
if not n.links.empty:
@ -306,24 +366,18 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr
return clustering
def save_to_geojson(s, fn):
if os.path.exists(fn):
os.unlink(fn)
df = s.reset_index()
schema = {**gpd.io.file.infer_schema(df), 'geometry': 'Unknown'}
df.to_file(fn, driver='GeoJSON', schema=schema)
def cluster_regions(busmaps, input=None, output=None):
busmap = reduce(lambda x, y: x.map(y), busmaps[1:], busmaps[0])
for which in ('regions_onshore', 'regions_offshore'):
regions = gpd.read_file(getattr(input, which)).set_index('name')
geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union)
regions_c = gpd.GeoDataFrame(dict(geometry=geom_c))
regions = gpd.read_file(getattr(input, which))
regions = regions.reindex(columns=REGION_COLS).set_index('name')
aggfunc = dict(x="mean", y="mean", country="first")
regions_c = regions.dissolve(busmap, aggfunc=aggfunc)
regions_c.index.name = 'name'
save_to_geojson(regions_c, getattr(output, which))
regions_c = regions_c.reset_index()
regions_c.to_file(getattr(output, which))
def plot_busmap_for_n_clusters(n, n_clusters, fn=None):
@ -378,21 +432,29 @@ if __name__ == "__main__":
"The `potential` configuration option must agree for all renewable carriers, for now!"
)
return v
potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential']
for tech in renewable_carriers]))
aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {})
# translate str entries of aggregation_strategies to pd.Series functions:
aggregation_strategies = {
p: {k: getattr(pd.Series, v) for k, v in aggregation_strategies[p].items()}
for p in aggregation_strategies.keys()
}
custom_busmap = snakemake.config["enable"].get("custom_busmap", False)
if custom_busmap:
custom_busmap = pd.read_csv(snakemake.input.custom_busmap, index_col=0, squeeze=True)
custom_busmap.index = custom_busmap.index.astype(str)
logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {})
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers,
line_length_factor, potential_mode,
line_length_factor, aggregation_strategies,
snakemake.config['solving']['solver']['name'],
"kmeans", hvac_overhead_cost, focus_weights)
cluster_config.get("algorithm", "hac"),
cluster_config.get("feature", "solar+onwind-time"),
hvac_overhead_cost, focus_weights)
update_p_nom_max(clustering.network)
update_p_nom_max(n)
clustering.network.export_to_netcdf(snakemake.output.network)
for attr in ('busmap', 'linemap'): #also available: linemap_positive, linemap_negative
getattr(clustering, attr).to_csv(snakemake.output[attr])

View File

@ -13,6 +13,11 @@ Relevant Settings
.. code:: yaml
clustering:
simplify_network:
cluster_network:
aggregation_strategies:
costs:
USD2013_to_EUR2013:
discountrate:
@ -22,10 +27,6 @@ Relevant Settings
electricity:
max_hours:
renewables: (keys)
{technology}:
potential:
lines:
length_factor:
@ -83,7 +84,7 @@ The rule :mod:`simplify_network` does up to four things:
"""
import logging
from _helpers import configure_logging, update_p_nom_max
from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies
from cluster_network import clustering_for_n_clusters, cluster_regions
from add_electricity import load_costs
@ -189,7 +190,10 @@ def _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, out
def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, aggregate_one_ports={"Load", "StorageUnit"}):
def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output,
aggregate_one_ports={"Load", "StorageUnit"},
aggregation_strategies=dict()):
def replace_components(n, c, df, pnl):
n.mremove(c, n.df(c).index)
@ -200,14 +204,12 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a
_adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output)
strategies = {
'p_nom_min': np.sum,
'p_nom_max': 'sum',
'build_year': lambda x: 0,
'lifetime': lambda x: np.inf,
'efficiency': np.mean
}
generators, generators_pnl = aggregategenerators(n, busmap, custom_strategies=strategies)
_, generator_strategies = get_aggregation_strategies(aggregation_strategies)
generators, generators_pnl = aggregategenerators(
n, busmap, custom_strategies=generator_strategies
)
replace_components(n, "Generator", generators, generators_pnl)
for one_port in aggregate_one_ports:
@ -221,7 +223,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a
n.mremove(c, df.index[df.bus0.isin(buses_to_del) | df.bus1.isin(buses_to_del)])
def simplify_links(n, costs, config, output):
def simplify_links(n, costs, config, output, aggregation_strategies=dict()):
## Complex multi-node links are folded into end-points
logger.info("Simplifying connected link components")
@ -313,21 +315,23 @@ def simplify_links(n, costs, config, output):
logger.debug("Collecting all components using the busmap")
_aggregate_and_move_components(n, busmap, connection_costs_to_bus, output)
_aggregate_and_move_components(n, busmap, connection_costs_to_bus, output,
aggregation_strategies=aggregation_strategies)
return n, busmap
def remove_stubs(n, costs, config, output):
def remove_stubs(n, costs, config, output, aggregation_strategies=dict()):
logger.info("Removing stubs")
busmap = busmap_by_stubs(n) # ['country'])
connection_costs_to_bus = _compute_connection_costs_to_bus(n, busmap, costs, config)
_aggregate_and_move_components(n, busmap, connection_costs_to_bus, output)
_aggregate_and_move_components(n, busmap, connection_costs_to_bus, output,
aggregation_strategies=aggregation_strategies)
return n, busmap
def aggregate_to_substations(n, buses_i=None):
def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None):
# can be used to aggregate a selection of buses to their electrically closest neighbors
# if no buses are given, nodes that are not substations or lack an offshore connection are aggregated
@ -352,23 +356,20 @@ def aggregate_to_substations(n, buses_i=None):
busmap = n.buses.index.to_series()
busmap.loc[buses_i] = dist.idxmin(1)
bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies)
clustering = get_clustering_from_busmap(n, busmap,
bus_strategies=dict(country=_make_consense("Bus", "country")),
bus_strategies=bus_strategies,
aggregate_generators_weighted=True,
aggregate_generators_carriers=None,
aggregate_one_ports=["Load", "StorageUnit"],
line_length_factor=1.0,
generator_strategies={'p_nom_max': 'sum',
'build_year': lambda x: 0,
'lifetime': lambda x: np.inf,
'efficiency': np.mean
},
generator_strategies=generator_strategies,
scale_link_capital_costs=False)
return clustering.network, busmap
def cluster(n, n_clusters, config):
def cluster(n, n_clusters, config, algorithm="hac", feature=None, aggregation_strategies=dict()):
logger.info(f"Clustering to {n_clusters} buses")
focus_weights = config.get('focus_weights', None)
@ -376,17 +377,11 @@ def cluster(n, n_clusters, config):
renewable_carriers = pd.Index([tech
for tech in n.generators.carrier.unique()
if tech.split('-', 2)[0] in config['renewable']])
def consense(x):
v = x.iat[0]
assert ((x == v).all() or x.isnull().all()), (
"The `potential` configuration option must agree for all renewable carriers, for now!"
)
return v
potential_mode = (consense(pd.Series([config['renewable'][tech]['potential']
for tech in renewable_carriers]))
if len(renewable_carriers) > 0 else 'conservative')
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode,
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False,
aggregation_strategies=aggregation_strategies,
solver_name=config['solving']['solver']['name'],
algorithm=algorithm, feature=feature,
focus_weights=focus_weights)
return clustering.network, clustering.busmap
@ -400,24 +395,50 @@ if __name__ == "__main__":
n = pypsa.Network(snakemake.input.network)
aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {})
# translate str entries of aggregation_strategies to pd.Series functions:
aggregation_strategies = {
p: {k: getattr(pd.Series, v) for k, v in aggregation_strategies[p].items()}
for p in aggregation_strategies.keys()
}
n, trafo_map = simplify_network_to_380(n)
Nyears = n.snapshot_weightings.objective.sum() / 8760
technology_costs = load_costs(snakemake.input.tech_costs, snakemake.config['costs'], snakemake.config['electricity'], Nyears)
n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output)
n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output,
aggregation_strategies)
n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output)
n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output,
aggregation_strategies=aggregation_strategies)
busmaps = [trafo_map, simplify_links_map, stub_map]
if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False):
n, substation_map = aggregate_to_substations(n)
cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {})
if cluster_config.get('to_substations', False):
n, substation_map = aggregate_to_substations(n, aggregation_strategies)
busmaps.append(substation_map)
# treatment of outliers (nodes without a profile for the considered carrier):
# all nodes that have no profile of the given carrier are aggregated to the closest neighbor
if (
snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac" or
cluster_config.get("algorithm", "hac") == "hac"
):
carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+')
for carrier in carriers:
buses_i = list(set(n.buses.index) - set(n.generators.query("carrier == @carrier").bus))
logger.info(f'clustering preparation (hac): aggregating {len(buses_i)} buses of type {carrier}.')
n, busmap_hac = aggregate_to_substations(n, aggregation_strategies, buses_i)
busmaps.append(busmap_hac)
if snakemake.wildcards.simpl:
n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config)
n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config,
cluster_config.get('algorithm', 'hac'),
cluster_config.get('feature', None),
aggregation_strategies)
busmaps.append(cluster_map)
# some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed

View File

@ -104,7 +104,7 @@ def prepare_network(n, solve_opts):
if load_shedding:
n.add("Carrier", "load", color="#dd2e23", nice_name="Load shedding")
buses_i = n.buses.query("carrier == 'AC'").index
if not np.isscalar(load_shedding): load_shedding = 1e2
if not np.isscalar(load_shedding): load_shedding = 1e2 # Eur/kWh
# intersect between macroeconomic and surveybased
# willingness to pay
# http://journal.frontiersin.org/article/10.3389/fenrg.2015.00055/full)

View File

@ -18,8 +18,23 @@ scenario:
countries: ['BE']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions
p_nom_min: sum
p_min_pu: mean
marginal_cost: mean
committable: any
ramp_limit_up: max
ramp_limit_down: max
efficiency: mean
snapshots:
start: "2013-03-01"
@ -31,6 +46,8 @@ enable:
retrieve_databundle: true
build_cutout: false
retrieve_cutout: true
build_natura_raster: false
retrieve_natura_raster: true
custom_busmap: false
electricity: