From 299e71e2b3eacf53f796db3c9e965034bbb25d16 Mon Sep 17 00:00:00 2001 From: martacki Date: Wed, 17 Nov 2021 13:46:33 +0100 Subject: [PATCH 01/23] introduce hierarchical agglomeratice clustering (hac) --- config.default.yaml | 9 +++-- config.tutorial.yaml | 9 +++-- doc/configtables/clustering.csv | 9 +++-- envs/environment.yaml | 2 +- scripts/cluster_network.py | 63 +++++++++++++++++++++++++++++---- scripts/simplify_network.py | 10 ++++-- test/config.test1.yaml | 9 +++-- 7 files changed, 93 insertions(+), 18 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index f70e7c2c..631645b9 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -20,8 +20,13 @@ scenario: countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 26ead242..919da193 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -20,8 +20,13 @@ scenario: countries: ['DE'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. 
substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-03-01" diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index 2f63f955..e488dd39 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,3 +1,8 @@ ,Unit,Values,Description -simplify,,, --- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +simplify_network,,, +-- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +-- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", +cluster_network,,, +-- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", diff --git a/envs/environment.yaml b/envs/environment.yaml index 29d743ac..5f6a8310 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -12,7 +12,6 @@ dependencies: - pip - mamba # esp for windows build - - pypsa>=0.18 - atlite>=0.2.5 - dask<=2021.3.1 # until https://github.com/dask/dask/issues/7583 is solved @@ -56,5 +55,6 @@ dependencies: - tabula-py - pip: + - git+https://github.com/pypsa/pypsa.git#egg=pypsa - vresutils==0.3.1 - tsam>=1.1.0 diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 980b73b0..7aae3ae6 100644 --- a/scripts/cluster_network.py +++ 
b/scripts/cluster_network.py @@ -138,7 +138,7 @@ import seaborn as sns from functools import reduce from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering, - _make_consense, get_clustering_from_busmap) + busmap_by_hac, _make_consense, get_clustering_from_busmap) from add_electricity import load_costs @@ -170,6 +170,45 @@ def weighting_for_country(n, x): return (w * (100. / w.max())).clip(lower=1.).astype(int) +def get_feature_for_hac(n, buses_i, feature=None): #buses_i = n.buses.index + + if feature is None: + feature = "solar+onwind-time" + + carriers = feature.split('-')[0].split('+') + if "offwind" in carriers: + carriers.remove("offwind") + carriers = np.append(carriers, network.generators.carrier.filter(like='offwind').unique()) + + if feature.split('-')[1] == 'cap': + feature_data = pd.DataFrame(index=buses_i, columns=carriers) + for carrier in carriers: + try: + feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() + .rename(index=lambda x: x.split(' ')[0])) + except: + feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() + .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) + + if feature.split('-')[1] == 'time': + feature_data = pd.DataFrame(columns=buses_i) + for carrier in carriers: + try: + # without simpl wildcard (bus names are "X X"): + feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) + .rename(columns=lambda x: x.split(' ')[0]))[buses_i] + except: + # with simpl wildcard (bus names are "X X X"): + feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) + .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + feature_data = feature_data.T + feature_data.columns = feature_data.columns.astype(str) # Timestamp will raise error in sklearn>=v1.2 + + feature_data = feature_data.fillna(0) + + return feature_data + + def distribute_clusters(n, n_clusters, focus_weights=None, 
solver_name=None): """Determine the number of clusters per country""" @@ -221,12 +260,18 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): return pd.Series(m.n.get_values(), index=L.index).astype(int) -def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds): +def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds): if algorithm == "kmeans": algorithm_kwds.setdefault('n_init', 1000) algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('tol', 1e-6) + if algorithm == "hac": + feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) + elif feature is not None: + logger.info(f"keyword argument feature is only valid for algorithm 'hac'." + f"given feature {feature} will be ignored.") + n.determine_network_topology() n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name) @@ -250,8 +295,10 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) elif algorithm == "louvain": return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) + elif algorithm == "hac": + return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]) else: - raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.") + raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. 
Is {algorithm}.") return (n.buses.groupby(['country', 'sub_network'], group_keys=False) .apply(busmap_for_country).squeeze().rename('busmap')) @@ -259,7 +306,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, line_length_factor=1.25, potential_mode='simple', solver_name="cbc", - algorithm="kmeans", extended_link_costs=0, focus_weights=None): + algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None): + + logger.info(f"Clustering network using algorithm {algorithm} and feature {feature}...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -273,7 +322,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr busmap.index = busmap.index.astype(str) logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") else: - busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) + busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature) clustering = get_clustering_from_busmap( n, busmap, @@ -313,7 +362,7 @@ def cluster_regions(busmaps, input=None, output=None): for which in ('regions_onshore', 'regions_offshore'): regions = gpd.read_file(getattr(input, which)).set_index('name') - geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.cascaded_union) + geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union) regions_c = gpd.GeoDataFrame(dict(geometry=geom_c)) regions_c.index.name = 'name' save_to_geojson(regions_c, getattr(output, which)) @@ -377,6 +426,8 @@ if __name__ == "__main__": line_length_factor=line_length_factor, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], + algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'), + feature=snakemake.config.get('clustering', {}).get('cluster_network', 
{}).get('feature', None), extended_link_costs=hvac_overhead_cost, focus_weights=focus_weights) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 85bc4d15..8a93952c 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -360,7 +360,7 @@ def aggregate_to_substations(n, buses_i=None): return clustering.network, busmap -def cluster(n, n_clusters): +def cluster(n, n_clusters, algorithm="kmeans", feature=None): logger.info(f"Clustering to {n_clusters} buses") focus_weights = snakemake.config.get('focus_weights', None) @@ -377,8 +377,10 @@ def cluster(n, n_clusters): potential_mode = (consense(pd.Series([snakemake.config['renewable'][tech]['potential'] for tech in renewable_carriers])) if len(renewable_carriers) > 0 else 'conservative') + clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], + algorithm=algorithm, feature=feature, focus_weights=focus_weights) return clustering.network, clustering.busmap @@ -400,12 +402,14 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] - if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): + if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) if snakemake.wildcards.simpl: - n, cluster_map = cluster(n, int(snakemake.wildcards.simpl)) + n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), + algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'), + feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None)) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. 
as they are not needed diff --git a/test/config.test1.yaml b/test/config.test1.yaml index 2986037b..0c34ea13 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -19,8 +19,13 @@ scenario: countries: ['DE'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-03-01" From 6c4ea69e9563c257961e10f3e8d37b85d0cff7cf Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 10 Dec 2021 17:53:40 +0100 Subject: [PATCH 02/23] clustering: own config for clustering settings --- scripts/cluster_network.py | 14 +++++++++----- scripts/simplify_network.py | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 7aae3ae6..415d3820 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -170,7 +170,10 @@ def weighting_for_country(n, x): return (w * (100. / w.max())).clip(lower=1.).astype(int) -def get_feature_for_hac(n, buses_i, feature=None): #buses_i = n.buses.index +def get_feature_for_hac(n, buses_i=None, feature=None): + + if buses_i is None: + buses_i = n.buses.index if feature is None: feature = "solar+onwind-time" @@ -269,8 +272,8 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori if algorithm == "hac": feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) elif feature is not None: - logger.info(f"keyword argument feature is only valid for algorithm 'hac'." 
- f"given feature {feature} will be ignored.") + logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." + f"given feature {feature} will be ignored.") n.determine_network_topology() @@ -422,12 +425,13 @@ if __name__ == "__main__": potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential'] for tech in renewable_carriers])) custom_busmap = snakemake.config["enable"].get("custom_busmap", False) + cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {}) clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, line_length_factor=line_length_factor, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], - algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'), - feature=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('feature', None), + algorithm=cluster_config.get('algorithm', 'kmeans'), + feature=cluster_config.get('feature', None), extended_link_costs=hvac_overhead_cost, focus_weights=focus_weights) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 8a93952c..9464c3f7 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -407,9 +407,10 @@ if __name__ == "__main__": busmaps.append(substation_map) if snakemake.wildcards.simpl: + cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), - algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'), - feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None)) + algorithm=cluster_config.get('algorithm', 'hac'), + feature=cluster_config.get('feature', None)) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. 
as they are not needed From 256ac48b470470571d0b5751319e8f441ce39e0c Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 16:45:00 +0100 Subject: [PATCH 03/23] resolve merging master bugs --- scripts/cluster_network.py | 13 +++++++++---- scripts/simplify_network.py | 6 +++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index d4b3b139..1c55234c 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -190,9 +190,11 @@ def get_feature_for_hac(n, buses_i=None, feature=None): feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: try: + # without simpl wildcard (bus names are "X X"): feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() .rename(index=lambda x: x.split(' ')[0])) except: + # with simpl wildcard (bus names are "X X X"): feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) @@ -208,7 +210,7 @@ def get_feature_for_hac(n, buses_i=None, feature=None): feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # Timestamp will raise error in sklearn>=v1.2 + feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 feature_data = feature_data.fillna(0) @@ -309,9 +311,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, line_length_factor=1.25, potential_mode='simple', solver_name="cbc", - algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None): + algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None): - logger.info(f"Clustering network 
using algorithm {algorithm} and feature {feature}...") + logger.info(f"Clustering network using algorithm ``{algorithm}`` and feature ``{feature}``...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -424,10 +426,13 @@ if __name__ == "__main__": custom_busmap.index = custom_busmap.index.astype(str) logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") + cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {}) clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, line_length_factor, potential_mode, snakemake.config['solving']['solver']['name'], - "kmeans", hvac_overhead_cost, focus_weights) + cluster_config.get("algorithm", "hac"), + cluster_config.get("feature", "solar+onwind-time"), + hvac_overhead_cost, focus_weights) update_p_nom_max(n) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 0e9dd385..23facc79 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -353,11 +353,10 @@ def aggregate_to_substations(n, buses_i=None): line_length_factor=1.0, generator_strategies={'p_nom_max': 'sum'}, scale_link_capital_costs=False) - return clustering.network, busmap -def cluster(n, n_clusters, config, algorithm="kmeans", feature=None): +def cluster(n, n_clusters, config, algorithm="hac", feature=None): logger.info(f"Clustering to {n_clusters} buses") focus_weights = config.get('focus_weights', None) @@ -403,7 +402,8 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] - if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): + cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) + if cluster_config.get('to_substations', False): n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) From b5dbf4eb3240222e3a3145bbf87b3d66a63d7169 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 17:19:23 +0100 
Subject: [PATCH 04/23] overwrite country of isolated buses --- scripts/cluster_network.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 1c55234c..bd49556f 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -271,11 +271,36 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('tol', 1e-6) + def fix_country_assignment_for_hac(n): + # overwrite country of nodes that are disconnected from their country-topology + for country in n.buses.country.unique(): + m = n.copy() + m.buses = m.buses.query("country in @country") + m.lines = m.lines.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + + _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) + component_sizes = component.value_counts() + + if len(component_sizes)>1: + disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index + neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] + + logger.info(f"overwriting country ``{country}`` of bus ``{disconnected_bus}`` to new country ``{new_country}``, " + "because it is disconnected from its inital inter-country transmission grid.") + n.buses.at[disconnected_bus, "country"] = new_country + return n + if algorithm == "hac": + from scipy.sparse import csgraph + feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) + n = fix_country_assignment_for_hac(n) elif feature is not None: logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." 
- f"given feature {feature} will be ignored.") + f"given feature ``{feature}`` will be ignored.") n.determine_network_topology() From 82a0338e9f3f8b68eb7d73aad0ceb3b26862d028 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 20:27:18 +0100 Subject: [PATCH 05/23] treatment of outliers and small feature-bugfix --- scripts/cluster_network.py | 17 +++++++++-------- scripts/simplify_network.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index bd49556f..2cc406eb 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -272,6 +272,8 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori algorithm_kwds.setdefault('tol', 1e-6) def fix_country_assignment_for_hac(n): + from scipy.sparse import csgraph + # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): m = n.copy() @@ -288,19 +290,18 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country ``{country}`` of bus ``{disconnected_bus}`` to new country ``{new_country}``, " + logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " "because it is disconnected from its inital inter-country transmission grid.") n.buses.at[disconnected_bus, "country"] = new_country return n if algorithm == "hac": - from scipy.sparse import csgraph - feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) n = fix_country_assignment_for_hac(n) - elif feature is not None: - logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." 
- f"given feature ``{feature}`` will be ignored.") + + if (algorithm != "hac") and (feature is not None): + logger.warning(f"Keyword argument feature is only valid for algorithm `hac`. " + f"Given feature `{feature}` will be ignored.") n.determine_network_topology() @@ -338,7 +339,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr line_length_factor=1.25, potential_mode='simple', solver_name="cbc", algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None): - logger.info(f"Clustering network using algorithm ``{algorithm}`` and feature ``{feature}``...") + logger.info(f"Clustering network using algorithm `{algorithm}` and feature `{feature}`...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -348,7 +349,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'") if not custom_busmap: - busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) + busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature) else: busmap = custom_busmap diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 23facc79..b28bc4dc 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -407,7 +407,21 @@ if __name__ == "__main__": n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) + # treatment of outliers (nodes without a profile for considered carrier) for "cluster_network" + if snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac": + carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + n, busmap_hac = aggregate_to_substations(n, buses_i) + busmaps.append(busmap_hac) + if 
snakemake.wildcards.simpl: + # treatment of outliers (nodes without a profile for a considered carrier) for "simplify" + if cluster_config.get("algorithm", "hac") == "hac": + carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + n, busmap_hac = aggregate_to_substations(n, buses_i) + busmaps.append(busmap_hac) + # conduct clustering n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, algorithm=cluster_config.get('algorithm', 'hac'), feature=cluster_config.get('feature', None)) From f02d5fe82161f7373423c8ff6571a4319ab8c164 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 7 Mar 2022 11:43:04 +0100 Subject: [PATCH 06/23] fix clustering setup for hac according to fneum suggestions --- doc/configtables/clustering.csv | 4 ++-- envs/environment.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index e488dd39..d14ab395 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,8 +1,8 @@ ,Unit,Values,Description simplify_network,,, -- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. 
demand or generation) to electrically closest ones" --- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", -- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", cluster_network,,, --- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", -- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", diff --git a/envs/environment.yaml b/envs/environment.yaml index 1d8d9d28..c64b5fbb 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -10,6 +10,7 @@ dependencies: - python>=3.8 - pip + - pypsa>=0.19.1 - atlite>=0.2.5 - dask @@ -53,6 +54,5 @@ dependencies: - tabula-py - pip: - - git+https://github.com/pypsa/pypsa.git#egg=pypsa - vresutils>=0.3.1 - tsam>=1.1.0 From 68f332f5cc8603ceb890fb41f01cfc02bbebb10e Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 17 Mar 2022 15:42:04 +0100 Subject: [PATCH 07/23] suggestions by coroa and fneum --- scripts/cluster_network.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index f4058e79..003442ff 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -189,28 +189,20 @@ def get_feature_for_hac(n, buses_i=None, feature=None): if feature.split('-')[1] == 'cap': feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0])) - except: - # with simpl wildcard (bus names are "X X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' 
')[0] + ' ' + x.split(' ')[1])) + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus) + feature_data[carrier] = attach if feature.split('-')[1] == 'time': feature_data = pd.DataFrame(columns=buses_i) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0]))[buses_i] - except: - # with simpl wildcard (bus names are "X X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus) + feature_data = pd.concat([feature_data, attach], axis=0)[buses_i] + feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 + # timestamp raises error in sklearn >= v1.2: + feature_data.columns = feature_data.columns.astype(str) feature_data = feature_data.fillna(0) @@ -283,16 +275,24 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) component_sizes = component.value_counts() if len(component_sizes)>1: - disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index - neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + disconnected_bus = component[component==component_sizes.index[-1]].index[0] + + neighbor_bus = ( + n.lines.query("bus0 == @disconnected_bus or bus1 == 
@disconnected_bus") + .iloc[0][['bus0', 'bus1']] + ) new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " - "because it is disconnected from its inital inter-country transmission grid.") + logger.info( + f"overwriting country `{country}` of bus `{disconnected_bus}` " + f"to new country `{new_country}`, because it is disconnected " + "from its inital inter-country transmission grid." + ) n.buses.at[disconnected_bus, "country"] = new_country return n From 4c1c5e3a4e2a527792cb9a3dcc0b4d5c08d45f29 Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 17 Mar 2022 17:38:30 +0100 Subject: [PATCH 08/23] suggestions by coroa and fneum --- scripts/cluster_network.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index f4058e79..003442ff 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -189,28 +189,20 @@ def get_feature_for_hac(n, buses_i=None, feature=None): if feature.split('-')[1] == 'cap': feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0])) - except: - # with simpl wildcard (bus names are "X X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus) + feature_data[carrier] = attach if feature.split('-')[1] == 'time': feature_data = pd.DataFrame(columns=buses_i) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - 
feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0]))[buses_i] - except: - # with simpl wildcard (bus names are "X X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus) + feature_data = pd.concat([feature_data, attach], axis=0)[buses_i] + feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 + # timestamp raises error in sklearn >= v1.2: + feature_data.columns = feature_data.columns.astype(str) feature_data = feature_data.fillna(0) @@ -283,16 +275,24 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) component_sizes = component.value_counts() if len(component_sizes)>1: - disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index - neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + disconnected_bus = component[component==component_sizes.index[-1]].index[0] + + neighbor_bus = ( + n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus") + .iloc[0][['bus0', 'bus1']] + ) new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " - "because it is disconnected from its inital inter-country transmission grid.") + logger.info( + f"overwriting country `{country}` of bus 
`{disconnected_bus}` " + f"to new country `{new_country}`, because it is disconnected " + "from its inital inter-country transmission grid." + ) n.buses.at[disconnected_bus, "country"] = new_country return n From 8cb4c17930909c873d66f003ea41a2e09a924e4f Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 22 Mar 2022 16:53:05 +0100 Subject: [PATCH 09/23] unify vre treatment for hac clustering for simplify_network and cluster_network --- scripts/simplify_network.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index b28bc4dc..f9ac8ad7 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -351,7 +351,7 @@ def aggregate_to_substations(n, buses_i=None): aggregate_generators_carriers=None, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=1.0, - generator_strategies={'p_nom_max': 'sum'}, + generator_strategies={'p_nom_max': 'sum', 'p_nom_min': 'sum'}, scale_link_capital_costs=False) return clustering.network, busmap @@ -407,21 +407,20 @@ if __name__ == "__main__": n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) - # treatment of outliers (nodes without a profile for considered carrier) for "cluster_network" - if snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac": + # treatment of outliers (nodes without a profile for considered carrier): + # all nodes that have no profile of the given carrier are being aggregated to closest neighbor + if ( + snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac" or + cluster_config.get("algorithm", "hac") == "hac" + ): carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') - buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) - n, busmap_hac = aggregate_to_substations(n, buses_i) - busmaps.append(busmap_hac) - - 
if snakemake.wildcards.simpl: - # treatment of outliers (nodes without a profile for a considered carrier) for "simplify" - if cluster_config.get("algorithm", "hac") == "hac": - carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') - buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + for carrier in carriers: + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus)) + logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.') n, busmap_hac = aggregate_to_substations(n, buses_i) busmaps.append(busmap_hac) - # conduct clustering + + if snakemake.wildcards.simpl: n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, algorithm=cluster_config.get('algorithm', 'hac'), feature=cluster_config.get('feature', None)) From 7f29c31abe6cf133f28f4b4558cccb7f20899d72 Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 24 Mar 2022 13:17:01 +0100 Subject: [PATCH 10/23] .copy() shortcut --- scripts/cluster_network.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 9c6d541f..f659f8a8 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -269,10 +269,7 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): - m = n.copy() - m.buses = m.buses.query("country in @country") - m.lines = m.lines.query("bus0 in @m.buses.index and bus1 in @m.buses.index") - m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + m = n[n.buses.country ==country].copy() _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) From bef4967e84567434aa836d6d572c0316abe6f96f Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 20 Jun 2022 18:58:23 +0200 Subject: 
[PATCH 11/23] clustering strategies moved to configurables --- config.default.yaml | 4 ++++ config.tutorial.yaml | 4 ++++ scripts/cluster_network.py | 43 +++++++++++++++-------------------- scripts/simplify_network.py | 45 ++++++++++++++++++++----------------- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index d2bf6159..9462719b 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -22,6 +22,10 @@ countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'F clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: "sum" # use "min" for more conservative assumptions + p_nom_min: "sum" snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 225d8f78..4dc7a94d 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -22,6 +22,10 @@ countries: ['BE'] clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: "sum" # use "min" for more conservative assumptions + p_nom_min: "sum" snapshots: start: "2013-03-01" diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 1d5608e2..0a3d768a 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -11,11 +11,10 @@ Relevant Settings .. 
code:: yaml - focus_weights: + clustering: + aggregation_strategies: - renewable: (keys) - {technology}: - potential: + focus_weights: solving: solver: @@ -259,15 +258,16 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, - line_length_factor=1.25, potential_mode='simple', solver_name="cbc", + line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc", algorithm="kmeans", extended_link_costs=0, focus_weights=None): - if potential_mode == 'simple': - p_nom_max_strategy = pd.Series.sum - elif potential_mode == 'conservative': - p_nom_max_strategy = pd.Series.min - else: - raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'") + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + + # this snippet supports compatibility of PyPSA and PyPSA-EUR: + if "p_nom_max" in generator_strategies: + if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -276,19 +276,12 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr clustering = get_clustering_from_busmap( n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=aggregate_carriers, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=line_length_factor, - generator_strategies={'p_nom_max': p_nom_max_strategy, - 'p_nom_min': pd.Series.sum, - 'p_min_pu': pd.Series.mean, - 'marginal_cost': pd.Series.mean, - 'committable': np.any, - 'ramp_limit_up': 
pd.Series.max, - 'ramp_limit_down': pd.Series.max, - }, + generator_strategies=generator_strategies, scale_link_capital_costs=False) if not n.links.empty: @@ -375,8 +368,8 @@ if __name__ == "__main__": "The `potential` configuration option must agree for all renewable carriers, for now!" ) return v - potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential'] - for tech in renewable_carriers])) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + custom_busmap = snakemake.config["enable"].get("custom_busmap", False) if custom_busmap: custom_busmap = pd.read_csv(snakemake.input.custom_busmap, index_col=0, squeeze=True) @@ -384,12 +377,12 @@ if __name__ == "__main__": logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, - line_length_factor, potential_mode, + line_length_factor, aggregation_strategies, snakemake.config['solving']['solver']['name'], "kmeans", hvac_overhead_cost, focus_weights) - - update_p_nom_max(n) + update_p_nom_max(clustering.network) + clustering.network.export_to_netcdf(snakemake.output.network) for attr in ('busmap', 'linemap'): #also available: linemap_positive, linemap_negative getattr(clustering, attr).to_csv(snakemake.output[attr]) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 287dfe32..7d51c511 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -13,6 +13,10 @@ Relevant Settings .. 
code:: yaml + clustering: + simplify: + aggregation_strategies: + costs: USD2013_to_EUR2013: discountrate: @@ -22,10 +26,6 @@ Relevant Settings electricity: max_hours: - renewables: (keys) - {technology}: - potential: - lines: length_factor: @@ -320,7 +320,7 @@ def remove_stubs(n, costs, config, output): return n, busmap -def aggregate_to_substations(n, buses_i=None): +def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=None): # can be used to aggregate a selection of buses to electrically closest neighbors # if no buses are given, nodes that are no substations or without offshore connection are aggregated @@ -345,19 +345,29 @@ def aggregate_to_substations(n, buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) + # default aggregation strategies must be specified within the function, otherwise (when defaults are passed in + # the function's definition) they get lost in case custom values for different variables are specified in the config + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + + # this snippet supports compatibility of PyPSA and PyPSA-EUR: + if "p_nom_max" in generator_strategies: + if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + clustering = get_clustering_from_busmap(n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=None, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=1.0, - generator_strategies={'p_nom_max': 'sum'}, + generator_strategies=generator_strategies, scale_link_capital_costs=False) return clustering.network, busmap -def cluster(n, n_clusters, config): +def cluster(n, n_clusters, config, aggregation_strategies=dict()): 
logger.info(f"Clustering to {n_clusters} buses") focus_weights = config.get('focus_weights', None) @@ -365,16 +375,9 @@ def cluster(n, n_clusters, config): renewable_carriers = pd.Index([tech for tech in n.generators.carrier.unique() if tech.split('-', 2)[0] in config['renewable']]) - def consense(x): - v = x.iat[0] - assert ((x == v).all() or x.isnull().all()), ( - "The `potential` configuration option must agree for all renewable carriers, for now!" - ) - return v - potential_mode = (consense(pd.Series([config['renewable'][tech]['potential'] - for tech in renewable_carriers])) - if len(renewable_carriers) > 0 else 'conservative') - clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, + + clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, + aggregation_strategies=aggregation_strategies, solver_name=config['solving']['solver']['name'], focus_weights=focus_weights) @@ -389,6 +392,8 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.network) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + n, trafo_map = simplify_network_to_380(n) Nyears = n.snapshot_weightings.objective.sum() / 8760 @@ -402,11 +407,11 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): - n, substation_map = aggregate_to_substations(n) + n, substation_map = aggregate_to_substations(n, snakemake.config, aggregation_strategies) busmaps.append(substation_map) if snakemake.wildcards.simpl: - n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config) + n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, aggregation_strategies) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. 
as they are not needed From b4d8dd8ecb827c3ed450375e80fcf3717cc2aaab Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 20 Jun 2022 19:21:08 +0200 Subject: [PATCH 12/23] add changes from PR #379 --- config.default.yaml | 9 +++++++-- config.tutorial.yaml | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 9462719b..7162d449 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -24,8 +24,13 @@ clustering: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) aggregation_strategies: generators: - p_nom_max: "sum" # use "min" for more conservative assumptions - p_nom_min: "sum" + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 4dc7a94d..f18f23f4 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -24,8 +24,13 @@ clustering: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. 
substations or offwind connections) aggregation_strategies: generators: - p_nom_max: "sum" # use "min" for more conservative assumptions - p_nom_min: "sum" + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-03-01" From bdd0cc3aa125fa5a721357ebfc47418b79475716 Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 21 Jun 2022 18:42:49 +0200 Subject: [PATCH 13/23] clustering strats to configurables: review suggestions --- scripts/cluster_network.py | 11 +++++++---- scripts/simplify_network.py | 19 +++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 0a3d768a..fd66b043 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -263,11 +263,8 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) - # this snippet supports compatibility of PyPSA and PyPSA-EUR: - if "p_nom_max" in generator_strategies: - if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -369,6 +366,12 @@ if __name__ == "__main__": ) return v aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + aggregation_strategies = {} + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in 
aggregation_strategies.keys() + } custom_busmap = snakemake.config["enable"].get("custom_busmap", False) if custom_busmap: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 7d51c511..52e0c815 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -320,7 +320,7 @@ def remove_stubs(n, costs, config, output): return n, busmap -def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=None): +def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): # can be used to aggregate a selection of buses to electrically closest neighbors # if no buses are given, nodes that are no substations or without offshore connection are aggregated @@ -345,15 +345,13 @@ def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=N busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies must be specified within the function, otherwise (when defaults are passed in - # the function's definition) they get lost in case custom values for different variables are specified in the config + # default aggregation strategies must be specified within the function, otherwise (when defaults + # are passed in the function's definition) they get lost in case custom values for different + # variables are specified in the config. 
bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) - # this snippet supports compatibility of PyPSA and PyPSA-EUR: - if "p_nom_max" in generator_strategies: - if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, @@ -393,6 +391,11 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.network) aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in aggregation_strategies.keys() + } n, trafo_map = simplify_network_to_380(n) @@ -407,7 +410,7 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): - n, substation_map = aggregate_to_substations(n, snakemake.config, aggregation_strategies) + n, substation_map = aggregate_to_substations(n, aggregation_strategies) busmaps.append(substation_map) if snakemake.wildcards.simpl: From 8dba48c4125a5578a65f4cf96c94f33780fb36ca Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 21 Jun 2022 19:08:22 +0200 Subject: [PATCH 14/23] clustering strats to configurables: documentation and testing --- doc/configtables/clustering.csv | 5 +++++ doc/release_notes.rst | 1 + test/config.test1.yaml | 9 +++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index 2f63f955..f178ff5c 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,3 
+1,8 @@ ,Unit,Values,Description simplify,,, -- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +-- aggregation_strategies,,, +-- -- generators,,, +-- -- -- {key},str,"{key} can be any of the component of the generator (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator." +-- -- buses,,, +-- -- -- {key},str,"{key} can be any of the component of the bus (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus." diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 963a1175..c000a046 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -74,6 +74,7 @@ Upcoming Release * Update rasterio version to correctly calculate exclusion raster +* Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent. PyPSA-Eur 0.4.0 (22th September 2021) ===================================== diff --git a/test/config.test1.yaml b/test/config.test1.yaml index a9ce1e50..e3f39ab5 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -21,6 +21,15 @@ countries: ['BE'] clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. 
substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-03-01" From c42d2bd97d79fde0cf47264f364e5f552b00ea48 Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 23 Jun 2022 14:25:30 +0200 Subject: [PATCH 15/23] refactor save to geojson file functionality to allow import/export of empty geodfs --- doc/release_notes.rst | 2 ++ envs/environment.yaml | 2 +- scripts/_helpers.py | 2 ++ scripts/base_network.py | 4 +++- scripts/build_bus_regions.py | 20 ++++++++------------ scripts/build_cutout.py | 2 +- scripts/build_renewable_profiles.py | 6 +++++- scripts/build_shapes.py | 26 ++++++++------------------ scripts/cluster_network.py | 20 +++++++------------- 9 files changed, 37 insertions(+), 47 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 963a1175..39ec0ebe 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -45,6 +45,8 @@ This release is not on the ``master`` branch. It can be used with Upcoming Release ================ +* The workflow now supports to run a selection of countries which do not have any offshore regions assigned. Therefore the offshore technologies need to be disabled, otherwise the workflow will raise an error. + * Add an efficiency factor of 88.55% to offshore wind capacity factors as a proxy for wake losses. More rigorous modelling is `planned `_ [`#277 `_]. 
diff --git a/envs/environment.yaml b/envs/environment.yaml index f8060de1..039b602a 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -27,7 +27,7 @@ dependencies: - powerplantmatching>=0.5.3 - numpy - pandas - - geopandas + - geopandas>=0.11.0 - xarray - netcdf4 - networkx diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 6e47c053..d77266d8 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -4,7 +4,9 @@ import pandas as pd from pathlib import Path +from collections import OrderedDict +REGION_COLS = ['geometry', 'name', 'x', 'y', 'country'] def configure_logging(snakemake, skip_handlers=False): """ diff --git a/scripts/base_network.py b/scripts/base_network.py index 50ec8e53..1d105225 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -391,7 +391,9 @@ def _set_countries_and_substations(n, config, country_shapes, offshore_shapes): countries = config['countries'] country_shapes = gpd.read_file(country_shapes).set_index('name')['geometry'] - offshore_shapes = gpd.read_file(offshore_shapes).set_index('name')['geometry'] + # reindexing necessary for supporting empty geo-dataframes + offshore_shapes = gpd.read_file(offshore_shapes) + offshore_shapes = offshore_shapes.reindex(columns=['name', 'geometry']).set_index('name')['geometry'] substation_b = buses['symbol'].str.contains('substation|converter station', case=False) def prefer_voltage(x, which): diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index 8003d370..8869c9f4 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -42,7 +42,7 @@ Description """ import logging -from _helpers import configure_logging +from _helpers import configure_logging, REGION_COLS import pypsa import os @@ -55,13 +55,6 @@ from scipy.spatial import Voronoi logger = logging.getLogger(__name__) -def save_to_geojson(s, fn): - if os.path.exists(fn): - os.unlink(fn) - schema = {**gpd.io.file.infer_schema(s), 'geometry': 'Unknown'} - 
s.to_file(fn, driver='GeoJSON', schema=schema) - - def voronoi_partition_pts(points, outline): """ Compute the polygons of a voronoi partition of `points` within the @@ -120,7 +113,8 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.base_network) country_shapes = gpd.read_file(snakemake.input.country_shapes).set_index('name')['geometry'] - offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes).set_index('name')['geometry'] + offshore_shapes = gpd.read_file(snakemake.input.offshore_shapes) + offshore_shapes = offshore_shapes.reindex(columns=REGION_COLS).set_index('name')['geometry'] onshore_regions = [] offshore_regions = [] @@ -151,6 +145,8 @@ if __name__ == "__main__": offshore_regions_c = offshore_regions_c.loc[offshore_regions_c.area > 1e-2] offshore_regions.append(offshore_regions_c) - save_to_geojson(pd.concat(onshore_regions, ignore_index=True), snakemake.output.regions_onshore) - - save_to_geojson(pd.concat(offshore_regions, ignore_index=True), snakemake.output.regions_offshore) + pd.concat(onshore_regions, ignore_index=True).to_file(snakemake.output.regions_onshore) + if offshore_regions: + pd.concat(offshore_regions, ignore_index=True).to_file(snakemake.output.regions_offshore) + else: + offshore_shapes.to_frame().to_file(snakemake.output.regions_offshore) \ No newline at end of file diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 78eafac6..5ab085a1 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -116,7 +116,7 @@ if __name__ == "__main__": # Determine the bounds from bus regions with a buffer of two grid cells onshore = gpd.read_file(snakemake.input.regions_onshore) offshore = gpd.read_file(snakemake.input.regions_offshore) - regions = onshore.append(offshore) + regions = pd.concat([onshore, offshore]) d = max(cutout_params.get('dx', 0.25), cutout_params.get('dy', 0.25))*2 cutout_params['bounds'] = regions.total_bounds + [-d, -d, d, d] elif {'x', 'y'}.issubset(cutout_params): diff --git 
a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 37e1e9de..5db87c78 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -221,7 +221,11 @@ if __name__ == '__main__': client = Client(cluster, asynchronous=True) cutout = atlite.Cutout(snakemake.input['cutout']) - regions = gpd.read_file(snakemake.input.regions).set_index('name').rename_axis('bus') + regions = gpd.read_file(snakemake.input.regions) + assert not regions.empty, (f"List of regions in {snakemake.input.regions} is empty, please " + "disable the corresponding renewable technology") + # do not pull up, set_index does not work if geo dataframe is empty + regions = regions.set_index('name').rename_axis('bus') buses = regions.index excluder = atlite.ExclusionContainer(crs=3035, res=100) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 22aed1fe..09230ddc 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -129,14 +129,15 @@ def eez(country_shapes, eez, country_list): df['name'] = df['ISO_3digit'].map(lambda c: _get_country('alpha_2', alpha_3=c)) s = df.set_index('name').geometry.map(lambda s: _simplify_polys(s, filterremote=False)) s = gpd.GeoSeries({k:v for k,v in s.iteritems() if v.distance(country_shapes[k]) < 1e-3}) + s = s.to_frame("geometry") s.index.name = "name" return s def country_cover(country_shapes, eez_shapes=None): - shapes = list(country_shapes) + shapes = country_shapes if eez_shapes is not None: - shapes += list(eez_shapes) + shapes = pd.concat([shapes, eez_shapes]) europe_shape = unary_union(shapes) if isinstance(europe_shape, MultiPolygon): @@ -203,16 +204,6 @@ def nuts3(country_shapes, nuts3, nuts3pop, nuts3gdp, ch_cantons, ch_popgdp): return df -def save_to_geojson(df, fn): - if os.path.exists(fn): - os.unlink(fn) - if not isinstance(df, gpd.GeoDataFrame): - df = gpd.GeoDataFrame(dict(geometry=df)) - df = df.reset_index() - schema = {**gpd.io.file.infer_schema(df), 'geometry': 
'Unknown'} - df.to_file(fn, driver='GeoJSON', schema=schema) - - if __name__ == "__main__": if 'snakemake' not in globals(): from _helpers import mock_snakemake @@ -220,15 +211,14 @@ if __name__ == "__main__": configure_logging(snakemake) country_shapes = countries(snakemake.input.naturalearth, snakemake.config['countries']) - save_to_geojson(country_shapes, snakemake.output.country_shapes) + country_shapes.reset_index().to_file(snakemake.output.country_shapes) offshore_shapes = eez(country_shapes, snakemake.input.eez, snakemake.config['countries']) - save_to_geojson(offshore_shapes, snakemake.output.offshore_shapes) + offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes) - europe_shape = country_cover(country_shapes, offshore_shapes) - save_to_geojson(gpd.GeoSeries(europe_shape), snakemake.output.europe_shape) + europe_shape = gpd.GeoDataFrame(geometry=[country_cover(country_shapes, offshore_shapes.geometry)]) + europe_shape.reset_index().to_file(snakemake.output.europe_shape) nuts3_shapes = nuts3(country_shapes, snakemake.input.nuts3, snakemake.input.nuts3pop, snakemake.input.nuts3gdp, snakemake.input.ch_cantons, snakemake.input.ch_popgdp) - - save_to_geojson(nuts3_shapes, snakemake.output.nuts3_shapes) + nuts3_shapes.reset_index().to_file(snakemake.output.nuts3_shapes) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 1d5608e2..7a4daaee 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -122,7 +122,7 @@ Exemplary unsolved network clustered to 37 nodes: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import configure_logging, update_p_nom_max, REGION_COLS import pypsa import os @@ -303,24 +303,18 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr return clustering -def save_to_geojson(s, fn): - if os.path.exists(fn): - os.unlink(fn) - df = s.reset_index() - schema = {**gpd.io.file.infer_schema(df), 'geometry': 'Unknown'} 
- df.to_file(fn, driver='GeoJSON', schema=schema) - - def cluster_regions(busmaps, input=None, output=None): busmap = reduce(lambda x, y: x.map(y), busmaps[1:], busmaps[0]) for which in ('regions_onshore', 'regions_offshore'): - regions = gpd.read_file(getattr(input, which)).set_index('name') - geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union) - regions_c = gpd.GeoDataFrame(dict(geometry=geom_c)) + regions = gpd.read_file(getattr(input, which)) + regions = regions.reindex(columns=REGION_COLS).set_index('name') + aggfunc = dict(x="mean", y="mean", country="first") + regions_c = regions.dissolve(busmap, aggfunc=aggfunc) regions_c.index.name = 'name' - save_to_geojson(regions_c, getattr(output, which)) + regions_c = regions_c.reset_index() + regions_c.to_file(getattr(output, which)) def plot_busmap_for_n_clusters(n, n_clusters, fn=None): From c9c738e96b2240c8522f84a1941b163069c0e351 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 24 Jun 2022 20:57:53 +0200 Subject: [PATCH 16/23] clustering strats to configurables: set defaults for yaml-incompatible strats + include in stubaggregation --- config.default.yaml | 1 + config.tutorial.yaml | 1 + scripts/cluster_network.py | 6 +++--- scripts/simplify_network.py | 37 +++++++++++++++++++++++++------------ test/config.test1.yaml | 1 + 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 7162d449..144f416e 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -31,6 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency: mean snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index f18f23f4..31ca7f99 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -31,6 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency:mean snapshots: start: "2013-03-01" diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 
fd66b043..93cd89cd 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -264,7 +264,8 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -366,7 +367,6 @@ if __name__ == "__main__": ) return v aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) - aggregation_strategies = {} # translate str entries of aggregation_strategies to pd.Series functions: aggregation_strategies = { p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} @@ -383,7 +383,7 @@ if __name__ == "__main__": line_length_factor, aggregation_strategies, snakemake.config['solving']['solver']['name'], "kmeans", hvac_overhead_cost, focus_weights) - + update_p_nom_max(clustering.network) clustering.network.export_to_netcdf(snakemake.output.network) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 52e0c815..0fda5c77 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -189,7 +189,10 @@ def _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, out -def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, aggregate_one_ports={"Load", "StorageUnit"}): +def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregate_one_ports={"Load", "StorageUnit"}, + aggregation_strategies=dict()): + def replace_components(n, c, df, pnl): n.mremove(c, n.df(c).index) @@ -200,7 +203,12 @@ def 
_aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output) - generators, generators_pnl = aggregategenerators(n, busmap, custom_strategies={'p_nom_min': np.sum}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) + + generators, generators_pnl = aggregategenerators( + n, busmap, custom_strategies=generator_strategies + ) replace_components(n, "Generator", generators, generators_pnl) for one_port in aggregate_one_ports: @@ -214,7 +222,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a n.mremove(c, df.index[df.bus0.isin(buses_to_del) | df.bus1.isin(buses_to_del)]) -def simplify_links(n, costs, config, output): +def simplify_links(n, costs, config, output, aggregation_strategies=dict()): ## Complex multi-node links are folded into end-points logger.info("Simplifying connected link components") @@ -306,17 +314,19 @@ def simplify_links(n, costs, config, output): logger.debug("Collecting all components using the busmap") - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap -def remove_stubs(n, costs, config, output): +def remove_stubs(n, costs, config, output, aggregation_strategies=dict()): logger.info("Removing stubs") busmap = busmap_by_stubs(n) # ['country']) connection_costs_to_bus = _compute_connection_costs_to_bus(n, busmap, costs, config) - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap @@ -345,13 +355,14 @@ def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): busmap = 
n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies must be specified within the function, otherwise (when defaults - # are passed in the function's definition) they get lost in case custom values for different - # variables are specified in the config. + # default aggregation strategies that cannot be defined in .yaml format must be specified within + # the function, otherwise (when defaults are passed in the function's definition) they get lost + # in case custom values for different variables are specified in the config. bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, @@ -403,9 +414,11 @@ if __name__ == "__main__": technology_costs = load_costs(snakemake.input.tech_costs, snakemake.config['costs'], snakemake.config['electricity'], Nyears) - n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output) + n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies) - n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output) + n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies=aggregation_strategies) busmaps = [trafo_map, simplify_links_map, stub_map] diff --git a/test/config.test1.yaml b/test/config.test1.yaml index e3f39ab5..6d626f7e 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -30,6 +30,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency: mean snapshots: start: "2013-03-01" From 
2dfa2753ba8b5de6df56d6e6f9b278aa32047d55 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 13:59:04 +0200 Subject: [PATCH 17/23] clustering strats to configurables: config spacing --- config.tutorial.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 31ca7f99..edf0091c 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -31,7 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max - efficiency:mean + efficiency: mean snapshots: start: "2013-03-01" From a3af137b74d6e505e2dae32fc7cea2b2226f834c Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 14:18:47 +0200 Subject: [PATCH 18/23] clustering strats to configurables: move duplicate code to _helpers script & import --- scripts/_helpers.py | 16 ++++++++++++++++ scripts/cluster_network.py | 8 ++------ scripts/simplify_network.py | 14 +++----------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 6e47c053..3c3e6ac7 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -210,6 +210,22 @@ def progress_retrieve(url, file): urllib.request.urlretrieve(url, file, reporthook=dlProgress) +def get_aggregation_strategies(aggregation_strategies): + # default aggregation strategies that cannot be defined in .yaml format must be specified within + # the function, otherwise (when defaults are passed in the function's definition) they get lost + # when custom values are specified in the config. 
+ + import numpy as np + from pypsa.networkclustering import _make_consense + + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) + + return bus_strategies, generator_strategies + def mock_snakemake(rulename, **wildcards): """ diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 93cd89cd..6186a00e 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -121,7 +121,7 @@ Exemplary unsolved network clustered to 37 nodes: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies import pypsa import os @@ -261,11 +261,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc", algorithm="kmeans", extended_link_costs=0, focus_weights=None): - bus_strategies = dict(country=_make_consense("Bus", "country")) - bus_strategies.update(aggregation_strategies.get("buses", {})) - - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 0fda5c77..37e7e698 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -83,7 +83,7 @@ The rule :mod:`simplify_network` does up to four things: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import 
configure_logging, update_p_nom_max, get_aggregation_strategies from cluster_network import clustering_for_n_clusters, cluster_regions from add_electricity import load_costs @@ -203,8 +203,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output) - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + _, generator_strategies = get_aggregation_strategies(aggregation_strategies) generators, generators_pnl = aggregategenerators( n, busmap, custom_strategies=generator_strategies @@ -355,14 +354,7 @@ def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies that cannot be defined in .yaml format must be specified within - # the function, otherwise (when defaults are passed in the function's definition) they get lost - # in case custom values for different variables are specified in the config. 
- bus_strategies = dict(country=_make_consense("Bus", "country")) - bus_strategies.update(aggregation_strategies.get("buses", {})) - - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, From b56d1f6f4d445536708779f3c886ddfec647aabf Mon Sep 17 00:00:00 2001 From: Fabian Date: Mon, 27 Jun 2022 17:35:19 +0200 Subject: [PATCH 19/23] conventional config section: update to more general attribute assignment scheme --- Snakefile | 5 ++-- config.default.yaml | 2 +- ...{nuclear_eafs.csv => nuclear_p_max_pu.csv} | 0 doc/configuration.rst | 13 ++++++++++- doc/release_notes.rst | 6 ++--- scripts/add_electricity.py | 23 +++++++++++-------- 6 files changed, 32 insertions(+), 17 deletions(-) rename data/{nuclear_eafs.csv => nuclear_p_max_pu.csv} (100%) diff --git a/Snakefile b/Snakefile index af2e6e90..71c6bc21 100644 --- a/Snakefile +++ b/Snakefile @@ -174,7 +174,7 @@ rule build_renewable_profiles: input: base_network="networks/base.nc", corine="data/bundle/corine/g250_clc06_V18_5.tif", - natura=lambda w: ("data/Natura2000_end2020.gpkg" + natura=lambda w: ("resources/natura.tiff" if config["renewable"][w.technology]["natura"] else []), gebco=lambda w: ("data/bundle/GEBCO_2014_2D.nc" @@ -217,7 +217,8 @@ rule add_electricity: load='resources/load.csv', nuts3_shapes='resources/nuts3_shapes.geojson', **{f"profile_{tech}": f"resources/profile_{tech}.nc" - for tech in config['renewable']} + for tech in config['renewable']}, + **{"conventional_{carrier}_{attrs}": fn for carrier in config.get('conventional', {None: {}}).values() for fn in carrier.values() if str(fn).startswith("data/")}, output: "networks/elec.nc" log: "logs/add_electricity.log" benchmark: "benchmarks/add_electricity" diff --git 
a/config.default.yaml b/config.default.yaml index 177c5e74..a67562c6 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -187,7 +187,7 @@ renewable: conventional: nuclear: - energy_availability_factors: "data/nuclear-eafs.csv" # float of file name + p_max_pu: "data/nuclear_p_max_pu.csv" # float of file name lines: types: diff --git a/data/nuclear_eafs.csv b/data/nuclear_p_max_pu.csv similarity index 100% rename from data/nuclear_eafs.csv rename to data/nuclear_p_max_pu.csv diff --git a/doc/configuration.rst b/doc/configuration.rst index 67d25228..c332ea7d 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -171,7 +171,7 @@ Define and specify the ``atlite.Cutout`` used for calculating renewable potentia .. literalinclude:: ../config.default.yaml :language: yaml :start-at: hydro: - :end-before: lines: + :end-before: conventional: .. csv-table:: :header-rows: 1 @@ -180,6 +180,17 @@ Define and specify the ``atlite.Cutout`` used for calculating renewable potentia .. _lines_cf: +``conventional`` +============= + +Define additional generator attribute for conventional carrier types. If a scalar value is given it is applied to all generators. However if a string starting with "data/" is given, the value is interpreted as a path to a csv file with country specific values. Then, the values are read in and applied to all generators of the given carrier in the given country. Note that the value(s) overwrite the existing values in the corresponding section of the ``generators`` dataframe. + +.. literalinclude:: ../config.default.yaml + :language: yaml + :start-at: conventional: + :end-before: lines: + + ``lines`` ============= diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 57c0a2f0..3addf3ab 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -53,11 +53,11 @@ Upcoming Release * Add function to add global constraint on use of gas in :mod:`prepare_network`. 
This can be activated by including the keyword ``CH4L`` in the ``{opts}`` wildcard which enforces the limit set in ``electricity: gaslimit:`` given in MWh thermal. Alternatively, it is possible to append a number in the `{opts}` wildcard, e.g. `CH4L200` which limits the gas use to 200 TWh thermal. -* Add configuration option to implement Energy Availability Factors (EAFs) for conventional generation technologies. - * A new section ``conventional`` was added to the config file. This section contains configurations for conventional carriers. -* Implement country-specific EAFs for nuclear power plants based on IAEA 2018-2020 reported country averages. These are specified ``data/nuclear_eafs.csv`` and translate to static ``p_max_pu`` values. +* Add configuration option to implement arbitrary generator attributes for conventional generation technologies. + +* Implement country-specific Energy Availability Factors (EAFs) for nuclear power plants based on IAEA 2018-2020 reported country averages. These are specified ``data/nuclear_p_max_pu.csv`` and translate to static ``p_max_pu`` values. * The powerplants that have been shut down before 2021 are filtered out. 
diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index ea95fd94..88c2ce66 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -331,17 +331,20 @@ def attach_conventional_generators(n, costs, ppl, conventional_carriers, extenda # Generators with technology affected idx = n.generators.query("carrier == @carrier").index - factors = conventional_config[carrier].get("energy_availability_factors") - if isinstance(factors, float): - # Single value affecting all generators of technology k indiscriminantely of country - n.generators.loc[idx, "p_max_pu"] = factors - elif isinstance(factors, str): - factors = pd.read_csv(factors, index_col=0)['factor'] - # Values affecting generators of technology k country-specific - # First map generator buses to countries; then map countries to p_max_pu - bus_factors = n.buses.country.map(factors) - n.generators.p_max_pu.update(n.generators.loc[idx].bus.map(bus_factors).dropna()) + for key in list(set(conventional_carriers[carrier]) & set(n.generators)): + + values = conventional_config[carrier][key] + + if isinstance(values, str) and str(values).startswith("data/"): + # Values affecting generators of technology k country-specific + # First map generator buses to countries; then map countries to p_max_pu + values = pd.read_csv(values, index_col=0).iloc[:, 0] + bus_values = n.buses.country.map(values) + n.generators[key].update(n.generators.loc[idx].bus.map(bus_values).dropna()) + else: + # Single value affecting all generators of technology k indiscriminantely of country + n.generators.loc[idx, key] = values From 51de606aab6eddcba8d121873f779c4ce995afa8 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 27 Jun 2022 19:00:41 +0200 Subject: [PATCH 20/23] Revert "remove build/retrieve natura raster, directly use shapefile" --- .github/workflows/ci.yaml | 23 ++--- Snakefile | 27 ++++-- config.default.yaml | 2 + config.tutorial.yaml | 10 ++- doc/configtables/toplevel.csv | 2 + doc/preparation.rst 
| 2 + doc/preparation/build_natura_raster.rst | 39 +++++++++ doc/preparation/build_renewable_profiles.rst | 3 + doc/preparation/retrieve.rst | 27 ++++++ doc/release_notes.rst | 5 -- doc/tutorial.rst | 4 +- envs/environment.yaml | 1 - scripts/build_natura_raster.py | 89 ++++++++++++++++++++ scripts/build_renewable_profiles.py | 4 +- test/config.test1.yaml | 2 + 15 files changed, 200 insertions(+), 40 deletions(-) create mode 100644 doc/preparation/build_natura_raster.rst create mode 100644 scripts/build_natura_raster.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d447a56f..c753deab 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,13 @@ on: branches: - master pull_request: + branches: + - master schedule: - cron: "0 5 * * TUE" env: - CONDA_CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - DATA_CACHE_NUMBER: 1 + CACHE_NUMBER: 1 # Change this value to manually reset the environment cache jobs: build: @@ -65,26 +66,16 @@ jobs: miniforge-version: latest activate-environment: pypsa-eur use-mamba: true - - - name: Set cache dates - run: | - echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - echo "WEEK=$(date +'%Y%U')" >> $GITHUB_ENV - - - name: Cache data and cutouts folders - uses: actions/cache@v3 - with: - path: | - data - cutouts - key: data-cutouts-${{ env.WEEK }}-${{ env.DATA_CACHE_NUMBER }} + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Create environment cache uses: actions/cache@v2 id: cache with: path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CONDA_CACHE_NUMBER }} + key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - name: Update environment due to outdated or unavailable cache run: mamba env update -n pypsa-eur -f envs/environment.yaml diff --git a/Snakefile b/Snakefile index 42ecc45b..7678a401 
100644 --- a/Snakefile +++ b/Snakefile @@ -67,12 +67,6 @@ if config['enable'].get('retrieve_databundle', True): script: 'scripts/retrieve_databundle.py' -rule retrieve_natura_data: - input: HTTP.remote("sdi.eea.europa.eu/datashare/s/H6QGCybMdLLnywo/download", additional_request_string="?path=%2FNatura2000_end2020_gpkg&files=Natura2000_end2020.gpkg", static=True) - output: "data/Natura2000_end2020.gpkg" - run: move(input[0], output[0]) - - rule retrieve_load_data: input: HTTP.remote("data.open-power-system-data.org/time_series/2019-06-05/time_series_60min_singleindex.csv", keep_local=True, static=True) output: "data/load_raw.csv" @@ -171,13 +165,28 @@ if config['enable'].get('retrieve_cutout', True): run: move(input[0], output[0]) +if config['enable'].get('build_natura_raster', False): + rule build_natura_raster: + input: + natura="data/bundle/natura/Natura2000_end2015.shp", + cutouts=expand("cutouts/{cutouts}.nc", **config['atlite']) + output: "resources/natura.tiff" + log: "logs/build_natura_raster.log" + script: "scripts/build_natura_raster.py" + + +if config['enable'].get('retrieve_natura_raster', True): + rule retrieve_natura_raster: + input: HTTP.remote("zenodo.org/record/4706686/files/natura.tiff", keep_local=True, static=True) + output: "resources/natura.tiff" + run: move(input[0], output[0]) + + rule build_renewable_profiles: input: base_network="networks/base.nc", corine="data/bundle/corine/g250_clc06_V18_5.tif", - natura=lambda w: ("data/Natura2000_end2020.gpkg" - if config["renewable"][w.technology]["natura"] - else []), + natura="resources/natura.tiff", gebco=lambda w: ("data/bundle/GEBCO_2014_2D.nc" if "max_depth" in config["renewable"][w.technology].keys() else []), diff --git a/config.default.yaml b/config.default.yaml index 7e6a1f78..144f416e 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -43,6 +43,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: 
true custom_busmap: false electricity: diff --git a/config.tutorial.yaml b/config.tutorial.yaml index ba0bb01a..edf0091c 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -43,6 +43,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: @@ -87,7 +89,7 @@ renewable: 24, 25, 26, 27, 28, 29, 31, 32] distance: 1000 distance_grid_codes: [1, 2, 3, 4, 5, 6] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 offwind-ac: @@ -98,7 +100,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true max_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -111,7 +113,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true min_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -133,7 +135,7 @@ renewable: # correction_factor: 0.854337 corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 diff --git a/doc/configtables/toplevel.csv b/doc/configtables/toplevel.csv index 8965a0bc..b7f39d05 100644 --- a/doc/configtables/toplevel.csv +++ b/doc/configtables/toplevel.csv @@ -12,4 +12,6 @@ enable,,, -- retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zenodo via the rule :mod:`retrieve_databundle` or whether to keep a custom databundle located in the corresponding folder." -- build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`." -- retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`." 
+-- build_natura_raster,bool,"{true, false}","Switch to enable the creation of the raster ``natura.tiff`` via the rule :mod:`build_natura_raster`." +-- retrieve_natura_raster,bool,"{true, false}","Switch to enable the retrieval of ``natura.tiff`` from zenodo with :mod:`retrieve_natura_raster`." -- custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``." diff --git a/doc/preparation.rst b/doc/preparation.rst index 7f42190c..dba5e981 100644 --- a/doc/preparation.rst +++ b/doc/preparation.rst @@ -27,6 +27,7 @@ With these and the externally extracted ENTSO-E online map topology Then the process continues by calculating conventional power plant capacities, potentials, and per-unit availability time series for variable renewable energy carriers and hydro power plants with the following rules: - :mod:`build_powerplants` for today's thermal power plant capacities using `powerplantmatching `_ allocating these to the closest substation for each powerplant, +- :mod:`build_natura_raster` for rasterising NATURA2000 natural protection areas, - :mod:`build_renewable_profiles` for the hourly capacity factors and installation potentials constrained by land-use in each substation's Voronoi cell for PV, onshore and offshore wind, and - :mod:`build_hydro_profile` for the hourly per-unit hydro power availability time series. @@ -40,6 +41,7 @@ together into a detailed PyPSA network stored in ``networks/elec.nc``. 
preparation/build_shapes preparation/build_load_data preparation/build_cutout + preparation/build_natura_raster preparation/prepare_links_p_nom preparation/base_network preparation/build_bus_regions diff --git a/doc/preparation/build_natura_raster.rst b/doc/preparation/build_natura_raster.rst new file mode 100644 index 00000000..e3ec4364 --- /dev/null +++ b/doc/preparation/build_natura_raster.rst @@ -0,0 +1,39 @@ +.. + SPDX-FileCopyrightText: 2019-2020 The PyPSA-Eur Authors + + SPDX-License-Identifier: CC-BY-4.0 + +.. _natura: + +Rule ``build_natura_raster`` +=============================== + +.. graphviz:: + :align: center + + digraph snakemake_dag { + graph [bgcolor=white, + margin=0, + size="8,5" + ]; + node [fontname=sans, + fontsize=10, + penwidth=2, + shape=box, + style=rounded + ]; + edge [color=grey, + penwidth=2 + ]; + 9 [color="0.22 0.6 0.85", + label=build_renewable_profiles]; + 12 [color="0.31 0.6 0.85", + fillcolor=gray, + label=build_natura_raster, + style=filled]; + 12 -> 9; + } + +| + +.. automodule:: build_natura_raster diff --git a/doc/preparation/build_renewable_profiles.rst b/doc/preparation/build_renewable_profiles.rst index adc4d6ca..27e61583 100644 --- a/doc/preparation/build_renewable_profiles.rst +++ b/doc/preparation/build_renewable_profiles.rst @@ -41,6 +41,9 @@ Rule ``build_renewable_profiles`` 8 [color="0.00 0.6 0.85", label=build_shapes]; 8 -> 9; + 12 [color="0.31 0.6 0.85", + label=build_natura_raster]; + 12 -> 9; 13 [color="0.56 0.6 0.85", label=build_cutout]; 13 -> 9; diff --git a/doc/preparation/retrieve.rst b/doc/preparation/retrieve.rst index 21187924..42479284 100644 --- a/doc/preparation/retrieve.rst +++ b/doc/preparation/retrieve.rst @@ -50,3 +50,30 @@ The :ref:`tutorial` uses a smaller cutout than required for the full model (30 M .. seealso:: For details see :mod:`build_cutout` and read the `atlite documentation `_. + + +Rule ``retrieve_natura_raster`` +------------------------------- + +.. 
image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4706686.svg + :target: https://doi.org/10.5281/zenodo.4706686 + +This rule, as a substitute for :mod:`build_natura_raster`, downloads an already rasterized version (`natura.tiff `_) of `Natura 2000 `_ natural protection areas to reduce computation times. The file is placed into the ``resources`` sub-directory. + +**Relevant Settings** + +.. code:: yaml + + enable: + build_natura_raster: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`toplevel_cf` + +**Outputs** + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + +.. seealso:: + For details see :mod:`build_natura_raster`. diff --git a/doc/release_notes.rst b/doc/release_notes.rst index df7af28e..c000a046 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -62,11 +62,6 @@ Upcoming Release * New network topology extracted from the ENTSO-E interactive map. -* Remove rules to build or retrieve rasterized NATURA 2000 dataset. Renewable potential calculation now directly uses the shapefiles. - -* Cache data and cutouts folders. This cache will be updated weekly. - -* Add rule to automatically retrieve Natura2000 natural protection areas. Switch of file format to GPKG. * The unused argument ``simple_hvdc_costs`` in :mod:`add_electricity` was removed. * Iterative solving with impedance updates is skipped if there are no expandable lines. diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 1c247f8e..c37abb39 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -35,8 +35,8 @@ To run the tutorial, use this as your configuration file ``config.yaml``. .../pypsa-eur % cp config.tutorial.yaml config.yaml -This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle` -and :mod:`retrieve_cutout` totalling at less than 250 MB. 
+This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle`, +:mod:`retrieve_natura_raster`, :mod:`retrieve_cutout` totalling less than 250 MB. The full set of data dependencies would consume 5.3 GB. For more information on the data dependencies of PyPSA-Eur, continue reading :ref:`data`. diff --git a/envs/environment.yaml b/envs/environment.yaml index fd6dff49..f8060de1 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -45,7 +45,6 @@ dependencies: # GIS dependencies: - cartopy - descartes - - fiona # explicit for Windows - rasterio<=1.2.9 # 1.2.10 creates error https://github.com/PyPSA/atlite/issues/238 # PyPSA-Eur-Sec Dependencies diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py new file mode 100644 index 00000000..7fa9d544 --- /dev/null +++ b/scripts/build_natura_raster.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: : 2017-2020 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +""" +Rasterizes the vector data of the `Natura 2000 `_ natural protection areas onto all cutout regions. + +Relevant Settings +----------------- + +.. code:: yaml + + renewable: + {technology}: + cutout: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`renewable_cf` + +Inputs +------ + +- ``data/bundle/natura/Natura2000_end2015.shp``: `Natura 2000 `_ natural protection areas. + + .. image:: ../img/natura.png + :scale: 33 % + +Outputs +------- + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + + ..
image:: ../img/natura.png + :scale: 33 % + +Description +----------- + +""" + +import logging +from _helpers import configure_logging + +import atlite +import geopandas as gpd +import rasterio as rio +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds + +logger = logging.getLogger(__name__) + + +def determine_cutout_xXyY(cutout_name): + cutout = atlite.Cutout(cutout_name) + assert cutout.crs.to_epsg() == 4326 + x, X, y, Y = cutout.extent + dx, dy = cutout.dx, cutout.dy + return [x - dx/2., X + dx/2., y - dy/2., Y + dy/2.] + + +def get_transform_and_shape(bounds, res): + left, bottom = [(b // res)* res for b in bounds[:2]] + right, top = [(b // res + 1) * res for b in bounds[2:]] + shape = int((top - bottom) // res), int((right - left) / res) + transform = rio.Affine(res, 0, left, 0, -res, top) + return transform, shape + + +if __name__ == "__main__": + if 'snakemake' not in globals(): + from _helpers import mock_snakemake + snakemake = mock_snakemake('build_natura_raster') + configure_logging(snakemake) + + cutouts = snakemake.input.cutouts + xs, Xs, ys, Ys = zip(*(determine_cutout_xXyY(cutout) for cutout in cutouts)) + bounds = transform_bounds(4326, 3035, min(xs), min(ys), max(Xs), max(Ys)) + transform, out_shape = get_transform_and_shape(bounds, res=100) + + # adjusted boundaries + shapes = gpd.read_file(snakemake.input.natura).to_crs(3035) + raster = ~geometry_mask(shapes.geometry, out_shape[::-1], transform) + raster = raster.astype(rio.uint8) + + with rio.open(snakemake.output[0], 'w', driver='GTiff', dtype=rio.uint8, + count=1, transform=transform, crs=3035, compress='lzw', + width=raster.shape[1], height=raster.shape[0]) as dst: + dst.write(raster, indexes=1) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index a49df1f5..37e1e9de 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -227,9 +227,7 @@ if __name__ == '__main__': excluder = 
atlite.ExclusionContainer(crs=3035, res=100) if config['natura']: - mask = regions.to_crs(3035).buffer(0) # buffer to avoid invalid geometry - natura = gpd.read_file(snakemake.input.natura, mask=mask) - excluder.add_geometry(natura.geometry) + excluder.add_raster(snakemake.input.natura, nodata=0, allow_no_overlap=True) corine = config.get("corine", {}) if "grid_codes" in corine: diff --git a/test/config.test1.yaml b/test/config.test1.yaml index 64b169a4..6d626f7e 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -42,6 +42,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: From 29fe0fb7fb8ab34cde94dc01ca947cece2ed924c Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 21:02:45 +0200 Subject: [PATCH 21/23] hierarchical clustering: account for changes from merging master --- scripts/cluster_network.py | 1 + scripts/simplify_network.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index b1bf759d..59641a7e 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -12,6 +12,7 @@ Relevant Settings .. code:: yaml clustering: + cluster_network: aggregation_strategies: focus_weights: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 7e258966..0b31e5c6 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -14,7 +14,8 @@ Relevant Settings .. 
code:: yaml clustering: - simplify: + simplify_network: + cluster_network: aggregation_strategies: costs: @@ -415,7 +416,7 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) - if cluster_config.get('clustering', {}).get('simplify', {}).get('to_substations', False): + if cluster_config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): n, substation_map = aggregate_to_substations(n, aggregation_strategies) busmaps.append(substation_map) @@ -429,7 +430,7 @@ if __name__ == "__main__": for carrier in carriers: buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus)) logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.') - n, busmap_hac = aggregate_to_substations(n, buses_i) + n, busmap_hac = aggregate_to_substations(n, aggregation_strategies, buses_i) busmaps.append(busmap_hac) if snakemake.wildcards.simpl: From f1dedd9429fe6a5633fdfeead552abfacea48e39 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 21:11:49 +0200 Subject: [PATCH 22/23] hierarchical clustering: release notes & revert to old default clustering algorithm (kmeans) --- config.default.yaml | 8 ++++---- config.tutorial.yaml | 8 ++++---- doc/release_notes.rst | 2 ++ test/config.test1.yaml | 8 ++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index e699c38f..e1ac6237 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -22,11 +22,11 @@ countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'F clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. 
substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 75703b8a..db20f1a4 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -22,11 +22,11 @@ countries: ['BE'] clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions diff --git a/doc/release_notes.rst b/doc/release_notes.rst index c000a046..47a67970 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -76,6 +76,8 @@ Upcoming Release * Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent. +* Hierarchical clustering was introduced. Distance metric is calculated from renewable potentials on hourly (feature entry ends with `-time`) or annual (feature entry in config ends with `-cap`) values.
+ PyPSA-Eur 0.4.0 (22th September 2021) ===================================== diff --git a/test/config.test1.yaml b/test/config.test1.yaml index d8d095b8..5aecfdf2 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -21,11 +21,11 @@ countries: ['BE'] clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions From 3294ad92ebafaa5ace5b3fcbdf2f3d29efe2537f Mon Sep 17 00:00:00 2001 From: Fabian Hofmann Date: Tue, 28 Jun 2022 13:14:47 +0200 Subject: [PATCH 23/23] Update scripts/solve_network.py Co-authored-by: Martha Frysztacki --- scripts/solve_network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 6c40ca4f..b3280a94 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -104,7 +104,7 @@ def prepare_network(n, solve_opts): if load_shedding: n.add("Carrier", "load", color="#dd2e23", nice_name="Load shedding") buses_i = n.buses.query("carrier == 'AC'").index - if not np.isscalar(load_shedding): load_shedding = 1e2 + if not np.isscalar(load_shedding): load_shedding = 1e2 # Eur/kWh # intersect between macroeconomic and surveybased # willingness to pay # http://journal.frontiersin.org/article/10.3389/fenrg.2015.00055/full)