From 299e71e2b3eacf53f796db3c9e965034bbb25d16 Mon Sep 17 00:00:00 2001 From: martacki Date: Wed, 17 Nov 2021 13:46:33 +0100 Subject: [PATCH 01/20] introduce hierarchical agglomeratice clustering (hac) --- config.default.yaml | 9 +++-- config.tutorial.yaml | 9 +++-- doc/configtables/clustering.csv | 9 +++-- envs/environment.yaml | 2 +- scripts/cluster_network.py | 63 +++++++++++++++++++++++++++++---- scripts/simplify_network.py | 10 ++++-- test/config.test1.yaml | 9 +++-- 7 files changed, 93 insertions(+), 18 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index f70e7c2c..631645b9 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -20,8 +20,13 @@ scenario: countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 26ead242..919da193 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -20,8 +20,13 @@ scenario: countries: ['DE'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-03-01" diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index 2f63f955..e488dd39 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,3 +1,8 @@ ,Unit,Values,Description -simplify,,, --- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +simplify_network,,, +-- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +-- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", +cluster_network,,, +-- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", diff --git a/envs/environment.yaml b/envs/environment.yaml index 29d743ac..5f6a8310 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -12,7 +12,6 @@ dependencies: - pip - mamba # esp for windows build - - pypsa>=0.18 - atlite>=0.2.5 - dask<=2021.3.1 # until https://github.com/dask/dask/issues/7583 is solved @@ -56,5 +55,6 @@ dependencies: - tabula-py - pip: + - git+https://github.com/pypsa/pypsa.git#egg=pypsa - vresutils==0.3.1 - tsam>=1.1.0 diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 980b73b0..7aae3ae6 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -138,7 +138,7 @@ import seaborn as sns from functools import reduce from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering, - _make_consense, get_clustering_from_busmap) + busmap_by_hac, _make_consense, get_clustering_from_busmap) from add_electricity import load_costs @@ -170,6 +170,45 @@ def weighting_for_country(n, x): return (w * (100. / w.max())).clip(lower=1.).astype(int) +def get_feature_for_hac(n, buses_i, feature=None): #buses_i = n.buses.index + + if feature is None: + feature = "solar+onwind-time" + + carriers = feature.split('-')[0].split('+') + if "offwind" in carriers: + carriers.remove("offwind") + carriers = np.append(carriers, network.generators.carrier.filter(like='offwind').unique()) + + if feature.split('-')[1] == 'cap': + feature_data = pd.DataFrame(index=buses_i, columns=carriers) + for carrier in carriers: + try: + feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() + .rename(index=lambda x: x.split(' ')[0])) + except: + feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() + .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) + + if feature.split('-')[1] == 'time': + feature_data = pd.DataFrame(columns=buses_i) + for carrier in carriers: + try: + # without simpl wildcard (bus names are "X X"): + feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) + .rename(columns=lambda x: x.split(' ')[0]))[buses_i] + except: + # with simpl wildcard (bus names are "X X X"): + feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) + .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + feature_data = feature_data.T + feature_data.columns = feature_data.columns.astype(str) # Timestamp will raise error in sklearn>=v1.2 + + feature_data = feature_data.fillna(0) + + return feature_data + + def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): """Determine the number of clusters per country""" @@ -221,12 +260,18 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): return pd.Series(m.n.get_values(), index=L.index).astype(int) -def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds): +def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds): if algorithm == "kmeans": algorithm_kwds.setdefault('n_init', 1000) algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('tol', 1e-6) + if algorithm == "hac": + feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) + elif feature is not None: + logger.info(f"keyword argument feature is only valid for algorithm 'hac'." + f"given feature {feature} will be ignored.") + n.determine_network_topology() n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name) @@ -250,8 +295,10 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) elif algorithm == "louvain": return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) + elif algorithm == "hac": + return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]) else: - raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.") + raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.") return (n.buses.groupby(['country', 'sub_network'], group_keys=False) .apply(busmap_for_country).squeeze().rename('busmap')) @@ -259,7 +306,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, line_length_factor=1.25, potential_mode='simple', solver_name="cbc", - algorithm="kmeans", extended_link_costs=0, focus_weights=None): + algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None): + + logger.info(f"Clustering network using algorithm {algorithm} and feature {feature}...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -273,7 +322,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr busmap.index = busmap.index.astype(str) logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") else: - busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) + busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature) clustering = get_clustering_from_busmap( n, busmap, @@ -313,7 +362,7 @@ def cluster_regions(busmaps, input=None, output=None): for which in ('regions_onshore', 'regions_offshore'): regions = gpd.read_file(getattr(input, which)).set_index('name') - geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.cascaded_union) + geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union) regions_c = gpd.GeoDataFrame(dict(geometry=geom_c)) regions_c.index.name = 'name' save_to_geojson(regions_c, getattr(output, which)) @@ -377,6 +426,8 @@ if __name__ == "__main__": line_length_factor=line_length_factor, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], + algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'), + feature=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('feature', None), extended_link_costs=hvac_overhead_cost, focus_weights=focus_weights) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 85bc4d15..8a93952c 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -360,7 +360,7 @@ def aggregate_to_substations(n, buses_i=None): return clustering.network, busmap -def cluster(n, n_clusters): +def cluster(n, n_clusters, algorithm="kmeans", feature=None): logger.info(f"Clustering to {n_clusters} buses") focus_weights = snakemake.config.get('focus_weights', None) @@ -377,8 +377,10 @@ def cluster(n, n_clusters): potential_mode = (consense(pd.Series([snakemake.config['renewable'][tech]['potential'] for tech in renewable_carriers])) if len(renewable_carriers) > 0 else 'conservative') + clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], + algorithm=algorithm, feature=feature, focus_weights=focus_weights) return clustering.network, clustering.busmap @@ -400,12 +402,14 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] - if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): + if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) if snakemake.wildcards.simpl: - n, cluster_map = cluster(n, int(snakemake.wildcards.simpl)) + n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), + algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'), + feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None)) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed diff --git a/test/config.test1.yaml b/test/config.test1.yaml index 2986037b..0c34ea13 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -19,8 +19,13 @@ scenario: countries: ['DE'] clustering: - simplify: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + simplify_network: + to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: hac + feature: solar+onwind-time + cluster_network: + algorithm: hac # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. snapshots: start: "2013-03-01" From 6c4ea69e9563c257961e10f3e8d37b85d0cff7cf Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 10 Dec 2021 17:53:40 +0100 Subject: [PATCH 02/20] clustering: own config for clustering settings --- scripts/cluster_network.py | 14 +++++++++----- scripts/simplify_network.py | 5 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 7aae3ae6..415d3820 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -170,7 +170,10 @@ def weighting_for_country(n, x): return (w * (100. / w.max())).clip(lower=1.).astype(int) -def get_feature_for_hac(n, buses_i, feature=None): #buses_i = n.buses.index +def get_feature_for_hac(n, buses_i=None, feature=None): + + if buses_i is None: + buses_i = n.buses.index if feature is None: feature = "solar+onwind-time" @@ -269,8 +272,8 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori if algorithm == "hac": feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) elif feature is not None: - logger.info(f"keyword argument feature is only valid for algorithm 'hac'." - f"given feature {feature} will be ignored.") + logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." + f"given feature {feature} will be ignored.") n.determine_network_topology() @@ -422,12 +425,13 @@ if __name__ == "__main__": potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential'] for tech in renewable_carriers])) custom_busmap = snakemake.config["enable"].get("custom_busmap", False) + cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {}) clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, line_length_factor=line_length_factor, potential_mode=potential_mode, solver_name=snakemake.config['solving']['solver']['name'], - algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'), - feature=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('feature', None), + algorithm=cluster_config.get('algorithm', 'kmeans'), + feature=cluster_config.get('feature', None), extended_link_costs=hvac_overhead_cost, focus_weights=focus_weights) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 8a93952c..9464c3f7 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -407,9 +407,10 @@ if __name__ == "__main__": busmaps.append(substation_map) if snakemake.wildcards.simpl: + cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), - algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'), - feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None)) + algorithm=cluster_config.get('algorithm', 'hac'), + feature=cluster_config.get('feature', None)) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed From 256ac48b470470571d0b5751319e8f441ce39e0c Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 16:45:00 +0100 Subject: [PATCH 03/20] resolve merging master bugs --- scripts/cluster_network.py | 13 +++++++++---- scripts/simplify_network.py | 6 +++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index d4b3b139..1c55234c 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -190,9 +190,11 @@ def get_feature_for_hac(n, buses_i=None, feature=None): feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: try: + # without simpl wildcard (bus names are "X X"): feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() .rename(index=lambda x: x.split(' ')[0])) except: + # with simpl wildcard (bus names are "X X X"): feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) @@ -208,7 +210,7 @@ def get_feature_for_hac(n, buses_i=None, feature=None): feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # Timestamp will raise error in sklearn>=v1.2 + feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 feature_data = feature_data.fillna(0) @@ -309,9 +311,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, line_length_factor=1.25, potential_mode='simple', solver_name="cbc", - algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None): + algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None): - logger.info(f"Clustering network using algorithm {algorithm} and feature {feature}...") + logger.info(f"Clustering network using algorithm ``{algorithm}`` and feature ``{feature}``...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -424,10 +426,13 @@ if __name__ == "__main__": custom_busmap.index = custom_busmap.index.astype(str) logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") + cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {}) clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, line_length_factor, potential_mode, snakemake.config['solving']['solver']['name'], - "kmeans", hvac_overhead_cost, focus_weights) + cluster_config.get("algorithm", "hac"), + cluster_config.get("feature", "solar+onwind-time"), + hvac_overhead_cost, focus_weights) update_p_nom_max(n) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 0e9dd385..23facc79 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -353,11 +353,10 @@ def aggregate_to_substations(n, buses_i=None): line_length_factor=1.0, generator_strategies={'p_nom_max': 'sum'}, scale_link_capital_costs=False) - return clustering.network, busmap -def cluster(n, n_clusters, config, algorithm="kmeans", feature=None): +def cluster(n, n_clusters, config, algorithm="hac", feature=None): logger.info(f"Clustering to {n_clusters} buses") focus_weights = config.get('focus_weights', None) @@ -403,7 +402,8 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] - if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): + cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) + if cluster_config.get('to_substations', False): n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) From b5dbf4eb3240222e3a3145bbf87b3d66a63d7169 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 17:19:23 +0100 Subject: [PATCH 04/20] overwrite country of isolated buses --- scripts/cluster_network.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 1c55234c..bd49556f 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -271,11 +271,36 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('tol', 1e-6) + def fix_country_assignment_for_hac(n): + # overwrite country of nodes that are disconnected from their country-topology + for country in n.buses.country.unique(): + m = n.copy() + m.buses = m.buses.query("country in @country") + m.lines = m.lines.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + + _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) + component_sizes = component.value_counts() + + if len(component_sizes)>1: + disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index + neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] + + logger.info(f"overwriting country ``{country}`` of bus ``{disconnected_bus}`` to new country ``{new_country}``, " + "because it is disconnected from its inital inter-country transmission grid.") + n.buses.at[disconnected_bus, "country"] = new_country + return n + if algorithm == "hac": + from scipy.sparse import csgraph + feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) + n = fix_country_assignment_for_hac(n) elif feature is not None: logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." - f"given feature {feature} will be ignored.") + f"given feature ``{feature}`` will be ignored.") n.determine_network_topology() From 82a0338e9f3f8b68eb7d73aad0ceb3b26862d028 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 4 Feb 2022 20:27:18 +0100 Subject: [PATCH 05/20] treatment of outliers and small feature-bugfix --- scripts/cluster_network.py | 17 +++++++++-------- scripts/simplify_network.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index bd49556f..2cc406eb 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -272,6 +272,8 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori algorithm_kwds.setdefault('tol', 1e-6) def fix_country_assignment_for_hac(n): + from scipy.sparse import csgraph + # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): m = n.copy() @@ -288,19 +290,18 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country ``{country}`` of bus ``{disconnected_bus}`` to new country ``{new_country}``, " + logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " "because it is disconnected from its inital inter-country transmission grid.") n.buses.at[disconnected_bus, "country"] = new_country return n if algorithm == "hac": - from scipy.sparse import csgraph - feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) n = fix_country_assignment_for_hac(n) - elif feature is not None: - logger.warning(f"Keyword argument feature is only valid for algorithm 'hac'." - f"given feature ``{feature}`` will be ignored.") + + if (algorithm != "hac") and (feature is not None): + logger.warning(f"Keyword argument feature is only valid for algorithm `hac`. " + f"Given feature `{feature}` will be ignored.") n.determine_network_topology() @@ -338,7 +339,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr line_length_factor=1.25, potential_mode='simple', solver_name="cbc", algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None): - logger.info(f"Clustering network using algorithm ``{algorithm}`` and feature ``{feature}``...") + logger.info(f"Clustering network using algorithm `{algorithm}` and feature `{feature}`...") if potential_mode == 'simple': p_nom_max_strategy = np.sum @@ -348,7 +349,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'") if not custom_busmap: - busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) + busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature) else: busmap = custom_busmap diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 23facc79..b28bc4dc 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -407,7 +407,21 @@ if __name__ == "__main__": n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) + # treatment of outliers (nodes without a profile for considered carrier) for "cluster_network" + if snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac": + carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + n, busmap_hac = aggregate_to_substations(n, buses_i) + busmaps.append(busmap_hac) + if snakemake.wildcards.simpl: + # treatment of outliers (nodes without a profile for a considered carrier) for "simplify" + if cluster_config.get("algorithm", "hac") == "hac": + carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + n, busmap_hac = aggregate_to_substations(n, buses_i) + busmaps.append(busmap_hac) + # conduct clustering n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, algorithm=cluster_config.get('algorithm', 'hac'), feature=cluster_config.get('feature', None)) From f02d5fe82161f7373423c8ff6571a4319ab8c164 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 7 Mar 2022 11:43:04 +0100 Subject: [PATCH 06/20] fix clustering setup for hac according to fneum suggestions --- doc/configtables/clustering.csv | 4 ++-- envs/environment.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index e488dd39..d14ab395 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,8 +1,8 @@ ,Unit,Values,Description simplify_network,,, -- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" --- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", -- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", cluster_network,,, --- algorithm,str,"One of {‘kmenas’, ‘hac’}", +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", -- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", diff --git a/envs/environment.yaml b/envs/environment.yaml index 1d8d9d28..c64b5fbb 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -10,6 +10,7 @@ dependencies: - python>=3.8 - pip + - pypsa>=0.19.1 - atlite>=0.2.5 - dask @@ -53,6 +54,5 @@ dependencies: - tabula-py - pip: - - git+https://github.com/pypsa/pypsa.git#egg=pypsa - vresutils>=0.3.1 - tsam>=1.1.0 From 68f332f5cc8603ceb890fb41f01cfc02bbebb10e Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 17 Mar 2022 15:42:04 +0100 Subject: [PATCH 07/20] suggestions by coroa and fneum --- scripts/cluster_network.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index f4058e79..003442ff 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -189,28 +189,20 @@ def get_feature_for_hac(n, buses_i=None, feature=None): if feature.split('-')[1] == 'cap': feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0])) - except: - # with simpl wildcard (bus names are "X X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus) + feature_data[carrier] = attach if feature.split('-')[1] == 'time': feature_data = pd.DataFrame(columns=buses_i) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0]))[buses_i] - except: - # with simpl wildcard (bus names are "X X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus) + feature_data = pd.concat([feature_data, attach], axis=0)[buses_i] + feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 + # timestamp raises error in sklearn >= v1.2: + feature_data.columns = feature_data.columns.astype(str) feature_data = feature_data.fillna(0) @@ -283,16 +275,24 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) component_sizes = component.value_counts() if len(component_sizes)>1: - disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index - neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + disconnected_bus = component[component==component_sizes.index[-1]].index[0] + + neighbor_bus = ( + n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus") + .iloc[0][['bus0', 'bus1']] + ) new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " - "because it is disconnected from its inital inter-country transmission grid.") + logger.info( + f"overwriting country `{country}` of bus `{disconnected_bus}` " + f"to new country `{new_country}`, because it is disconnected " + "from its inital inter-country transmission grid." + ) n.buses.at[disconnected_bus, "country"] = new_country return n From 4c1c5e3a4e2a527792cb9a3dcc0b4d5c08d45f29 Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 17 Mar 2022 17:38:30 +0100 Subject: [PATCH 08/20] suggestions by coroa and fneum --- scripts/cluster_network.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index f4058e79..003442ff 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -189,28 +189,20 @@ def get_feature_for_hac(n, buses_i=None, feature=None): if feature.split('-')[1] == 'cap': feature_data = pd.DataFrame(index=buses_i, columns=carriers) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0])) - except: - # with simpl wildcard (bus names are "X X X"): - feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean() - .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])) + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus) + feature_data[carrier] = attach if feature.split('-')[1] == 'time': feature_data = pd.DataFrame(columns=buses_i) for carrier in carriers: - try: - # without simpl wildcard (bus names are "X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0]))[buses_i] - except: - # with simpl wildcard (bus names are "X X X"): - feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier) - .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i] + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus) + feature_data = pd.concat([feature_data, attach], axis=0)[buses_i] + feature_data = feature_data.T - feature_data.columns = feature_data.columns.astype(str) # timestamp raises error in sklearn>=v1.2 + # timestamp raises error in sklearn >= v1.2: + feature_data.columns = feature_data.columns.astype(str) feature_data = feature_data.fillna(0) @@ -283,16 +275,24 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + component = pd.Series(labels, index=m.buses.index) component_sizes = component.value_counts() if len(component_sizes)>1: - disconnected_bus = component[component==component_sizes[component_sizes==component_sizes.min()].index[0]].index - neighbor_bus = n.lines.query("bus0 in @disconnected_bus or bus1 in @disconnected_bus").iloc[0][['bus0','bus1']] + disconnected_bus = component[component==component_sizes.index[-1]].index[0] + + neighbor_bus = ( + n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus") + .iloc[0][['bus0', 'bus1']] + ) new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] - logger.info(f"overwriting country `{country}` of bus `{disconnected_bus}` to new country `{new_country}`, " - "because it is disconnected from its inital inter-country transmission grid.") + logger.info( + f"overwriting country `{country}` of bus `{disconnected_bus}` " + f"to new country `{new_country}`, because it is disconnected " + "from its inital inter-country transmission grid." + ) n.buses.at[disconnected_bus, "country"] = new_country return n From 8cb4c17930909c873d66f003ea41a2e09a924e4f Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 22 Mar 2022 16:53:05 +0100 Subject: [PATCH 09/20] unify vre treatment for hac clustering for simplify_network and cluster_network --- scripts/simplify_network.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index b28bc4dc..f9ac8ad7 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -351,7 +351,7 @@ def aggregate_to_substations(n, buses_i=None): aggregate_generators_carriers=None, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=1.0, - generator_strategies={'p_nom_max': 'sum'}, + generator_strategies={'p_nom_max': 'sum', 'p_nom_min': 'sum'}, scale_link_capital_costs=False) return clustering.network, busmap @@ -407,21 +407,20 @@ if __name__ == "__main__": n, substation_map = aggregate_to_substations(n) busmaps.append(substation_map) - # treatment of outliers (nodes without a profile for considered carrier) for "cluster_network" - if snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac": + # treatment of outliers (nodes without a profile for considered carrier): + # all nodes that have no profile of the given carrier are being aggregated to closest neighbor + if ( + snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac" or + cluster_config.get("algorithm", "hac") == "hac" + ): carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') - buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) - n, busmap_hac = aggregate_to_substations(n, buses_i) - busmaps.append(busmap_hac) - - if snakemake.wildcards.simpl: - # treatment of outliers (nodes without a profile for a considered carrier) for "simplify" - if cluster_config.get("algorithm", "hac") == "hac": - carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') - buses_i = list(set(n.buses.index)-set(n.generators.query("carrier in @carriers").bus)) + for carrier in carriers: + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus)) + logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.') n, busmap_hac = aggregate_to_substations(n, buses_i) busmaps.append(busmap_hac) - # conduct clustering + + if snakemake.wildcards.simpl: n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, algorithm=cluster_config.get('algorithm', 'hac'), feature=cluster_config.get('feature', None)) From 7f29c31abe6cf133f28f4b4558cccb7f20899d72 Mon Sep 17 00:00:00 2001 From: martacki Date: Thu, 24 Mar 2022 13:17:01 +0100 Subject: [PATCH 10/20] .copy() shortcut --- scripts/cluster_network.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 9c6d541f..f659f8a8 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -269,10 +269,7 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): - m = n.copy() - m.buses = m.buses.query("country in @country") - m.lines = m.lines.query("bus0 in @m.buses.index and bus1 in @m.buses.index") - m.links = m.links.query("bus0 in @m.buses.index and bus1 in @m.buses.index") + m = n[n.buses.country ==country].copy() _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) From bef4967e84567434aa836d6d572c0316abe6f96f Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 20 Jun 2022 18:58:23 +0200 Subject: [PATCH 11/20] clustering strategies moved to configurables --- config.default.yaml | 4 ++++ config.tutorial.yaml | 4 ++++ scripts/cluster_network.py | 43 +++++++++++++++-------------------- scripts/simplify_network.py | 45 ++++++++++++++++++++----------------- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index d2bf6159..9462719b 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -22,6 +22,10 @@ countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'F clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: "sum" # use "min" for more conservative assumptions + p_nom_min: "sum" snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 225d8f78..4dc7a94d 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -22,6 +22,10 @@ countries: ['BE'] clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: "sum" # use "min" for more conservative assumptions + p_nom_min: "sum" snapshots: start: "2013-03-01" diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 1d5608e2..0a3d768a 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -11,11 +11,10 @@ Relevant Settings .. code:: yaml - focus_weights: + clustering: + aggregation_strategies: - renewable: (keys) - {technology}: - potential: + focus_weights: solving: solver: @@ -259,15 +258,16 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, - line_length_factor=1.25, potential_mode='simple', solver_name="cbc", + line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc", algorithm="kmeans", extended_link_costs=0, focus_weights=None): - if potential_mode == 'simple': - p_nom_max_strategy = pd.Series.sum - elif potential_mode == 'conservative': - p_nom_max_strategy = pd.Series.min - else: - raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'") + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + + # this snippet supports compatibility of PyPSA and PyPSA-EUR: + if "p_nom_max" in generator_strategies: + if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -276,19 +276,12 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr clustering = get_clustering_from_busmap( n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=aggregate_carriers, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=line_length_factor, - generator_strategies={'p_nom_max': p_nom_max_strategy, - 'p_nom_min': pd.Series.sum, - 'p_min_pu': pd.Series.mean, - 'marginal_cost': pd.Series.mean, - 'committable': np.any, - 'ramp_limit_up': pd.Series.max, - 'ramp_limit_down': pd.Series.max, - }, + generator_strategies=generator_strategies, scale_link_capital_costs=False) if not n.links.empty: @@ -375,8 +368,8 @@ if __name__ == "__main__": "The `potential` configuration option must agree for all renewable carriers, for now!" ) return v - potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential'] - for tech in renewable_carriers])) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + custom_busmap = snakemake.config["enable"].get("custom_busmap", False) if custom_busmap: custom_busmap = pd.read_csv(snakemake.input.custom_busmap, index_col=0, squeeze=True) @@ -384,12 +377,12 @@ if __name__ == "__main__": logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, - line_length_factor, potential_mode, + line_length_factor, aggregation_strategies, snakemake.config['solving']['solver']['name'], "kmeans", hvac_overhead_cost, focus_weights) - - update_p_nom_max(n) + update_p_nom_max(clustering.network) + clustering.network.export_to_netcdf(snakemake.output.network) for attr in ('busmap', 'linemap'): #also available: linemap_positive, linemap_negative getattr(clustering, attr).to_csv(snakemake.output[attr]) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 287dfe32..7d51c511 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -13,6 +13,10 @@ Relevant Settings .. code:: yaml + clustering: + simplify: + aggregation_strategies: + costs: USD2013_to_EUR2013: discountrate: @@ -22,10 +26,6 @@ Relevant Settings electricity: max_hours: - renewables: (keys) - {technology}: - potential: - lines: length_factor: @@ -320,7 +320,7 @@ def remove_stubs(n, costs, config, output): return n, busmap -def aggregate_to_substations(n, buses_i=None): +def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=None): # can be used to aggregate a selection of buses to electrically closest neighbors # if no buses are given, nodes that are no substations or without offshore connection are aggregated @@ -345,19 +345,29 @@ def aggregate_to_substations(n, buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) + # default aggregation strategies must be specified within the function, otherwise (when defaults are passed in + # the function's definition) they get lost in case custom values for different variables are specified in the config + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + + # this snippet supports compatibility of PyPSA and PyPSA-EUR: + if "p_nom_max" in generator_strategies: + if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + clustering = get_clustering_from_busmap(n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=None, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=1.0, - generator_strategies={'p_nom_max': 'sum'}, + generator_strategies=generator_strategies, scale_link_capital_costs=False) return clustering.network, busmap -def cluster(n, n_clusters, config): +def cluster(n, n_clusters, config, aggregation_strategies=dict()): logger.info(f"Clustering to {n_clusters} buses") focus_weights = config.get('focus_weights', None) @@ -365,16 +375,9 @@ def cluster(n, n_clusters, config): renewable_carriers = pd.Index([tech for tech in n.generators.carrier.unique() if tech.split('-', 2)[0] in config['renewable']]) - def consense(x): - v = x.iat[0] - assert ((x == v).all() or x.isnull().all()), ( - "The `potential` configuration option must agree for all renewable carriers, for now!" - ) - return v - potential_mode = (consense(pd.Series([config['renewable'][tech]['potential'] - for tech in renewable_carriers])) - if len(renewable_carriers) > 0 else 'conservative') - clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, + + clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, + aggregation_strategies=aggregation_strategies, solver_name=config['solving']['solver']['name'], focus_weights=focus_weights) @@ -389,6 +392,8 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.network) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + n, trafo_map = simplify_network_to_380(n) Nyears = n.snapshot_weightings.objective.sum() / 8760 @@ -402,11 +407,11 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): - n, substation_map = aggregate_to_substations(n) + n, substation_map = aggregate_to_substations(n, snakemake.config, aggregation_strategies) busmaps.append(substation_map) if snakemake.wildcards.simpl: - n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config) + n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, aggregation_strategies) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed From b4d8dd8ecb827c3ed450375e80fcf3717cc2aaab Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 20 Jun 2022 19:21:08 +0200 Subject: [PATCH 12/20] add changes from PR #379 --- config.default.yaml | 9 +++++++-- config.tutorial.yaml | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 9462719b..7162d449 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -24,8 +24,13 @@ clustering: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) aggregation_strategies: generators: - p_nom_max: "sum" # use "min" for more conservative assumptions - p_nom_min: "sum" + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 4dc7a94d..f18f23f4 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -24,8 +24,13 @@ clustering: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) aggregation_strategies: generators: - p_nom_max: "sum" # use "min" for more conservative assumptions - p_nom_min: "sum" + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-03-01" From bdd0cc3aa125fa5a721357ebfc47418b79475716 Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 21 Jun 2022 18:42:49 +0200 Subject: [PATCH 13/20] clustering strats to configurables: review suggestions --- scripts/cluster_network.py | 11 +++++++---- scripts/simplify_network.py | 19 +++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 0a3d768a..fd66b043 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -263,11 +263,8 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) - # this snippet supports compatibility of PyPSA and PyPSA-EUR: - if "p_nom_max" in generator_strategies: - if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -369,6 +366,12 @@ if __name__ == "__main__": ) return v aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + aggregation_strategies = {} + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in aggregation_strategies.keys() + } custom_busmap = snakemake.config["enable"].get("custom_busmap", False) if custom_busmap: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 7d51c511..52e0c815 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -320,7 +320,7 @@ def remove_stubs(n, costs, config, output): return n, busmap -def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=None): +def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): # can be used to aggregate a selection of buses to electrically closest neighbors # if no buses are given, nodes that are no substations or without offshore connection are aggregated @@ -345,15 +345,13 @@ def aggregate_to_substations(n, config, aggregation_strategies=dict(), buses_i=N busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies must be specified within the function, otherwise (when defaults are passed in - # the function's definition) they get lost in case custom values for different variables are specified in the config + # default aggregation strategies must be specified within the function, otherwise (when defaults + # are passed in the function's definition) they get lost in case custom values for different + # variables are specified in the config. bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) - # this snippet supports compatibility of PyPSA and PyPSA-EUR: - if "p_nom_max" in generator_strategies: - if generator_strategies["p_nom_max"] == "min": generator_strategies["p_nom_max"] = np.min + generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, @@ -393,6 +391,11 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.network) aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in aggregation_strategies.keys() + } n, trafo_map = simplify_network_to_380(n) @@ -407,7 +410,7 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): - n, substation_map = aggregate_to_substations(n, snakemake.config, aggregation_strategies) + n, substation_map = aggregate_to_substations(n, aggregation_strategies) busmaps.append(substation_map) if snakemake.wildcards.simpl: From 8dba48c4125a5578a65f4cf96c94f33780fb36ca Mon Sep 17 00:00:00 2001 From: martacki Date: Tue, 21 Jun 2022 19:08:22 +0200 Subject: [PATCH 14/20] clustering strats to configurables: documentation and testing --- doc/configtables/clustering.csv | 5 +++++ doc/release_notes.rst | 1 + test/config.test1.yaml | 9 +++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index 2f63f955..f178ff5c 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,3 +1,8 @@ ,Unit,Values,Description simplify,,, -- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +-- aggregation_strategies,,, +-- -- generators,,, +-- -- -- {key},str,"{key} can be any of the component of the generator (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator." +-- -- buses,,, +-- -- -- {key},str,"{key} can be any of the component of the bus (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus." diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 963a1175..c000a046 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -74,6 +74,7 @@ Upcoming Release * Update rasterio version to correctly calculate exclusion raster +* Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent. PyPSA-Eur 0.4.0 (22th September 2021) ===================================== diff --git a/test/config.test1.yaml b/test/config.test1.yaml index a9ce1e50..e3f39ab5 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -21,6 +21,15 @@ countries: ['BE'] clustering: simplify: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + aggregation_strategies: + generators: + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max snapshots: start: "2013-03-01" From c9c738e96b2240c8522f84a1941b163069c0e351 Mon Sep 17 00:00:00 2001 From: martacki Date: Fri, 24 Jun 2022 20:57:53 +0200 Subject: [PATCH 15/20] clustering strats to configurables: set defaults for yaml-incompatible strats + include in stubaggregation --- config.default.yaml | 1 + config.tutorial.yaml | 1 + scripts/cluster_network.py | 6 +++--- scripts/simplify_network.py | 37 +++++++++++++++++++++++++------------ test/config.test1.yaml | 1 + 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index 7162d449..144f416e 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -31,6 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency: mean snapshots: start: "2013-01-01" diff --git a/config.tutorial.yaml b/config.tutorial.yaml index f18f23f4..31ca7f99 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -31,6 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency:mean snapshots: start: "2013-03-01" diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index fd66b043..93cd89cd 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -264,7 +264,8 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) @@ -366,7 +367,6 @@ if __name__ == "__main__": ) return v aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) - aggregation_strategies = {} # translate str entries of aggregation_strategies to pd.Series functions: aggregation_strategies = { p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} @@ -383,7 +383,7 @@ if __name__ == "__main__": line_length_factor, aggregation_strategies, snakemake.config['solving']['solver']['name'], "kmeans", hvac_overhead_cost, focus_weights) - + update_p_nom_max(clustering.network) clustering.network.export_to_netcdf(snakemake.output.network) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 52e0c815..0fda5c77 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -189,7 +189,10 @@ def _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, out -def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, aggregate_one_ports={"Load", "StorageUnit"}): +def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregate_one_ports={"Load", "StorageUnit"}, + aggregation_strategies=dict()): + def replace_components(n, c, df, pnl): n.mremove(c, n.df(c).index) @@ -200,7 +203,12 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output) - generators, generators_pnl = aggregategenerators(n, busmap, custom_strategies={'p_nom_min': np.sum}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) + + generators, generators_pnl = aggregategenerators( + n, busmap, custom_strategies=generator_strategies + ) replace_components(n, "Generator", generators, generators_pnl) for one_port in aggregate_one_ports: @@ -214,7 +222,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a n.mremove(c, df.index[df.bus0.isin(buses_to_del) | df.bus1.isin(buses_to_del)]) -def simplify_links(n, costs, config, output): +def simplify_links(n, costs, config, output, aggregation_strategies=dict()): ## Complex multi-node links are folded into end-points logger.info("Simplifying connected link components") @@ -306,17 +314,19 @@ def simplify_links(n, costs, config, output): logger.debug("Collecting all components using the busmap") - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap -def remove_stubs(n, costs, config, output): +def remove_stubs(n, costs, config, output, aggregation_strategies=dict()): logger.info("Removing stubs") busmap = busmap_by_stubs(n) # ['country']) connection_costs_to_bus = _compute_connection_costs_to_bus(n, busmap, costs, config) - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap @@ -345,13 +355,14 @@ def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies must be specified within the function, otherwise (when defaults - # are passed in the function's definition) they get lost in case custom values for different - # variables are specified in the config. + # default aggregation strategies that cannot be defined in .yaml format must be specified within + # the function, otherwise (when defaults are passed in the function's definition) they get lost + # in case custom values for different variables are specified in the config. bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) - generator_strategies = aggregation_strategies.get("generators", {"p_nom_max": "sum"}) + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, @@ -403,9 +414,11 @@ if __name__ == "__main__": technology_costs = load_costs(snakemake.input.tech_costs, snakemake.config['costs'], snakemake.config['electricity'], Nyears) - n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output) + n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies) - n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output) + n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies=aggregation_strategies) busmaps = [trafo_map, simplify_links_map, stub_map] diff --git a/test/config.test1.yaml b/test/config.test1.yaml index e3f39ab5..6d626f7e 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -30,6 +30,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max + efficiency: mean snapshots: start: "2013-03-01" From 2dfa2753ba8b5de6df56d6e6f9b278aa32047d55 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 13:59:04 +0200 Subject: [PATCH 16/20] clustering strats to configurables: config spacing --- config.tutorial.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 31ca7f99..edf0091c 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -31,7 +31,7 @@ clustering: committable: any ramp_limit_up: max ramp_limit_down: max - efficiency:mean + efficiency: mean snapshots: start: "2013-03-01" From a3af137b74d6e505e2dae32fc7cea2b2226f834c Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 14:18:47 +0200 Subject: [PATCH 17/20] clustering strats to configurables: move duplicate code to _helpers script & import --- scripts/_helpers.py | 16 ++++++++++++++++ scripts/cluster_network.py | 8 ++------ scripts/simplify_network.py | 14 +++----------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 6e47c053..3c3e6ac7 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -210,6 +210,22 @@ def progress_retrieve(url, file): urllib.request.urlretrieve(url, file, reporthook=dlProgress) +def get_aggregation_strategies(aggregation_strategies): + # default aggregation strategies that cannot be defined in .yaml format must be specified within + # the function, otherwise (when defaults are passed in the function's definition) they get lost + # when custom values are specified in the config. + + import numpy as np + from pypsa.networkclustering import _make_consense + + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) + + return bus_strategies, generator_strategies + def mock_snakemake(rulename, **wildcards): """ diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 93cd89cd..6186a00e 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -121,7 +121,7 @@ Exemplary unsolved network clustered to 37 nodes: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies import pypsa import os @@ -261,11 +261,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc", algorithm="kmeans", extended_link_costs=0, focus_weights=None): - bus_strategies = dict(country=_make_consense("Bus", "country")) - bus_strategies.update(aggregation_strategies.get("buses", {})) - - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) if not isinstance(custom_busmap, pd.Series): busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 0fda5c77..37e7e698 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -83,7 +83,7 @@ The rule :mod:`simplify_network` does up to four things: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies from cluster_network import clustering_for_n_clusters, cluster_regions from add_electricity import load_costs @@ -203,8 +203,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output) - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + _, generator_strategies = get_aggregation_strategies(aggregation_strategies) generators, generators_pnl = aggregategenerators( n, busmap, custom_strategies=generator_strategies @@ -355,14 +354,7 @@ def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) - # default aggregation strategies that cannot be defined in .yaml format must be specified within - # the function, otherwise (when defaults are passed in the function's definition) they get lost - # in case custom values for different variables are specified in the config. - bus_strategies = dict(country=_make_consense("Bus", "country")) - bus_strategies.update(aggregation_strategies.get("buses", {})) - - generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} - generator_strategies.update(aggregation_strategies.get("generators", {})) + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) clustering = get_clustering_from_busmap(n, busmap, bus_strategies=bus_strategies, From 51de606aab6eddcba8d121873f779c4ce995afa8 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 27 Jun 2022 19:00:41 +0200 Subject: [PATCH 18/20] Revert "remove build/retrieve natura raster, directly use shapefile" --- .github/workflows/ci.yaml | 23 ++--- Snakefile | 27 ++++-- config.default.yaml | 2 + config.tutorial.yaml | 10 ++- doc/configtables/toplevel.csv | 2 + doc/preparation.rst | 2 + doc/preparation/build_natura_raster.rst | 39 +++++++++ doc/preparation/build_renewable_profiles.rst | 3 + doc/preparation/retrieve.rst | 27 ++++++ doc/release_notes.rst | 5 -- doc/tutorial.rst | 4 +- envs/environment.yaml | 1 - scripts/build_natura_raster.py | 89 ++++++++++++++++++++ scripts/build_renewable_profiles.py | 4 +- test/config.test1.yaml | 2 + 15 files changed, 200 insertions(+), 40 deletions(-) create mode 100644 doc/preparation/build_natura_raster.rst create mode 100644 scripts/build_natura_raster.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d447a56f..c753deab 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,13 @@ on: branches: - master pull_request: + branches: + - master schedule: - cron: "0 5 * * TUE" env: - CONDA_CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - DATA_CACHE_NUMBER: 1 + CACHE_NUMBER: 1 # Change this value to manually reset the environment cache jobs: build: @@ -65,26 +66,16 @@ jobs: miniforge-version: latest activate-environment: pypsa-eur use-mamba: true - - - name: Set cache dates - run: | - echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - echo "WEEK=$(date +'%Y%U')" >> $GITHUB_ENV - - - name: Cache data and cutouts folders - uses: actions/cache@v3 - with: - path: | - data - cutouts - key: data-cutouts-${{ env.WEEK }}-${{ env.DATA_CACHE_NUMBER }} + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Create environment cache uses: actions/cache@v2 id: cache with: path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CONDA_CACHE_NUMBER }} + key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - name: Update environment due to outdated or unavailable cache run: mamba env update -n pypsa-eur -f envs/environment.yaml diff --git a/Snakefile b/Snakefile index 42ecc45b..7678a401 100644 --- a/Snakefile +++ b/Snakefile @@ -67,12 +67,6 @@ if config['enable'].get('retrieve_databundle', True): script: 'scripts/retrieve_databundle.py' -rule retrieve_natura_data: - input: HTTP.remote("sdi.eea.europa.eu/datashare/s/H6QGCybMdLLnywo/download", additional_request_string="?path=%2FNatura2000_end2020_gpkg&files=Natura2000_end2020.gpkg", static=True) - output: "data/Natura2000_end2020.gpkg" - run: move(input[0], output[0]) - - rule retrieve_load_data: input: HTTP.remote("data.open-power-system-data.org/time_series/2019-06-05/time_series_60min_singleindex.csv", keep_local=True, static=True) output: "data/load_raw.csv" @@ -171,13 +165,28 @@ if config['enable'].get('retrieve_cutout', True): run: move(input[0], output[0]) +if config['enable'].get('build_natura_raster', False): + rule build_natura_raster: + input: + natura="data/bundle/natura/Natura2000_end2015.shp", + cutouts=expand("cutouts/{cutouts}.nc", **config['atlite']) + output: "resources/natura.tiff" + log: "logs/build_natura_raster.log" + script: "scripts/build_natura_raster.py" + + +if config['enable'].get('retrieve_natura_raster', True): + rule retrieve_natura_raster: + input: HTTP.remote("zenodo.org/record/4706686/files/natura.tiff", keep_local=True, static=True) + output: "resources/natura.tiff" + run: move(input[0], output[0]) + + rule build_renewable_profiles: input: base_network="networks/base.nc", corine="data/bundle/corine/g250_clc06_V18_5.tif", - natura=lambda w: ("data/Natura2000_end2020.gpkg" - if config["renewable"][w.technology]["natura"] - else []), + natura="resources/natura.tiff", gebco=lambda w: ("data/bundle/GEBCO_2014_2D.nc" if "max_depth" in config["renewable"][w.technology].keys() else []), diff --git a/config.default.yaml b/config.default.yaml index 7e6a1f78..144f416e 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -43,6 +43,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: diff --git a/config.tutorial.yaml b/config.tutorial.yaml index ba0bb01a..edf0091c 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -43,6 +43,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: @@ -87,7 +89,7 @@ renewable: 24, 25, 26, 27, 28, 29, 31, 32] distance: 1000 distance_grid_codes: [1, 2, 3, 4, 5, 6] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 offwind-ac: @@ -98,7 +100,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true max_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -111,7 +113,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true min_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -133,7 +135,7 @@ renewable: # correction_factor: 0.854337 corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 diff --git a/doc/configtables/toplevel.csv b/doc/configtables/toplevel.csv index 8965a0bc..b7f39d05 100644 --- a/doc/configtables/toplevel.csv +++ b/doc/configtables/toplevel.csv @@ -12,4 +12,6 @@ enable,,, -- retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zenodo via the rule :mod:`retrieve_databundle` or whether to keep a custom databundle located in the corresponding folder." -- build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`." -- retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`." +-- build_natura_raster,bool,"{true, false}","Switch to enable the creation of the raster ``natura.tiff`` via the rule :mod:`build_natura_raster`." +-- retrieve_natura_raster,bool,"{true, false}","Switch to enable the retrieval of ``natura.tiff`` from zenodo with :mod:`retrieve_natura_raster`." -- custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``." diff --git a/doc/preparation.rst b/doc/preparation.rst index 7f42190c..dba5e981 100644 --- a/doc/preparation.rst +++ b/doc/preparation.rst @@ -27,6 +27,7 @@ With these and the externally extracted ENTSO-E online map topology Then the process continues by calculating conventional power plant capacities, potentials, and per-unit availability time series for variable renewable energy carriers and hydro power plants with the following rules: - :mod:`build_powerplants` for today's thermal power plant capacities using `powerplantmatching `_ allocating these to the closest substation for each powerplant, +- :mod:`build_natura_raster` for rasterising NATURA2000 natural protection areas, - :mod:`build_renewable_profiles` for the hourly capacity factors and installation potentials constrained by land-use in each substation's Voronoi cell for PV, onshore and offshore wind, and - :mod:`build_hydro_profile` for the hourly per-unit hydro power availability time series. @@ -40,6 +41,7 @@ together into a detailed PyPSA network stored in ``networks/elec.nc``. preparation/build_shapes preparation/build_load_data preparation/build_cutout + preparation/build_natura_raster preparation/prepare_links_p_nom preparation/base_network preparation/build_bus_regions diff --git a/doc/preparation/build_natura_raster.rst b/doc/preparation/build_natura_raster.rst new file mode 100644 index 00000000..e3ec4364 --- /dev/null +++ b/doc/preparation/build_natura_raster.rst @@ -0,0 +1,39 @@ +.. + SPDX-FileCopyrightText: 2019-2020 The PyPSA-Eur Authors + + SPDX-License-Identifier: CC-BY-4.0 + +.. _natura: + +Rule ``build_natura_raster`` +=============================== + +.. graphviz:: + :align: center + + digraph snakemake_dag { + graph [bgcolor=white, + margin=0, + size="8,5" + ]; + node [fontname=sans, + fontsize=10, + penwidth=2, + shape=box, + style=rounded + ]; + edge [color=grey, + penwidth=2 + ]; + 9 [color="0.22 0.6 0.85", + label=build_renewable_profiles]; + 12 [color="0.31 0.6 0.85", + fillcolor=gray, + label=build_natura_raster, + style=filled]; + 12 -> 9; + } + +| + +.. automodule:: build_natura_raster diff --git a/doc/preparation/build_renewable_profiles.rst b/doc/preparation/build_renewable_profiles.rst index adc4d6ca..27e61583 100644 --- a/doc/preparation/build_renewable_profiles.rst +++ b/doc/preparation/build_renewable_profiles.rst @@ -41,6 +41,9 @@ Rule ``build_renewable_profiles`` 8 [color="0.00 0.6 0.85", label=build_shapes]; 8 -> 9; + 12 [color="0.31 0.6 0.85", + label=build_natura_raster]; + 12 -> 9; 13 [color="0.56 0.6 0.85", label=build_cutout]; 13 -> 9; diff --git a/doc/preparation/retrieve.rst b/doc/preparation/retrieve.rst index 21187924..42479284 100644 --- a/doc/preparation/retrieve.rst +++ b/doc/preparation/retrieve.rst @@ -50,3 +50,30 @@ The :ref:`tutorial` uses a smaller cutout than required for the full model (30 M .. seealso:: For details see :mod:`build_cutout` and read the `atlite documentation `_. + + +Rule ``retrieve_natura_raster`` +------------------------------- + +.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4706686.svg + :target: https://doi.org/10.5281/zenodo.4706686 + +This rule, as a substitute for :mod:`build_natura_raster`, downloads an already rasterized version (`natura.tiff `_) of `Natura 2000 `_ natural protection areas to reduce computation times. The file is placed into the ``resources`` sub-directory. + +**Relevant Settings** + +.. code:: yaml + + enable: + build_natura_raster: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`toplevel_cf` + +**Outputs** + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + +.. seealso:: + For details see :mod:`build_natura_raster`. diff --git a/doc/release_notes.rst b/doc/release_notes.rst index df7af28e..c000a046 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -62,11 +62,6 @@ Upcoming Release * New network topology extracted from the ENTSO-E interactive map. -* Remove rules to build or retrieve rasterized NATURA 2000 dataset. Renewable potential calculation now directly uses the shapefiles. - -* Cache data and cutouts folders. This cache will be updated weekly. - -* Add rule to automatically retrieve Natura2000 natural protection areas. Switch of file format to GPKG. * The unused argument ``simple_hvdc_costs`` in :mod:`add_electricity` was removed. * Iterative solving with impedance updates is skipped if there are no expandable lines. diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 1c247f8e..c37abb39 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -35,8 +35,8 @@ To run the tutorial, use this as your configuration file ``config.yaml``. .../pypsa-eur % cp config.tutorial.yaml config.yaml -This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle` -and :mod:`retrieve_cutout` totalling at less than 250 MB. +This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle`, +:mod:`retrieve_natura_raster`, :mod:`retrieve_cutout` totalling at less than 250 MB. The full set of data dependencies would consume 5.3 GB. For more information on the data dependencies of PyPSA-Eur, continue reading :ref:`data`. diff --git a/envs/environment.yaml b/envs/environment.yaml index fd6dff49..f8060de1 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -45,7 +45,6 @@ dependencies: # GIS dependencies: - cartopy - descartes - - fiona # explicit for Windows - rasterio<=1.2.9 # 1.2.10 creates error https://github.com/PyPSA/atlite/issues/238 # PyPSA-Eur-Sec Dependencies diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py new file mode 100644 index 00000000..7fa9d544 --- /dev/null +++ b/scripts/build_natura_raster.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: : 2017-2020 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +""" +Rasters the vector data of the `Natura 2000 `_ natural protection areas onto all cutout regions. + +Relevant Settings +----------------- + +.. code:: yaml + + renewable: + {technology}: + cutout: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`renewable_cf` + +Inputs +------ + +- ``data/bundle/natura/Natura2000_end2015.shp``: `Natura 2000 `_ natural protection areas. + + .. image:: ../img/natura.png + :scale: 33 % + +Outputs +------- + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + + .. image:: ../img/natura.png + :scale: 33 % + +Description +----------- + +""" + +import logging +from _helpers import configure_logging + +import atlite +import geopandas as gpd +import rasterio as rio +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds + +logger = logging.getLogger(__name__) + + +def determine_cutout_xXyY(cutout_name): + cutout = atlite.Cutout(cutout_name) + assert cutout.crs.to_epsg() == 4326 + x, X, y, Y = cutout.extent + dx, dy = cutout.dx, cutout.dy + return [x - dx/2., X + dx/2., y - dy/2., Y + dy/2.] + + +def get_transform_and_shape(bounds, res): + left, bottom = [(b // res)* res for b in bounds[:2]] + right, top = [(b // res + 1) * res for b in bounds[2:]] + shape = int((top - bottom) // res), int((right - left) / res) + transform = rio.Affine(res, 0, left, 0, -res, top) + return transform, shape + + +if __name__ == "__main__": + if 'snakemake' not in globals(): + from _helpers import mock_snakemake + snakemake = mock_snakemake('build_natura_raster') + configure_logging(snakemake) + + cutouts = snakemake.input.cutouts + xs, Xs, ys, Ys = zip(*(determine_cutout_xXyY(cutout) for cutout in cutouts)) + bounds = transform_bounds(4326, 3035, min(xs), min(ys), max(Xs), max(Ys)) + transform, out_shape = get_transform_and_shape(bounds, res=100) + + # adjusted boundaries + shapes = gpd.read_file(snakemake.input.natura).to_crs(3035) + raster = ~geometry_mask(shapes.geometry, out_shape[::-1], transform) + raster = raster.astype(rio.uint8) + + with rio.open(snakemake.output[0], 'w', driver='GTiff', dtype=rio.uint8, + count=1, transform=transform, crs=3035, compress='lzw', + width=raster.shape[1], height=raster.shape[0]) as dst: + dst.write(raster, indexes=1) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index a49df1f5..37e1e9de 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -227,9 +227,7 @@ if __name__ == '__main__': excluder = atlite.ExclusionContainer(crs=3035, res=100) if config['natura']: - mask = regions.to_crs(3035).buffer(0) # buffer to avoid invalid geometry - natura = gpd.read_file(snakemake.input.natura, mask=mask) - excluder.add_geometry(natura.geometry) + excluder.add_raster(snakemake.input.natura, nodata=0, allow_no_overlap=True) corine = config.get("corine", {}) if "grid_codes" in corine: diff --git a/test/config.test1.yaml b/test/config.test1.yaml index 64b169a4..6d626f7e 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -42,6 +42,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: From 29fe0fb7fb8ab34cde94dc01ca947cece2ed924c Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 21:02:45 +0200 Subject: [PATCH 19/20] hierarchical clustering: account for changes from merging master --- scripts/cluster_network.py | 1 + scripts/simplify_network.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index b1bf759d..59641a7e 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -12,6 +12,7 @@ Relevant Settings .. code:: yaml clustering: + cluster_network: aggregation_strategies: focus_weights: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 7e258966..0b31e5c6 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -14,7 +14,8 @@ Relevant Settings .. code:: yaml clustering: - simplify: + simplify_network: + cluster_network: aggregation_strategies: costs: @@ -415,7 +416,7 @@ if __name__ == "__main__": busmaps = [trafo_map, simplify_links_map, stub_map] cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) - if cluster_config.get('clustering', {}).get('simplify', {}).get('to_substations', False): + if cluster_config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): n, substation_map = aggregate_to_substations(n, aggregation_strategies) busmaps.append(substation_map) @@ -429,7 +430,7 @@ if __name__ == "__main__": for carrier in carriers: buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus)) logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.') - n, busmap_hac = aggregate_to_substations(n, buses_i) + n, busmap_hac = aggregate_to_substations(n, aggregation_strategies, buses_i) busmaps.append(busmap_hac) if snakemake.wildcards.simpl: From f1dedd9429fe6a5633fdfeead552abfacea48e39 Mon Sep 17 00:00:00 2001 From: martacki Date: Mon, 27 Jun 2022 21:11:49 +0200 Subject: [PATCH 20/20] hierarchical clustering: release notes & revert to old default clustering algorithm (kmeans) --- config.default.yaml | 8 ++++---- config.tutorial.yaml | 8 ++++---- doc/release_notes.rst | 2 ++ test/config.test1.yaml | 8 ++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index e699c38f..e1ac6237 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -22,11 +22,11 @@ countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'F clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions diff --git a/config.tutorial.yaml b/config.tutorial.yaml index 75703b8a..db20f1a4 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -22,11 +22,11 @@ countries: ['BE'] clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions diff --git a/doc/release_notes.rst b/doc/release_notes.rst index c000a046..47a67970 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -76,6 +76,8 @@ Upcoming Release * Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent. +* Hierarchical clustering was introduced. Distance metric is calculated from renewable potentials on hourly (feature entry ends with `-time`) or annual (feature entry in config end with `-cap`) values. + PyPSA-Eur 0.4.0 (22th September 2021) ===================================== diff --git a/test/config.test1.yaml b/test/config.test1.yaml index d8d095b8..5aecfdf2 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -21,11 +21,11 @@ countries: ['BE'] clustering: simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: hac - feature: solar+onwind-time - cluster_network: - algorithm: hac # choose from: [hac, kmeans] + algorithm: kmeans # choose from: [hac, kmeans] feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time aggregation_strategies: generators: p_nom_max: sum # use "min" for more conservative assumptions