From 299e71e2b3eacf53f796db3c9e965034bbb25d16 Mon Sep 17 00:00:00 2001
From: martacki
Date: Wed, 17 Nov 2021 13:46:33 +0100
Subject: [PATCH] introduce hierarchical agglomerative clustering (hac)

---
 config.default.yaml             |  9 +++--
 config.tutorial.yaml            |  9 +++--
 doc/configtables/clustering.csv |  9 +++--
 envs/environment.yaml           |  2 +-
 scripts/cluster_network.py      | 63 +++++++++++++++++++++++++++++----
 scripts/simplify_network.py     | 10 ++++--
 test/config.test1.yaml          |  9 +++--
 7 files changed, 93 insertions(+), 18 deletions(-)

diff --git a/config.default.yaml b/config.default.yaml
index f70e7c2c..631645b9 100755
--- a/config.default.yaml
+++ b/config.default.yaml
@@ -20,8 +20,13 @@ scenario:
 countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']
 
 clustering:
-  simplify:
-    to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+  simplify_network:
+    to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+    algorithm: hac
+    feature: solar+onwind-time
+  cluster_network:
+    algorithm: hac # choose from: [hac, kmeans]
+    feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
 
 snapshots:
   start: "2013-01-01"
diff --git a/config.tutorial.yaml b/config.tutorial.yaml
index 26ead242..919da193 100755
--- a/config.tutorial.yaml
+++ b/config.tutorial.yaml
@@ -20,8 +20,13 @@ scenario:
 countries: ['DE']
 
 clustering:
-  simplify:
-    to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+  simplify_network:
+    to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+    algorithm: hac
+    feature: solar+onwind-time
+  cluster_network:
+    algorithm: hac # choose from: [hac, kmeans]
+    feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
 
 snapshots:
   start: "2013-03-01"
diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv
index 2f63f955..e488dd39 100644
--- a/doc/configtables/clustering.csv
+++ b/doc/configtables/clustering.csv
@@ -1,3 +1,8 @@
 ,Unit,Values,Description
-simplify,,,
--- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones"
+simplify_network,,,
+-- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones"
+-- algorithm,str,"One of {'kmeans', 'hac'}","Clustering algorithm used when simplifying the network ('hac' = hierarchical agglomerative clustering)"
+-- feature,str,"String in the format 'carrier1+carrier2+...+carrierN-X', where carrierI can be from {'solar', 'onwind', 'offwind', 'ror'} and X is one of {'cap', 'time'}","Only relevant for 'hac': renewable feature the buses are clustered on, either capacity-factor time series ('time') or their time averages ('cap')"
+cluster_network,,,
+-- algorithm,str,"One of {'kmeans', 'hac'}","Clustering algorithm used when clustering the network to the requested number of clusters"
+-- feature,str,"String in the format 'carrier1+carrier2+...+carrierN-X', where carrierI can be from {'solar', 'onwind', 'offwind', 'ror'} and X is one of {'cap', 'time'}","Only relevant for 'hac': renewable feature the buses are clustered on, either capacity-factor time series ('time') or their time averages ('cap')"
diff --git a/envs/environment.yaml b/envs/environment.yaml
index 29d743ac..5f6a8310 100644
--- a/envs/environment.yaml
+++ b/envs/environment.yaml
@@ -12,7 +12,6 @@ dependencies:
   - pip
   - mamba # esp for windows build
 
-  - pypsa>=0.18
   - atlite>=0.2.5
   - dask<=2021.3.1 # until https://github.com/dask/dask/issues/7583 is solved
 
@@ -56,5 +55,6 @@ dependencies:
   - tabula-py
 
   - pip:
+    - git+https://github.com/pypsa/pypsa.git#egg=pypsa
     - vresutils==0.3.1
     - tsam>=1.1.0
diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py
index 980b73b0..7aae3ae6 100644
--- a/scripts/cluster_network.py
+++ b/scripts/cluster_network.py
@@ -138,7 +138,7 @@ import seaborn as sns
 from functools import reduce
 
 from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering,
-                                     _make_consense, get_clustering_from_busmap)
+                                     busmap_by_hac, _make_consense, get_clustering_from_busmap)
 
 from add_electricity import load_costs
 
@@ -170,6 +170,45 @@ def weighting_for_country(n, x):
     return (w * (100. / w.max())).clip(lower=1.).astype(int)
 
 
+def get_feature_for_hac(n, buses_i, feature=None): #buses_i = n.buses.index
+
+    if feature is None:
+        feature = "solar+onwind-time"
+
+    carriers = feature.split('-')[0].split('+')
+    if "offwind" in carriers:
+        carriers.remove("offwind")
+        carriers = np.append(carriers, n.generators.carrier.filter(like='offwind').unique())
+
+    if feature.split('-')[1] == 'cap':
+        feature_data = pd.DataFrame(index=buses_i, columns=carriers)
+        for carrier in carriers:
+            try:
+                feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean()
+                                         .rename(index=lambda x: x.split(' ')[0]))
+            except:
+                feature_data[carrier] = (n.generators_t.p_max_pu.filter(like=carrier).mean()
+                                         .rename(index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))
+
+    if feature.split('-')[1] == 'time':
+        feature_data = pd.DataFrame(columns=buses_i)
+        for carrier in carriers:
+            try:
+                # without simpl wildcard (bus names are "X X"):
+                feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier)
+                                                   .rename(columns=lambda x: x.split(' ')[0]))[buses_i]
+            except:
+                # with simpl wildcard (bus names are "X X X"):
+                feature_data = feature_data.append(n.generators_t.p_max_pu.filter(like=carrier)
+                                                   .rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1]))[buses_i]
+        feature_data = feature_data.T
+        feature_data.columns = feature_data.columns.astype(str) # Timestamp will raise error in sklearn>=v1.2
+
+    feature_data = feature_data.fillna(0)
+
+    return feature_data
+
+
 def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None):
     """Determine the number of clusters per country"""
 
@@ -221,12 +260,18 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None):
     return pd.Series(m.n.get_values(), index=L.index).astype(int)
 
 
-def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds):
+def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds):
     if algorithm == "kmeans":
         algorithm_kwds.setdefault('n_init', 1000)
         algorithm_kwds.setdefault('max_iter', 30000)
         algorithm_kwds.setdefault('tol', 1e-6)
 
+    if algorithm == "hac":
+        feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature)
+    elif feature is not None:
+        logger.info(f"Keyword argument feature is only valid for algorithm 'hac'. "
+                    f"Given feature {feature} will be ignored.")
+
     n.determine_network_topology()
 
     n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name)
@@ -250,8 +295,10 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
             return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
         elif algorithm == "louvain":
             return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
+        elif algorithm == "hac":
+            return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index])
         else:
-            raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.")
+            raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.")
 
     return (n.buses.groupby(['country', 'sub_network'], group_keys=False)
             .apply(busmap_for_country).squeeze().rename('busmap'))
@@ -259,7 +306,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
 
 def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None,
                               line_length_factor=1.25, potential_mode='simple', solver_name="cbc",
-                              algorithm="kmeans", extended_link_costs=0, focus_weights=None):
+                              algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None):
+
+    logger.info(f"Clustering network using algorithm {algorithm} and feature {feature}...")
 
     if potential_mode == 'simple':
         p_nom_max_strategy = np.sum
@@ -273,7 +322,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr
         busmap.index = busmap.index.astype(str)
         logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
     else:
-        busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm)
+        busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature)
 
     clustering = get_clustering_from_busmap(
         n, busmap,
@@ -313,7 +362,7 @@ def cluster_regions(busmaps, input=None, output=None):
 
     for which in ('regions_onshore', 'regions_offshore'):
         regions = gpd.read_file(getattr(input, which)).set_index('name')
-        geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.cascaded_union)
+        geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union)
         regions_c = gpd.GeoDataFrame(dict(geometry=geom_c))
         regions_c.index.name = 'name'
         save_to_geojson(regions_c, getattr(output, which))
@@ -377,6 +426,8 @@ if __name__ == "__main__":
                                                line_length_factor=line_length_factor,
                                                potential_mode=potential_mode,
                                                solver_name=snakemake.config['solving']['solver']['name'],
+                                               algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'),
+                                               feature=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('feature', None),
                                                extended_link_costs=hvac_overhead_cost,
                                                focus_weights=focus_weights)
 
diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 85bc4d15..8a93952c 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -360,7 +360,7 @@ def aggregate_to_substations(n, buses_i=None):
     return clustering.network, busmap
 
 
-def cluster(n, n_clusters):
+def cluster(n, n_clusters, algorithm="kmeans", feature=None):
     logger.info(f"Clustering to {n_clusters} buses")
 
     focus_weights = snakemake.config.get('focus_weights', None)
@@ -377,8 +377,10 @@ def cluster(n, n_clusters):
     potential_mode = (consense(pd.Series([snakemake.config['renewable'][tech]['potential']
                                           for tech in renewable_carriers]))
                       if len(renewable_carriers) > 0 else 'conservative')
+
     clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode,
                                            solver_name=snakemake.config['solving']['solver']['name'],
+                                           algorithm=algorithm, feature=feature,
                                            focus_weights=focus_weights)
 
     return clustering.network, clustering.busmap
@@ -400,12 +402,14 @@ if __name__ == "__main__":
 
     busmaps = [trafo_map, simplify_links_map, stub_map]
 
-    if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False):
+    if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False):
         n, substation_map = aggregate_to_substations(n)
         busmaps.append(substation_map)
 
     if snakemake.wildcards.simpl:
-        n, cluster_map = cluster(n, int(snakemake.wildcards.simpl))
+        n, cluster_map = cluster(n, int(snakemake.wildcards.simpl),
+                                 algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'),
+                                 feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None))
         busmaps.append(cluster_map)
 
     # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed
diff --git a/test/config.test1.yaml b/test/config.test1.yaml
index 2986037b..0c34ea13 100755
--- a/test/config.test1.yaml
+++ b/test/config.test1.yaml
@@ -19,8 +19,13 @@ scenario:
 countries: ['DE']
 
 clustering:
-  simplify:
-    to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+  simplify_network:
+    to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
+    algorithm: hac
+    feature: solar+onwind-time
+  cluster_network:
+    algorithm: hac # choose from: [hac, kmeans]
+    feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
 
 snapshots:
   start: "2013-03-01"
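
For illustration, a minimal sketch of how the pieces added above fit together when called by hand rather than through the Snakemake workflow. It is not part of the patch: it assumes the patched repository environment (run from the scripts/ directory so that cluster_network.py and its imports resolve) and uses "networks/elec_s.nc" as a placeholder path to an already simplified network with renewable capacity-factor time series attached. In the workflow itself the same calls are driven by the clustering: simplify_network / cluster_network options read in the __main__ blocks of simplify_network.py and cluster_network.py.

    # Minimal sketch, not part of the patch; the path and the cluster count are placeholders.
    import pypsa
    from pypsa.networkclustering import busmap_by_hac

    from cluster_network import get_feature_for_hac  # helper added by this patch

    n = pypsa.Network("networks/elec_s.nc")  # placeholder: a simplified PyPSA-Eur network

    # "solar+onwind-time": carriers before the dash form the feature set, the suffix
    # selects per-snapshot capacity-factor time series ("time") or their time
    # averages ("cap") as the per-bus feature matrix.
    feature_data = get_feature_for_hac(n, buses_i=n.buses.index, feature="solar+onwind-time")

    # Hierarchical agglomerative clustering down to 50 buses, mirroring the
    # busmap_by_hac call added in busmap_for_n_clusters(), where it is applied per
    # country and sub-network with cluster counts from distribute_clusters().
    busmap = busmap_by_hac(n, 50, buses_i=n.buses.index, feature=feature_data)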