introduce hierarchical agglomerative clustering (hac)

This commit is contained in:
martacki 2021-11-17 13:46:33 +01:00
parent 1da908b956
commit 299e71e2b3
7 changed files with 93 additions and 18 deletions

View File

@ -20,8 +20,13 @@ scenario:
countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']
clustering: clustering:
simplify: simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: hac
feature: solar+onwind-time
cluster_network:
algorithm: hac # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
snapshots: snapshots:
start: "2013-01-01" start: "2013-01-01"

View File

@ -20,8 +20,13 @@ scenario:
countries: ['DE'] countries: ['DE']
clustering: clustering:
simplify: simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: hac
feature: solar+onwind-time
cluster_network:
algorithm: hac # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
snapshots: snapshots:
start: "2013-03-01" start: "2013-03-01"

View File

@ -1,3 +1,8 @@
,Unit,Values,Description ,Unit,Values,Description
simplify,,, simplify_network,,,
-- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" -- to_substations,bool,"One of {'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones"
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where CarrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",
cluster_network,,,
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where CarrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",

1 Unit Values Description
2 simplify simplify_network
3 -- to_substations bool {'true','false'} One of {'true','false'} Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones
4 -- algorithm str One of {‘kmeans’, ‘hac’}
5 -- feature str Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.
6 cluster_network
7 -- algorithm str One of {‘kmeans’, ‘hac’}
8 -- feature str Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.

View File

@ -12,7 +12,6 @@ dependencies:
- pip - pip
- mamba # esp for windows build - mamba # esp for windows build
- pypsa>=0.18
- atlite>=0.2.5 - atlite>=0.2.5
- dask<=2021.3.1 # until https://github.com/dask/dask/issues/7583 is solved - dask<=2021.3.1 # until https://github.com/dask/dask/issues/7583 is solved
@ -56,5 +55,6 @@ dependencies:
- tabula-py - tabula-py
- pip: - pip:
- git+https://github.com/pypsa/pypsa.git#egg=pypsa
- vresutils==0.3.1 - vresutils==0.3.1
- tsam>=1.1.0 - tsam>=1.1.0

View File

@ -138,7 +138,7 @@ import seaborn as sns
from functools import reduce from functools import reduce
from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering, from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering,
_make_consense, get_clustering_from_busmap) busmap_by_hac, _make_consense, get_clustering_from_busmap)
from add_electricity import load_costs from add_electricity import load_costs
@ -170,6 +170,45 @@ def weighting_for_country(n, x):
return (w * (100. / w.max())).clip(lower=1.).astype(int) return (w * (100. / w.max())).clip(lower=1.).astype(int)
def get_feature_for_hac(n, buses_i, feature=None):  # buses_i = n.buses.index
    """Build the per-bus feature matrix used by hierarchical agglomerative clustering.

    Parameters
    ----------
    n : pypsa.Network
        Network whose renewable capacity factors (``generators_t.p_max_pu``)
        supply the clustering features.
    buses_i : pd.Index
        Buses for which features are collected (typically ``n.buses.index``).
    feature : str, optional
        String of the form ``"carrier1+...+carrierN-X"`` where each carrier is
        e.g. ``solar``/``onwind``/``offwind`` and ``X`` is ``cap`` (one mean
        capacity factor per carrier) or ``time`` (the full capacity-factor
        time series). Defaults to ``"solar+onwind-time"``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``buses_i``; buses without a given carrier are filled with 0.
    """
    if feature is None:
        feature = "solar+onwind-time"

    carriers = feature.split('-')[0].split('+')
    if "offwind" in carriers:
        # Expand the generic "offwind" into the concrete offwind carriers
        # present in the network (e.g. offwind-ac, offwind-dc).
        # NOTE: fixes a NameError — the original referenced an undefined
        # name `network` here instead of the parameter `n`.
        carriers.remove("offwind")
        carriers = np.append(carriers, n.generators.carrier.filter(like='offwind').unique())

    if feature.split('-')[1] == 'cap':
        feature_data = pd.DataFrame(index=buses_i, columns=carriers)
        for carrier in carriers:
            mean_cf = n.generators_t.p_max_pu.filter(like=carrier).mean()
            try:
                # without simpl wildcard (generator names are "bus carrier ..."):
                feature_data[carrier] = mean_cf.rename(index=lambda x: x.split(' ')[0])
            except Exception:
                # with simpl wildcard (generator names are "bus bus carrier ..."):
                feature_data[carrier] = mean_cf.rename(
                    index=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])

    if feature.split('-')[1] == 'time':
        feature_data = pd.DataFrame(columns=buses_i)
        for carrier in carriers:
            # DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
            cf_series = n.generators_t.p_max_pu.filter(like=carrier)
            try:
                # without simpl wildcard (bus names are "X X"):
                feature_data = pd.concat(
                    [feature_data, cf_series.rename(columns=lambda x: x.split(' ')[0])]
                )[buses_i]
            except Exception:
                # with simpl wildcard (bus names are "X X X"):
                feature_data = pd.concat(
                    [feature_data,
                     cf_series.rename(columns=lambda x: x.split(' ')[0] + ' ' + x.split(' ')[1])]
                )[buses_i]
        feature_data = feature_data.T
        # sklearn >= 1.2 rejects non-string column labels (Timestamps).
        feature_data.columns = feature_data.columns.astype(str)

    feature_data = feature_data.fillna(0)
    return feature_data
def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None): def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None):
"""Determine the number of clusters per country""" """Determine the number of clusters per country"""
@ -221,12 +260,18 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name=None):
return pd.Series(m.n.get_values(), index=L.index).astype(int) return pd.Series(m.n.get_values(), index=L.index).astype(int)
def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds): def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds):
if algorithm == "kmeans": if algorithm == "kmeans":
algorithm_kwds.setdefault('n_init', 1000) algorithm_kwds.setdefault('n_init', 1000)
algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('max_iter', 30000)
algorithm_kwds.setdefault('tol', 1e-6) algorithm_kwds.setdefault('tol', 1e-6)
if algorithm == "hac":
feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature)
elif feature is not None:
logger.info(f"keyword argument feature is only valid for algorithm 'hac'."
f"given feature {feature} will be ignored.")
n.determine_network_topology() n.determine_network_topology()
n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name) n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name)
@ -250,8 +295,10 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "louvain": elif algorithm == "louvain":
return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "hac":
return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index])
else: else:
raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.") raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.")
return (n.buses.groupby(['country', 'sub_network'], group_keys=False) return (n.buses.groupby(['country', 'sub_network'], group_keys=False)
.apply(busmap_for_country).squeeze().rename('busmap')) .apply(busmap_for_country).squeeze().rename('busmap'))
@ -259,7 +306,9 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None,
line_length_factor=1.25, potential_mode='simple', solver_name="cbc", line_length_factor=1.25, potential_mode='simple', solver_name="cbc",
algorithm="kmeans", extended_link_costs=0, focus_weights=None): algorithm="kmeans", feature=None, extended_link_costs=0, focus_weights=None):
logger.info(f"Clustering network using algorithm {algorithm} and feature {feature}...")
if potential_mode == 'simple': if potential_mode == 'simple':
p_nom_max_strategy = np.sum p_nom_max_strategy = np.sum
@ -273,7 +322,7 @@ def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carr
busmap.index = busmap.index.astype(str) busmap.index = busmap.index.astype(str)
logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
else: else:
busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature)
clustering = get_clustering_from_busmap( clustering = get_clustering_from_busmap(
n, busmap, n, busmap,
@ -313,7 +362,7 @@ def cluster_regions(busmaps, input=None, output=None):
for which in ('regions_onshore', 'regions_offshore'): for which in ('regions_onshore', 'regions_offshore'):
regions = gpd.read_file(getattr(input, which)).set_index('name') regions = gpd.read_file(getattr(input, which)).set_index('name')
geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.cascaded_union) geom_c = regions.geometry.groupby(busmap).apply(shapely.ops.unary_union)
regions_c = gpd.GeoDataFrame(dict(geometry=geom_c)) regions_c = gpd.GeoDataFrame(dict(geometry=geom_c))
regions_c.index.name = 'name' regions_c.index.name = 'name'
save_to_geojson(regions_c, getattr(output, which)) save_to_geojson(regions_c, getattr(output, which))
@ -377,6 +426,8 @@ if __name__ == "__main__":
line_length_factor=line_length_factor, line_length_factor=line_length_factor,
potential_mode=potential_mode, potential_mode=potential_mode,
solver_name=snakemake.config['solving']['solver']['name'], solver_name=snakemake.config['solving']['solver']['name'],
algorithm=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('algorithm', 'kmeans'),
feature=snakemake.config.get('clustering', {}).get('cluster_network', {}).get('feature', None),
extended_link_costs=hvac_overhead_cost, extended_link_costs=hvac_overhead_cost,
focus_weights=focus_weights) focus_weights=focus_weights)

View File

@ -360,7 +360,7 @@ def aggregate_to_substations(n, buses_i=None):
return clustering.network, busmap return clustering.network, busmap
def cluster(n, n_clusters): def cluster(n, n_clusters, algorithm="kmeans", feature=None):
logger.info(f"Clustering to {n_clusters} buses") logger.info(f"Clustering to {n_clusters} buses")
focus_weights = snakemake.config.get('focus_weights', None) focus_weights = snakemake.config.get('focus_weights', None)
@ -377,8 +377,10 @@ def cluster(n, n_clusters):
potential_mode = (consense(pd.Series([snakemake.config['renewable'][tech]['potential'] potential_mode = (consense(pd.Series([snakemake.config['renewable'][tech]['potential']
for tech in renewable_carriers])) for tech in renewable_carriers]))
if len(renewable_carriers) > 0 else 'conservative') if len(renewable_carriers) > 0 else 'conservative')
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode,
solver_name=snakemake.config['solving']['solver']['name'], solver_name=snakemake.config['solving']['solver']['name'],
algorithm=algorithm, feature=feature,
focus_weights=focus_weights) focus_weights=focus_weights)
return clustering.network, clustering.busmap return clustering.network, clustering.busmap
@ -400,12 +402,14 @@ if __name__ == "__main__":
busmaps = [trafo_map, simplify_links_map, stub_map] busmaps = [trafo_map, simplify_links_map, stub_map]
if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): if snakemake.config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False):
n, substation_map = aggregate_to_substations(n) n, substation_map = aggregate_to_substations(n)
busmaps.append(substation_map) busmaps.append(substation_map)
if snakemake.wildcards.simpl: if snakemake.wildcards.simpl:
n, cluster_map = cluster(n, int(snakemake.wildcards.simpl)) n, cluster_map = cluster(n, int(snakemake.wildcards.simpl),
algorithm=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('algorithm', 'hac'),
feature=snakemake.config.get('clustering', {}).get('simplify_network', {}).get('feature', None))
busmaps.append(cluster_map) busmaps.append(cluster_map)
# some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed

View File

@ -19,8 +19,13 @@ scenario:
countries: ['DE'] countries: ['DE']
clustering: clustering:
simplify: simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) to_substations: true # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: hac
feature: solar+onwind-time
cluster_network:
algorithm: hac # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
snapshots: snapshots:
start: "2013-03-01" start: "2013-03-01"