Merge pull request #293 from PyPSA/introduce_hac_clustering

introduce hierarchical agglomeratice clustering (hac)
This commit is contained in:
Fabian Neumann 2022-06-28 07:44:52 +02:00 committed by GitHub
commit f9a996e5df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 140 additions and 21 deletions

View File

@ -20,8 +20,13 @@ scenario:
countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions

View File

@ -20,8 +20,13 @@ scenario:
countries: ['BE']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions

View File

@ -1,8 +1,13 @@
,Unit,Values,Description
simplify,,,
simplify_network,,,
-- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones"
-- aggregation_strategies,,,
-- -- generators,,,
-- -- -- {key},str,"{key} can be any of the component of the generator (str). Its value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator."
-- -- buses,,,
-- -- -- {key},str,"{key} can be any of the component of the bus (str). Its value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus."
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where CarrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",
cluster_network
-- algorithm,str,"One of {kmeans, hac}",
-- feature,str,"Str in the format carrier1+carrier2+...+carrierN-X, where CarrierI can be from {solar, onwind, offwind, ror} and X is one of {cap, time}.",
aggregation_strategies,,,
-- generators,,,
-- -- {key},str,"{key} can be any of the component of the generator (str). Its value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator."
-- buses,,,
-- -- {key},str,"{key} can be any of the component of the bus (str). Its value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus."

Can't render this file because it has a wrong number of fields in line 6.

View File

@ -76,6 +76,8 @@ Upcoming Release
* Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent.
* Hierarchical clustering was introduced. Distance metric is calculated from renewable potentials on hourly (feature entry ends with `-time`) or annual (feature entry in config end with `-cap`) values.
PyPSA-Eur 0.4.0 (22th September 2021)
=====================================

View File

@ -10,7 +10,7 @@ dependencies:
- python>=3.8
- pip
- pypsa>=0.18.1
- pypsa>=0.19.1
- atlite>=0.2.6
- dask

View File

@ -12,6 +12,7 @@ Relevant Settings
.. code:: yaml
clustering:
cluster_network:
aggregation_strategies:
focus_weights:
@ -137,7 +138,7 @@ import seaborn as sns
from functools import reduce
from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering,
_make_consense, get_clustering_from_busmap)
busmap_by_hac, _make_consense, get_clustering_from_busmap)
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)
@ -172,6 +173,42 @@ def weighting_for_country(n, x):
return (w * (100. / w.max())).clip(lower=1.).astype(int)
def get_feature_for_hac(n, buses_i=None, feature=None):
if buses_i is None:
buses_i = n.buses.index
if feature is None:
feature = "solar+onwind-time"
carriers = feature.split('-')[0].split('+')
if "offwind" in carriers:
carriers.remove("offwind")
carriers = np.append(carriers, network.generators.carrier.filter(like='offwind').unique())
if feature.split('-')[1] == 'cap':
feature_data = pd.DataFrame(index=buses_i, columns=carriers)
for carrier in carriers:
gen_i = n.generators.query("carrier == @carrier").index
attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus)
feature_data[carrier] = attach
if feature.split('-')[1] == 'time':
feature_data = pd.DataFrame(columns=buses_i)
for carrier in carriers:
gen_i = n.generators.query("carrier == @carrier").index
attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus)
feature_data = pd.concat([feature_data, attach], axis=0)[buses_i]
feature_data = feature_data.T
# timestamp raises error in sklearn >= v1.2:
feature_data.columns = feature_data.columns.astype(str)
feature_data = feature_data.fillna(0)
return feature_data
def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
"""Determine the number of clusters per country"""
@ -220,13 +257,50 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"):
return pd.Series(m.n.get_values(), index=L.index).round().astype(int)
def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds):
def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds):
if algorithm == "kmeans":
algorithm_kwds.setdefault('n_init', 1000)
algorithm_kwds.setdefault('max_iter', 30000)
algorithm_kwds.setdefault('tol', 1e-6)
algorithm_kwds.setdefault('random_state', 0)
def fix_country_assignment_for_hac(n):
from scipy.sparse import csgraph
# overwrite country of nodes that are disconnected from their country-topology
for country in n.buses.country.unique():
m = n[n.buses.country ==country].copy()
_, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False)
component = pd.Series(labels, index=m.buses.index)
component_sizes = component.value_counts()
if len(component_sizes)>1:
disconnected_bus = component[component==component_sizes.index[-1]].index[0]
neighbor_bus = (
n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus")
.iloc[0][['bus0', 'bus1']]
)
new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0]
logger.info(
f"overwriting country `{country}` of bus `{disconnected_bus}` "
f"to new country `{new_country}`, because it is disconnected "
"from its inital inter-country transmission grid."
)
n.buses.at[disconnected_bus, "country"] = new_country
return n
if algorithm == "hac":
feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature)
n = fix_country_assignment_for_hac(n)
if (algorithm != "hac") and (feature is not None):
logger.warning(f"Keyword argument feature is only valid for algorithm `hac`. "
f"Given feature `{feature}` will be ignored.")
n.determine_network_topology()
n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name)
@ -250,8 +324,10 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "louvain":
return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds)
elif algorithm == "hac":
return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index])
else:
raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.")
raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.")
return (n.buses.groupby(['country', 'sub_network'], group_keys=False)
.apply(busmap_for_country).squeeze().rename('busmap'))
@ -259,12 +335,12 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori
def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None,
line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc",
algorithm="kmeans", extended_link_costs=0, focus_weights=None):
algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None):
bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies)
if not isinstance(custom_busmap, pd.Series):
busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm)
busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature)
else:
busmap = custom_busmap
@ -375,10 +451,13 @@ if __name__ == "__main__":
custom_busmap.index = custom_busmap.index.astype(str)
logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}")
cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {})
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers,
line_length_factor, aggregation_strategies,
snakemake.config['solving']['solver']['name'],
"kmeans", hvac_overhead_cost, focus_weights)
cluster_config.get("algorithm", "hac"),
cluster_config.get("feature", "solar+onwind-time"),
hvac_overhead_cost, focus_weights)
update_p_nom_max(clustering.network)

View File

@ -14,7 +14,8 @@ Relevant Settings
.. code:: yaml
clustering:
simplify:
simplify_network:
cluster_network:
aggregation_strategies:
costs:
@ -364,11 +365,10 @@ def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None):
line_length_factor=1.0,
generator_strategies=generator_strategies,
scale_link_capital_costs=False)
return clustering.network, busmap
def cluster(n, n_clusters, config, aggregation_strategies=dict()):
def cluster(n, n_clusters, config, algorithm="hac", feature=None, aggregation_strategies=dict()):
logger.info(f"Clustering to {n_clusters} buses")
focus_weights = config.get('focus_weights', None)
@ -380,6 +380,7 @@ def cluster(n, n_clusters, config, aggregation_strategies=dict()):
clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False,
aggregation_strategies=aggregation_strategies,
solver_name=config['solving']['solver']['name'],
algorithm=algorithm, feature=feature,
focus_weights=focus_weights)
return clustering.network, clustering.busmap
@ -414,12 +415,29 @@ if __name__ == "__main__":
busmaps = [trafo_map, simplify_links_map, stub_map]
if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False):
cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {})
if cluster_config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False):
n, substation_map = aggregate_to_substations(n, aggregation_strategies)
busmaps.append(substation_map)
# treatment of outliers (nodes without a profile for considered carrier):
# all nodes that have no profile of the given carrier are being aggregated to closest neighbor
if (
snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac" or
cluster_config.get("algorithm", "hac") == "hac"
):
carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+')
for carrier in carriers:
buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus))
logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.')
n, busmap_hac = aggregate_to_substations(n, aggregation_strategies, buses_i)
busmaps.append(busmap_hac)
if snakemake.wildcards.simpl:
n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, aggregation_strategies)
n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config,
cluster_config.get('algorithm', 'hac'),
cluster_config.get('feature', None),
aggregation_strategies)
busmaps.append(cluster_map)
# some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed

View File

@ -19,8 +19,13 @@ scenario:
countries: ['BE']
clustering:
simplify:
simplify_network:
to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections)
algorithm: kmeans # choose from: [hac, kmeans]
feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc.
cluster_network:
algorithm: kmeans
feature: solar+onwind-time
aggregation_strategies:
generators:
p_nom_max: sum # use "min" for more conservative assumptions