diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d447a56f..c753deab 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,12 +13,13 @@ on: branches: - master pull_request: + branches: + - master schedule: - cron: "0 5 * * TUE" env: - CONDA_CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - DATA_CACHE_NUMBER: 1 + CACHE_NUMBER: 1 # Change this value to manually reset the environment cache jobs: build: @@ -65,26 +66,16 @@ jobs: miniforge-version: latest activate-environment: pypsa-eur use-mamba: true - - - name: Set cache dates - run: | - echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - echo "WEEK=$(date +'%Y%U')" >> $GITHUB_ENV - - - name: Cache data and cutouts folders - uses: actions/cache@v3 - with: - path: | - data - cutouts - key: data-cutouts-${{ env.WEEK }}-${{ env.DATA_CACHE_NUMBER }} + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Create environment cache uses: actions/cache@v2 id: cache with: path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CONDA_CACHE_NUMBER }} + key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - name: Update environment due to outdated or unavailable cache run: mamba env update -n pypsa-eur -f envs/environment.yaml diff --git a/Snakefile b/Snakefile index 42ecc45b..7678a401 100644 --- a/Snakefile +++ b/Snakefile @@ -67,12 +67,6 @@ if config['enable'].get('retrieve_databundle', True): script: 'scripts/retrieve_databundle.py' -rule retrieve_natura_data: - input: HTTP.remote("sdi.eea.europa.eu/datashare/s/H6QGCybMdLLnywo/download", additional_request_string="?path=%2FNatura2000_end2020_gpkg&files=Natura2000_end2020.gpkg", static=True) - output: "data/Natura2000_end2020.gpkg" - run: move(input[0], output[0]) - - rule retrieve_load_data: input: HTTP.remote("data.open-power-system-data.org/time_series/2019-06-05/time_series_60min_singleindex.csv", keep_local=True, static=True) output: "data/load_raw.csv" @@ -171,13 +165,28 @@ if config['enable'].get('retrieve_cutout', True): run: move(input[0], output[0]) +if config['enable'].get('build_natura_raster', False): + rule build_natura_raster: + input: + natura="data/bundle/natura/Natura2000_end2015.shp", + cutouts=expand("cutouts/{cutouts}.nc", **config['atlite']) + output: "resources/natura.tiff" + log: "logs/build_natura_raster.log" + script: "scripts/build_natura_raster.py" + + +if config['enable'].get('retrieve_natura_raster', True): + rule retrieve_natura_raster: + input: HTTP.remote("zenodo.org/record/4706686/files/natura.tiff", keep_local=True, static=True) + output: "resources/natura.tiff" + run: move(input[0], output[0]) + + rule build_renewable_profiles: input: base_network="networks/base.nc", corine="data/bundle/corine/g250_clc06_V18_5.tif", - natura=lambda w: ("data/Natura2000_end2020.gpkg" - if config["renewable"][w.technology]["natura"] - else []), + natura="resources/natura.tiff", gebco=lambda w: ("data/bundle/GEBCO_2014_2D.nc" if "max_depth" in config["renewable"][w.technology].keys() else []), diff --git a/config.default.yaml b/config.default.yaml index 971293b7..e1ac6237 100755 --- a/config.default.yaml +++ b/config.default.yaml @@ -20,8 +20,23 @@ scenario: countries: ['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'] clustering: - simplify: + simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: kmeans # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time + aggregation_strategies: + generators: + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max + efficiency: mean snapshots: start: "2013-01-01" @@ -33,6 +48,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: diff --git a/config.tutorial.yaml b/config.tutorial.yaml index ec72e11c..db20f1a4 100755 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -20,8 +20,23 @@ scenario: countries: ['BE'] clustering: - simplify: + simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: kmeans # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time + aggregation_strategies: + generators: + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max + efficiency: mean snapshots: start: "2013-03-01" @@ -33,6 +48,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: @@ -77,7 +94,7 @@ renewable: 24, 25, 26, 27, 28, 29, 31, 32] distance: 1000 distance_grid_codes: [1, 2, 3, 4, 5, 6] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 offwind-ac: @@ -88,7 +105,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true max_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -101,7 +118,7 @@ renewable: capacity_per_sqkm: 3 # correction_factor: 0.93 corine: [44, 255] - natura: false + natura: true min_shore_distance: 30000 potential: simple # or conservative clip_p_max_pu: 1.e-2 @@ -123,7 +140,7 @@ renewable: # correction_factor: 0.854337 corine: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 26, 31, 32] - natura: false + natura: true potential: simple # or conservative clip_p_max_pu: 1.e-2 diff --git a/doc/configtables/clustering.csv b/doc/configtables/clustering.csv index 2f63f955..cf004dad 100644 --- a/doc/configtables/clustering.csv +++ b/doc/configtables/clustering.csv @@ -1,3 +1,13 @@ ,Unit,Values,Description -simplify,,, +simplify_network,,, -- to_substations,bool,"{'true','false'}","Aggregates all nodes without power injection (positive or negative, i.e. demand or generation) to electrically closest ones" +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", +cluster_network +-- algorithm,str,"One of {‘kmeans’, ‘hac’}", +-- feature,str,"Str in the format ‘carrier1+carrier2+...+carrierN-X’, where CarrierI can be from {‘solar’, ‘onwind’, ‘offwind’, ‘ror’} and X is one of {‘cap’, ‘time’}.", +aggregation_strategies,,, +-- generators,,, +-- -- {key},str,"{key} can be any of the component of the generator (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new generator." +-- buses,,, +-- -- {key},str,"{key} can be any of the component of the bus (str). It’s value can be any that can be converted to pandas.Series using getattr(). For example one of {min, max, sum}.","Aggregates the component according to the given strategy. For example, if sum, then all values within each cluster are summed to represent the new bus." diff --git a/doc/configtables/toplevel.csv b/doc/configtables/toplevel.csv index 8965a0bc..b7f39d05 100644 --- a/doc/configtables/toplevel.csv +++ b/doc/configtables/toplevel.csv @@ -12,4 +12,6 @@ enable,,, -- retrieve_databundle,bool,"{true, false}","Switch to retrieve databundle from zenodo via the rule :mod:`retrieve_databundle` or whether to keep a custom databundle located in the corresponding folder." -- build_cutout,bool,"{true, false}","Switch to enable the building of cutouts via the rule :mod:`build_cutout`." -- retrieve_cutout,bool,"{true, false}","Switch to enable the retrieval of cutouts from zenodo with :mod:`retrieve_cutout`." +-- build_natura_raster,bool,"{true, false}","Switch to enable the creation of the raster ``natura.tiff`` via the rule :mod:`build_natura_raster`." +-- retrieve_natura_raster,bool,"{true, false}","Switch to enable the retrieval of ``natura.tiff`` from zenodo with :mod:`retrieve_natura_raster`." -- custom_busmap,bool,"{true, false}","Switch to enable the use of custom busmaps in rule :mod:`cluster_network`. If activated the rule looks for provided busmaps at ``data/custom_busmap_elec_s{simpl}_{clusters}.csv`` which should have the same format as ``resources/busmap_elec_s{simpl}_{clusters}.csv``, i.e. the index should contain the buses of ``networks/elec_s{simpl}.nc``." diff --git a/doc/preparation.rst b/doc/preparation.rst index 7f42190c..dba5e981 100644 --- a/doc/preparation.rst +++ b/doc/preparation.rst @@ -27,6 +27,7 @@ With these and the externally extracted ENTSO-E online map topology Then the process continues by calculating conventional power plant capacities, potentials, and per-unit availability time series for variable renewable energy carriers and hydro power plants with the following rules: - :mod:`build_powerplants` for today's thermal power plant capacities using `powerplantmatching `_ allocating these to the closest substation for each powerplant, +- :mod:`build_natura_raster` for rasterising NATURA2000 natural protection areas, - :mod:`build_renewable_profiles` for the hourly capacity factors and installation potentials constrained by land-use in each substation's Voronoi cell for PV, onshore and offshore wind, and - :mod:`build_hydro_profile` for the hourly per-unit hydro power availability time series. @@ -40,6 +41,7 @@ together into a detailed PyPSA network stored in ``networks/elec.nc``. preparation/build_shapes preparation/build_load_data preparation/build_cutout + preparation/build_natura_raster preparation/prepare_links_p_nom preparation/base_network preparation/build_bus_regions diff --git a/doc/preparation/build_natura_raster.rst b/doc/preparation/build_natura_raster.rst new file mode 100644 index 00000000..e3ec4364 --- /dev/null +++ b/doc/preparation/build_natura_raster.rst @@ -0,0 +1,39 @@ +.. + SPDX-FileCopyrightText: 2019-2020 The PyPSA-Eur Authors + + SPDX-License-Identifier: CC-BY-4.0 + +.. _natura: + +Rule ``build_natura_raster`` +=============================== + +.. graphviz:: + :align: center + + digraph snakemake_dag { + graph [bgcolor=white, + margin=0, + size="8,5" + ]; + node [fontname=sans, + fontsize=10, + penwidth=2, + shape=box, + style=rounded + ]; + edge [color=grey, + penwidth=2 + ]; + 9 [color="0.22 0.6 0.85", + label=build_renewable_profiles]; + 12 [color="0.31 0.6 0.85", + fillcolor=gray, + label=build_natura_raster, + style=filled]; + 12 -> 9; + } + +| + +.. automodule:: build_natura_raster diff --git a/doc/preparation/build_renewable_profiles.rst b/doc/preparation/build_renewable_profiles.rst index adc4d6ca..27e61583 100644 --- a/doc/preparation/build_renewable_profiles.rst +++ b/doc/preparation/build_renewable_profiles.rst @@ -41,6 +41,9 @@ Rule ``build_renewable_profiles`` 8 [color="0.00 0.6 0.85", label=build_shapes]; 8 -> 9; + 12 [color="0.31 0.6 0.85", + label=build_natura_raster]; + 12 -> 9; 13 [color="0.56 0.6 0.85", label=build_cutout]; 13 -> 9; diff --git a/doc/preparation/retrieve.rst b/doc/preparation/retrieve.rst index 21187924..42479284 100644 --- a/doc/preparation/retrieve.rst +++ b/doc/preparation/retrieve.rst @@ -50,3 +50,30 @@ The :ref:`tutorial` uses a smaller cutout than required for the full model (30 M .. seealso:: For details see :mod:`build_cutout` and read the `atlite documentation `_. + + +Rule ``retrieve_natura_raster`` +------------------------------- + +.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4706686.svg + :target: https://doi.org/10.5281/zenodo.4706686 + +This rule, as a substitute for :mod:`build_natura_raster`, downloads an already rasterized version (`natura.tiff `_) of `Natura 2000 `_ natural protection areas to reduce computation times. The file is placed into the ``resources`` sub-directory. + +**Relevant Settings** + +.. code:: yaml + + enable: + build_natura_raster: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`toplevel_cf` + +**Outputs** + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + +.. seealso:: + For details see :mod:`build_natura_raster`. diff --git a/doc/release_notes.rst b/doc/release_notes.rst index e05f3d1f..d1e68c48 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -64,11 +64,6 @@ Upcoming Release * New network topology extracted from the ENTSO-E interactive map. -* Remove rules to build or retrieve rasterized NATURA 2000 dataset. Renewable potential calculation now directly uses the shapefiles. - -* Cache data and cutouts folders. This cache will be updated weekly. - -* Add rule to automatically retrieve Natura2000 natural protection areas. Switch of file format to GPKG. * The unused argument ``simple_hvdc_costs`` in :mod:`add_electricity` was removed. * Iterative solving with impedance updates is skipped if there are no expandable lines. @@ -81,6 +76,9 @@ Upcoming Release * Update rasterio version to correctly calculate exclusion raster +* Clustering strategies for generators and buses have moved from distinct scripts to configurables to unify the process and make it more transparent. + +* Hierarchical clustering was introduced. Distance metric is calculated from renewable potentials on hourly (feature entry ends with `-time`) or annual (feature entry in config end with `-cap`) values. PyPSA-Eur 0.4.0 (22th September 2021) ===================================== diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 1c247f8e..c37abb39 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -35,8 +35,8 @@ To run the tutorial, use this as your configuration file ``config.yaml``. .../pypsa-eur % cp config.tutorial.yaml config.yaml -This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle` -and :mod:`retrieve_cutout` totalling at less than 250 MB. +This configuration is set to download a reduced data set via the rules :mod:`retrieve_databundle`, +:mod:`retrieve_natura_raster`, :mod:`retrieve_cutout` totalling at less than 250 MB. The full set of data dependencies would consume 5.3 GB. For more information on the data dependencies of PyPSA-Eur, continue reading :ref:`data`. diff --git a/envs/environment.yaml b/envs/environment.yaml index c7bc1312..be0a4651 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -10,7 +10,7 @@ dependencies: - python>=3.8 - pip - - pypsa>=0.18.1 + - pypsa>=0.19.1 - atlite>=0.2.6 - dask @@ -45,7 +45,6 @@ dependencies: # GIS dependencies: - cartopy - descartes - - fiona # explicit for Windows - rasterio<=1.2.9 # 1.2.10 creates error https://github.com/PyPSA/atlite/issues/238 # PyPSA-Eur-Sec Dependencies diff --git a/scripts/_helpers.py b/scripts/_helpers.py index d77266d8..dc638915 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -212,6 +212,22 @@ def progress_retrieve(url, file): urllib.request.urlretrieve(url, file, reporthook=dlProgress) +def get_aggregation_strategies(aggregation_strategies): + # default aggregation strategies that cannot be defined in .yaml format must be specified within + # the function, otherwise (when defaults are passed in the function's definition) they get lost + # when custom values are specified in the config. + + import numpy as np + from pypsa.networkclustering import _make_consense + + bus_strategies = dict(country=_make_consense("Bus", "country")) + bus_strategies.update(aggregation_strategies.get("buses", {})) + + generator_strategies = {'build_year': lambda x: 0, 'lifetime': lambda x: np.inf} + generator_strategies.update(aggregation_strategies.get("generators", {})) + + return bus_strategies, generator_strategies + def mock_snakemake(rulename, **wildcards): """ diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py new file mode 100644 index 00000000..7fa9d544 --- /dev/null +++ b/scripts/build_natura_raster.py @@ -0,0 +1,89 @@ +# SPDX-FileCopyrightText: : 2017-2020 The PyPSA-Eur Authors +# +# SPDX-License-Identifier: MIT + +""" +Rasters the vector data of the `Natura 2000 `_ natural protection areas onto all cutout regions. + +Relevant Settings +----------------- + +.. code:: yaml + + renewable: + {technology}: + cutout: + +.. seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`renewable_cf` + +Inputs +------ + +- ``data/bundle/natura/Natura2000_end2015.shp``: `Natura 2000 `_ natural protection areas. + + .. image:: ../img/natura.png + :scale: 33 % + +Outputs +------- + +- ``resources/natura.tiff``: Rasterized version of `Natura 2000 `_ natural protection areas to reduce computation times. + + .. image:: ../img/natura.png + :scale: 33 % + +Description +----------- + +""" + +import logging +from _helpers import configure_logging + +import atlite +import geopandas as gpd +import rasterio as rio +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds + +logger = logging.getLogger(__name__) + + +def determine_cutout_xXyY(cutout_name): + cutout = atlite.Cutout(cutout_name) + assert cutout.crs.to_epsg() == 4326 + x, X, y, Y = cutout.extent + dx, dy = cutout.dx, cutout.dy + return [x - dx/2., X + dx/2., y - dy/2., Y + dy/2.] + + +def get_transform_and_shape(bounds, res): + left, bottom = [(b // res)* res for b in bounds[:2]] + right, top = [(b // res + 1) * res for b in bounds[2:]] + shape = int((top - bottom) // res), int((right - left) / res) + transform = rio.Affine(res, 0, left, 0, -res, top) + return transform, shape + + +if __name__ == "__main__": + if 'snakemake' not in globals(): + from _helpers import mock_snakemake + snakemake = mock_snakemake('build_natura_raster') + configure_logging(snakemake) + + cutouts = snakemake.input.cutouts + xs, Xs, ys, Ys = zip(*(determine_cutout_xXyY(cutout) for cutout in cutouts)) + bounds = transform_bounds(4326, 3035, min(xs), min(ys), max(Xs), max(Ys)) + transform, out_shape = get_transform_and_shape(bounds, res=100) + + # adjusted boundaries + shapes = gpd.read_file(snakemake.input.natura).to_crs(3035) + raster = ~geometry_mask(shapes.geometry, out_shape[::-1], transform) + raster = raster.astype(rio.uint8) + + with rio.open(snakemake.output[0], 'w', driver='GTiff', dtype=rio.uint8, + count=1, transform=transform, crs=3035, compress='lzw', + width=raster.shape[1], height=raster.shape[0]) as dst: + dst.write(raster, indexes=1) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 8224ee31..5db87c78 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -231,9 +231,7 @@ if __name__ == '__main__': excluder = atlite.ExclusionContainer(crs=3035, res=100) if config['natura']: - mask = regions.to_crs(3035).buffer(0) # buffer to avoid invalid geometry - natura = gpd.read_file(snakemake.input.natura, mask=mask) - excluder.add_geometry(natura.geometry) + excluder.add_raster(snakemake.input.natura, nodata=0, allow_no_overlap=True) corine = config.get("corine", {}) if "grid_codes" in corine: diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 7a4daaee..05298c84 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -11,11 +11,11 @@ Relevant Settings .. code:: yaml - focus_weights: + clustering: + cluster_network: + aggregation_strategies: - renewable: (keys) - {technology}: - potential: + focus_weights: solving: solver: @@ -122,7 +122,7 @@ Exemplary unsolved network clustered to 37 nodes: """ import logging -from _helpers import configure_logging, update_p_nom_max, REGION_COLS +from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies, REGION_COLS import pypsa import os @@ -138,7 +138,7 @@ import seaborn as sns from functools import reduce from pypsa.networkclustering import (busmap_by_kmeans, busmap_by_spectral_clustering, - _make_consense, get_clustering_from_busmap) + busmap_by_hac, _make_consense, get_clustering_from_busmap) import warnings warnings.filterwarnings(action='ignore', category=UserWarning) @@ -173,6 +173,42 @@ def weighting_for_country(n, x): return (w * (100. / w.max())).clip(lower=1.).astype(int) +def get_feature_for_hac(n, buses_i=None, feature=None): + + if buses_i is None: + buses_i = n.buses.index + + if feature is None: + feature = "solar+onwind-time" + + carriers = feature.split('-')[0].split('+') + if "offwind" in carriers: + carriers.remove("offwind") + carriers = np.append(carriers, network.generators.carrier.filter(like='offwind').unique()) + + if feature.split('-')[1] == 'cap': + feature_data = pd.DataFrame(index=buses_i, columns=carriers) + for carrier in carriers: + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].mean().rename(index = n.generators.loc[gen_i].bus) + feature_data[carrier] = attach + + if feature.split('-')[1] == 'time': + feature_data = pd.DataFrame(columns=buses_i) + for carrier in carriers: + gen_i = n.generators.query("carrier == @carrier").index + attach = n.generators_t.p_max_pu[gen_i].rename(columns = n.generators.loc[gen_i].bus) + feature_data = pd.concat([feature_data, attach], axis=0)[buses_i] + + feature_data = feature_data.T + # timestamp raises error in sklearn >= v1.2: + feature_data.columns = feature_data.columns.astype(str) + + feature_data = feature_data.fillna(0) + + return feature_data + + def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"): """Determine the number of clusters per country""" @@ -221,13 +257,50 @@ def distribute_clusters(n, n_clusters, focus_weights=None, solver_name="cbc"): return pd.Series(m.n.get_values(), index=L.index).round().astype(int) -def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", **algorithm_kwds): +def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algorithm="kmeans", feature=None, **algorithm_kwds): if algorithm == "kmeans": algorithm_kwds.setdefault('n_init', 1000) algorithm_kwds.setdefault('max_iter', 30000) algorithm_kwds.setdefault('tol', 1e-6) algorithm_kwds.setdefault('random_state', 0) + def fix_country_assignment_for_hac(n): + from scipy.sparse import csgraph + + # overwrite country of nodes that are disconnected from their country-topology + for country in n.buses.country.unique(): + m = n[n.buses.country ==country].copy() + + _, labels = csgraph.connected_components(m.adjacency_matrix(), directed=False) + + component = pd.Series(labels, index=m.buses.index) + component_sizes = component.value_counts() + + if len(component_sizes)>1: + disconnected_bus = component[component==component_sizes.index[-1]].index[0] + + neighbor_bus = ( + n.lines.query("bus0 == @disconnected_bus or bus1 == @disconnected_bus") + .iloc[0][['bus0', 'bus1']] + ) + new_country = list(set(n.buses.loc[neighbor_bus].country)-set([country]))[0] + + logger.info( + f"overwriting country `{country}` of bus `{disconnected_bus}` " + f"to new country `{new_country}`, because it is disconnected " + "from its inital inter-country transmission grid." + ) + n.buses.at[disconnected_bus, "country"] = new_country + return n + + if algorithm == "hac": + feature = get_feature_for_hac(n, buses_i=n.buses.index, feature=feature) + n = fix_country_assignment_for_hac(n) + + if (algorithm != "hac") and (feature is not None): + logger.warning(f"Keyword argument feature is only valid for algorithm `hac`. " + f"Given feature `{feature}` will be ignored.") + n.determine_network_topology() n_clusters = distribute_clusters(n, n_clusters, focus_weights=focus_weights, solver_name=solver_name) @@ -251,44 +324,34 @@ def busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights=None, algori return prefix + busmap_by_spectral_clustering(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) elif algorithm == "louvain": return prefix + busmap_by_louvain(reduce_network(n, x), n_clusters[x.name], **algorithm_kwds) + elif algorithm == "hac": + return prefix + busmap_by_hac(n, n_clusters[x.name], buses_i=x.index, feature=feature.loc[x.index]) else: - raise ValueError(f"`algorithm` must be one of 'kmeans', 'spectral' or 'louvain'. Is {algorithm}.") + raise ValueError(f"`algorithm` must be one of 'kmeans', 'hac', 'spectral' or 'louvain'. Is {algorithm}.") return (n.buses.groupby(['country', 'sub_network'], group_keys=False) .apply(busmap_for_country).squeeze().rename('busmap')) def clustering_for_n_clusters(n, n_clusters, custom_busmap=False, aggregate_carriers=None, - line_length_factor=1.25, potential_mode='simple', solver_name="cbc", - algorithm="kmeans", extended_link_costs=0, focus_weights=None): + line_length_factor=1.25, aggregation_strategies=dict(), solver_name="cbc", + algorithm="hac", feature=None, extended_link_costs=0, focus_weights=None): - if potential_mode == 'simple': - p_nom_max_strategy = pd.Series.sum - elif potential_mode == 'conservative': - p_nom_max_strategy = pd.Series.min - else: - raise AttributeError(f"potential_mode should be one of 'simple' or 'conservative' but is '{potential_mode}'") + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) if not isinstance(custom_busmap, pd.Series): - busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm) + busmap = busmap_for_n_clusters(n, n_clusters, solver_name, focus_weights, algorithm, feature) else: busmap = custom_busmap clustering = get_clustering_from_busmap( n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=aggregate_carriers, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=line_length_factor, - generator_strategies={'p_nom_max': p_nom_max_strategy, - 'p_nom_min': pd.Series.sum, - 'p_min_pu': pd.Series.mean, - 'marginal_cost': pd.Series.mean, - 'committable': np.any, - 'ramp_limit_up': pd.Series.max, - 'ramp_limit_down': pd.Series.max, - }, + generator_strategies=generator_strategies, scale_link_capital_costs=False) if not n.links.empty: @@ -369,21 +432,29 @@ if __name__ == "__main__": "The `potential` configuration option must agree for all renewable carriers, for now!" ) return v - potential_mode = consense(pd.Series([snakemake.config['renewable'][tech]['potential'] - for tech in renewable_carriers])) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in aggregation_strategies.keys() + } + custom_busmap = snakemake.config["enable"].get("custom_busmap", False) if custom_busmap: custom_busmap = pd.read_csv(snakemake.input.custom_busmap, index_col=0, squeeze=True) custom_busmap.index = custom_busmap.index.astype(str) logger.info(f"Imported custom busmap from {snakemake.input.custom_busmap}") + cluster_config = snakemake.config.get('clustering', {}).get('cluster_network', {}) clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap, aggregate_carriers, - line_length_factor, potential_mode, + line_length_factor, aggregation_strategies, snakemake.config['solving']['solver']['name'], - "kmeans", hvac_overhead_cost, focus_weights) + cluster_config.get("algorithm", "hac"), + cluster_config.get("feature", "solar+onwind-time"), + hvac_overhead_cost, focus_weights) + + update_p_nom_max(clustering.network) - update_p_nom_max(n) - clustering.network.export_to_netcdf(snakemake.output.network) for attr in ('busmap', 'linemap'): #also available: linemap_positive, linemap_negative getattr(clustering, attr).to_csv(snakemake.output[attr]) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 287dfe32..0b31e5c6 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -13,6 +13,11 @@ Relevant Settings .. code:: yaml + clustering: + simplify_network: + cluster_network: + aggregation_strategies: + costs: USD2013_to_EUR2013: discountrate: @@ -22,10 +27,6 @@ Relevant Settings electricity: max_hours: - renewables: (keys) - {technology}: - potential: - lines: length_factor: @@ -83,7 +84,7 @@ The rule :mod:`simplify_network` does up to four things: """ import logging -from _helpers import configure_logging, update_p_nom_max +from _helpers import configure_logging, update_p_nom_max, get_aggregation_strategies from cluster_network import clustering_for_n_clusters, cluster_regions from add_electricity import load_costs @@ -189,7 +190,10 @@ def _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, out -def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, aggregate_one_ports={"Load", "StorageUnit"}): +def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregate_one_ports={"Load", "StorageUnit"}, + aggregation_strategies=dict()): + def replace_components(n, c, df, pnl): n.mremove(c, n.df(c).index) @@ -200,7 +204,11 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a _adjust_capital_costs_using_connection_costs(n, connection_costs_to_bus, output) - generators, generators_pnl = aggregategenerators(n, busmap, custom_strategies={'p_nom_min': np.sum}) + _, generator_strategies = get_aggregation_strategies(aggregation_strategies) + + generators, generators_pnl = aggregategenerators( + n, busmap, custom_strategies=generator_strategies + ) replace_components(n, "Generator", generators, generators_pnl) for one_port in aggregate_one_ports: @@ -214,7 +222,7 @@ def _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, a n.mremove(c, df.index[df.bus0.isin(buses_to_del) | df.bus1.isin(buses_to_del)]) -def simplify_links(n, costs, config, output): +def simplify_links(n, costs, config, output, aggregation_strategies=dict()): ## Complex multi-node links are folded into end-points logger.info("Simplifying connected link components") @@ -306,21 +314,23 @@ def simplify_links(n, costs, config, output): logger.debug("Collecting all components using the busmap") - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap -def remove_stubs(n, costs, config, output): +def remove_stubs(n, costs, config, output, aggregation_strategies=dict()): logger.info("Removing stubs") busmap = busmap_by_stubs(n) # ['country']) connection_costs_to_bus = _compute_connection_costs_to_bus(n, busmap, costs, config) - _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output) + _aggregate_and_move_components(n, busmap, connection_costs_to_bus, output, + aggregation_strategies=aggregation_strategies) return n, busmap -def aggregate_to_substations(n, buses_i=None): +def aggregate_to_substations(n, aggregation_strategies=dict(), buses_i=None): # can be used to aggregate a selection of buses to electrically closest neighbors # if no buses are given, nodes that are no substations or without offshore connection are aggregated @@ -345,19 +355,20 @@ def aggregate_to_substations(n, buses_i=None): busmap = n.buses.index.to_series() busmap.loc[buses_i] = dist.idxmin(1) + bus_strategies, generator_strategies = get_aggregation_strategies(aggregation_strategies) + clustering = get_clustering_from_busmap(n, busmap, - bus_strategies=dict(country=_make_consense("Bus", "country")), + bus_strategies=bus_strategies, aggregate_generators_weighted=True, aggregate_generators_carriers=None, aggregate_one_ports=["Load", "StorageUnit"], line_length_factor=1.0, - generator_strategies={'p_nom_max': 'sum'}, + generator_strategies=generator_strategies, scale_link_capital_costs=False) - return clustering.network, busmap -def cluster(n, n_clusters, config): +def cluster(n, n_clusters, config, algorithm="hac", feature=None, aggregation_strategies=dict()): logger.info(f"Clustering to {n_clusters} buses") focus_weights = config.get('focus_weights', None) @@ -365,17 +376,11 @@ def cluster(n, n_clusters, config): renewable_carriers = pd.Index([tech for tech in n.generators.carrier.unique() if tech.split('-', 2)[0] in config['renewable']]) - def consense(x): - v = x.iat[0] - assert ((x == v).all() or x.isnull().all()), ( - "The `potential` configuration option must agree for all renewable carriers, for now!" - ) - return v - potential_mode = (consense(pd.Series([config['renewable'][tech]['potential'] - for tech in renewable_carriers])) - if len(renewable_carriers) > 0 else 'conservative') - clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, potential_mode=potential_mode, + + clustering = clustering_for_n_clusters(n, n_clusters, custom_busmap=False, + aggregation_strategies=aggregation_strategies, solver_name=config['solving']['solver']['name'], + algorithm=algorithm, feature=feature, focus_weights=focus_weights) return clustering.network, clustering.busmap @@ -389,24 +394,50 @@ if __name__ == "__main__": n = pypsa.Network(snakemake.input.network) + aggregation_strategies = snakemake.config["clustering"].get("aggregation_strategies", {}) + # translate str entries of aggregation_strategies to pd.Series functions: + aggregation_strategies = { + p: {k: getattr(pd.Series, v) for k,v in aggregation_strategies[p].items()} + for p in aggregation_strategies.keys() + } + n, trafo_map = simplify_network_to_380(n) Nyears = n.snapshot_weightings.objective.sum() / 8760 technology_costs = load_costs(snakemake.input.tech_costs, snakemake.config['costs'], snakemake.config['electricity'], Nyears) - n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output) + n, simplify_links_map = simplify_links(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies) - n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output) + n, stub_map = remove_stubs(n, technology_costs, snakemake.config, snakemake.output, + aggregation_strategies=aggregation_strategies) busmaps = [trafo_map, simplify_links_map, stub_map] - if snakemake.config.get('clustering', {}).get('simplify', {}).get('to_substations', False): - n, substation_map = aggregate_to_substations(n) + cluster_config = snakemake.config.get('clustering', {}).get('simplify_network', {}) + if cluster_config.get('clustering', {}).get('simplify_network', {}).get('to_substations', False): + n, substation_map = aggregate_to_substations(n, aggregation_strategies) busmaps.append(substation_map) + # treatment of outliers (nodes without a profile for considered carrier): + # all nodes that have no profile of the given carrier are being aggregated to closest neighbor + if ( + snakemake.config.get("clustering", {}).get("cluster_network", {}).get("algorithm", "hac") == "hac" or + cluster_config.get("algorithm", "hac") == "hac" + ): + carriers = cluster_config.get("feature", "solar+onwind-time").split('-')[0].split('+') + for carrier in carriers: + buses_i = list(set(n.buses.index)-set(n.generators.query("carrier == @carrier").bus)) + logger.info(f'clustering preparaton (hac): aggregating {len(buses_i)} buses of type {carrier}.') + n, busmap_hac = aggregate_to_substations(n, aggregation_strategies, buses_i) + busmaps.append(busmap_hac) + if snakemake.wildcards.simpl: - n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config) + n, cluster_map = cluster(n, int(snakemake.wildcards.simpl), snakemake.config, + cluster_config.get('algorithm', 'hac'), + cluster_config.get('feature', None), + aggregation_strategies) busmaps.append(cluster_map) # some entries in n.buses are not updated in previous functions, therefore can be wrong. as they are not needed diff --git a/test/config.test1.yaml b/test/config.test1.yaml index 0fd7623e..5aecfdf2 100755 --- a/test/config.test1.yaml +++ b/test/config.test1.yaml @@ -19,8 +19,23 @@ scenario: countries: ['BE'] clustering: - simplify: + simplify_network: to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) + algorithm: kmeans # choose from: [hac, kmeans] + feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. + cluster_network: + algorithm: kmeans + feature: solar+onwind-time + aggregation_strategies: + generators: + p_nom_max: sum # use "min" for more conservative assumptions + p_nom_min: sum + p_min_pu: mean + marginal_cost: mean + committable: any + ramp_limit_up: max + ramp_limit_down: max + efficiency: mean snapshots: start: "2013-03-01" @@ -32,6 +47,8 @@ enable: retrieve_databundle: true build_cutout: false retrieve_cutout: true + build_natura_raster: false + retrieve_natura_raster: true custom_busmap: false electricity: