From b49895a0411533a47803e176ac149e6ea7bba9c4 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 19 Jul 2024 19:43:11 +0200 Subject: [PATCH] determine_availability_matrix_MD_UA: enable parallelism & remove plots (#1170) * determine_availability_matrix_MD_UA: enable parallelism through temp files and remove plots * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- doc/release_notes.rst | 8 +++-- rules/build_electricity.smk | 1 - scripts/build_gdp_pop_non_nuts3.py | 9 ++--- .../determine_availability_matrix_MD_UA.py | 35 ++++++++++++------- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 2cf0fe70..eb29ce4b 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -31,14 +31,16 @@ Upcoming Release * Bugfix: Correctly read in threshold capacity below which to remove components from previous planning horizons in :mod:`add_brownfield`. -* For countries not contained in the NUTS3-specific datasets (i.e. MD and UA), the mapping of GDP per capita and population per bus region used to spatially distribute electricity demand is now endogenised in a new rule :mod:`build_gdp_ppp_non_nuts3`. https://github.com/PyPSA/pypsa-eur/pull/1146 +* For countries not contained in the NUTS3-specific datasets (i.e. MD and UA), the mapping of GDP per capita and population per bus region used to spatially distribute electricity demand is now endogenised in a new rule :mod:`build_gdp_ppp_non_nuts3`. https://github.com/PyPSA/pypsa-eur/pull/1146 -* The databundle has been updated to release v0.3.0, which includes raw GDP and population data for countries outside the NUTS system (UA, MD). 
https://github.com/PyPSA/pypsa-eur/pull/1146 +* The databundle has been updated to release v0.3.0, which includes raw GDP and population data for countries outside the NUTS system (UA, MD). https://github.com/PyPSA/pypsa-eur/pull/1146 -* Updated filtering in :mod:`determine_availability_matrix_MD_UA.py` to improve speed. https://github.com/PyPSA/pypsa-eur/pull/1146 +* Updated filtering in :mod:`determine_availability_matrix_MD_UA.py` to improve speed. https://github.com/PyPSA/pypsa-eur/pull/1146 * Bugfix: Impose minimum value of zero for district heating progress between current and future market share in :mod:`build_district_heat_share`. +* Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files. + PyPSA-Eur 0.11.0 (25th May 2024) ===================================== diff --git a/rules/build_electricity.smk b/rules/build_electricity.smk index a9ab71ef..18ff8230 100644 --- a/rules/build_electricity.smk +++ b/rules/build_electricity.smk @@ -202,7 +202,6 @@ rule determine_availability_matrix_MD_UA: + ".nc", output: availability_matrix=resources("availability_matrix_MD-UA_{technology}.nc"), - availability_map=resources("availability_matrix_MD-UA_{technology}.png"), log: logs("determine_availability_matrix_MD_UA_{technology}.log"), threads: config["atlite"].get("nprocesses", 4) diff --git a/scripts/build_gdp_pop_non_nuts3.py b/scripts/build_gdp_pop_non_nuts3.py index fad73dfe..d475aec9 100644 --- a/scripts/build_gdp_pop_non_nuts3.py +++ b/scripts/build_gdp_pop_non_nuts3.py @@ -3,10 +3,11 @@ # # SPDX-License-Identifier: MIT """ -Maps the per-capita GDP and population values to non-NUTS3 regions. The script -takes as input the country code, a GeoDataFrame containing the regions, and the -file paths to the datasets containing the GDP and POP values for non-NUTS3 -countries. +Maps the per-capita GDP and population values to non-NUTS3 regions. 
+ +The script takes as input the country code, a GeoDataFrame containing +the regions, and the file paths to the datasets containing the GDP and +POP values for non-NUTS3 countries. """ import logging diff --git a/scripts/determine_availability_matrix_MD_UA.py b/scripts/determine_availability_matrix_MD_UA.py index 2ed11d3c..0e7962ab 100644 --- a/scripts/determine_availability_matrix_MD_UA.py +++ b/scripts/determine_availability_matrix_MD_UA.py @@ -8,16 +8,15 @@ Create land eligibility analysis for Ukraine and Moldova with different datasets. import functools import logging +import os import time +from tempfile import NamedTemporaryFile import atlite import fiona import geopandas as gpd -import matplotlib.pyplot as plt import numpy as np from _helpers import configure_logging, set_scenario_config -from atlite.gis import shape_availability -from rasterio.plot import show logger = logging.getLogger(__name__) @@ -40,7 +39,7 @@ if __name__ == "__main__": configure_logging(snakemake) set_scenario_config(snakemake) - nprocesses = None # snakemake.config["atlite"].get("nprocesses") + nprocesses = int(snakemake.threads) noprogress = not snakemake.config["atlite"].get("show_progress", True) config = snakemake.config["renewable"][snakemake.wildcards.technology] @@ -95,8 +94,15 @@ if __name__ == "__main__": bbox=regions.geometry, layer=layer, ).to_crs(3035) + + # temporary file needed for parallelization + with NamedTemporaryFile(suffix=".geojson", delete=False) as f: + plg_tmp_fn = f.name if not wdpa.empty: - excluder.add_geometry(wdpa.geometry) + wdpa[["geometry"]].to_file(plg_tmp_fn) + while not os.path.exists(plg_tmp_fn): + time.sleep(1) + excluder.add_geometry(plg_tmp_fn) layer = get_wdpa_layer_name(wdpa_fn, "points") wdpa_pts = gpd.read_file( @@ -109,8 +115,15 @@ if __name__ == "__main__": wdpa_pts = wdpa_pts.set_geometry( wdpa_pts["geometry"].buffer(wdpa_pts["buffer_radius"]) ) + + # temporary file needed for parallelization + with NamedTemporaryFile(suffix=".geojson",
delete=False) as f: + pts_tmp_fn = f.name if not wdpa_pts.empty: - excluder.add_geometry(wdpa_pts.geometry) + wdpa_pts[["geometry"]].to_file(pts_tmp_fn) + while not os.path.exists(pts_tmp_fn): + time.sleep(1) + excluder.add_geometry(pts_tmp_fn) if "max_depth" in config: # lambda not supported for atlite + multiprocessing @@ -146,13 +159,9 @@ if __name__ == "__main__": else: availability = cutout.availabilitymatrix(regions, excluder, **kwargs) - regions_geometry = regions.to_crs(3035).geometry - band, transform = shape_availability(regions_geometry, excluder) - fig, ax = plt.subplots(figsize=(4, 8)) - gpd.GeoSeries(regions_geometry.union_all()).plot(ax=ax, color="none") - show(band, transform=transform, cmap="Greens", ax=ax) - plt.axis("off") - plt.savefig(snakemake.output.availability_map, bbox_inches="tight", dpi=500) + for fn in [pts_tmp_fn, plg_tmp_fn]: + if os.path.exists(fn): + os.remove(fn) availability = availability.sel(bus=buses)