determine_availability_matrix_MD_UA: enable parallelism & remove plots (#1170)

* determine_availability_matrix_MD_UA: enable parallelism through temp files and remove plots
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-07-19 19:43:11 +02:00 · 2024-07-19 19:43:11 +02:00 · b49895a041
commit b49895a041
parent ba55971c23
4 changed files with 32 additions and 21 deletions
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@ -31,14 +31,16 @@ Upcoming Release
 * Bugfix: Correctly read in threshold capacity below which to remove components from previous planning horizons in :mod:`add_brownfield`.
-* For countries not contained in the NUTS3-specific datasets (i.e. MD and UA), the mapping of GDP per capita and population per bus region used to spatially distribute electricity demand is now endogenised in a new rule :mod:`build_gdp_ppp_non_nuts3`. https://github.com/PyPSA/pypsa-eur/pull/1146 
+* For countries not contained in the NUTS3-specific datasets (i.e. MD and UA), the mapping of GDP per capita and population per bus region used to spatially distribute electricity demand is now endogenised in a new rule :mod:`build_gdp_ppp_non_nuts3`. https://github.com/PyPSA/pypsa-eur/pull/1146
-* The databundle has been updated to release v0.3.0, which includes raw GDP and population data for countries outside the NUTS system (UA, MD). https://github.com/PyPSA/pypsa-eur/pull/1146 
+* The databundle has been updated to release v0.3.0, which includes raw GDP and population data for countries outside the NUTS system (UA, MD). https://github.com/PyPSA/pypsa-eur/pull/1146
-* Updated filtering in :mod:`determine_availability_matrix_MD_UA.py` to improve speed. https://github.com/PyPSA/pypsa-eur/pull/1146 
+* Updated filtering in :mod:`determine_availability_matrix_MD_UA.py` to improve speed. https://github.com/PyPSA/pypsa-eur/pull/1146
 * Bugfix: Impose minimum value of zero for district heating progress between current and future market share in :mod:`build_district_heat_share`.
 * Enable parallelism in :mod:`determine_availability_matrix_MD_UA.py` and remove plots. This requires the use of temporary files.
 PyPSA-Eur 0.11.0 (25th May 2024)
 =====================================
--- a/rules/build_electricity.smk
+++ b/rules/build_electricity.smk
@ -202,7 +202,6 @@ rule determine_availability_matrix_MD_UA:
        + ".nc",
    output:
        availability_matrix=resources("availability_matrix_MD-UA_{technology}.nc"),
        availability_map=resources("availability_matrix_MD-UA_{technology}.png"),
    log:
        logs("determine_availability_matrix_MD_UA_{technology}.log"),
    threads: config["atlite"].get("nprocesses", 4)
--- a/scripts/build_gdp_pop_non_nuts3.py
+++ b/scripts/build_gdp_pop_non_nuts3.py
@ -3,10 +3,11 @@
 #
 # SPDX-License-Identifier: MIT
 """
-Maps the per-capita GDP and population values to non-NUTS3 regions. The script
+Maps the per-capita GDP and population values to non-NUTS3 regions.
-takes as input the country code, a GeoDataFrame containing the regions, and the
+
-file paths to the datasets containing the GDP and POP values for non-NUTS3
+The script takes as input the country code, a GeoDataFrame containing
-countries.
+the regions, and the file paths to the datasets containing the GDP and
 POP values for non-NUTS3 countries.
 """
 import logging
--- a/scripts/determine_availability_matrix_MD_UA.py
+++ b/scripts/determine_availability_matrix_MD_UA.py
@ -8,16 +8,15 @@ Create land eligibility analysis for Ukraine and Moldova with different datasets.
 import functools
 import logging
 import os
 import time
 from tempfile import NamedTemporaryFile
 import atlite
 import fiona
 import geopandas as gpd
 import matplotlib.pyplot as plt
 import numpy as np
 from _helpers import configure_logging, set_scenario_config
 from atlite.gis import shape_availability
 from rasterio.plot import show
 logger = logging.getLogger(__name__)
@ -40,7 +39,7 @@ if __name__ == "__main__":
    configure_logging(snakemake)
    set_scenario_config(snakemake)
-    nprocesses = None  # snakemake.config["atlite"].get("nprocesses")
+    nprocesses = int(snakemake.threads)
    noprogress = not snakemake.config["atlite"].get("show_progress", True)
    config = snakemake.config["renewable"][snakemake.wildcards.technology]
@ -95,8 +94,15 @@ if __name__ == "__main__":
            bbox=regions.geometry,
            layer=layer,
        ).to_crs(3035)
        # temporary file needed for parallelization
        with NamedTemporaryFile(suffix=".geojson", delete=False) as f:
            plg_tmp_fn = f.name
        if not wdpa.empty:
-            excluder.add_geometry(wdpa.geometry)
+            wdpa[["geometry"]].to_file(plg_tmp_fn)
            while not os.path.exists(plg_tmp_fn):
                time.sleep(1)
            excluder.add_geometry(plg_tmp_fn)
        layer = get_wdpa_layer_name(wdpa_fn, "points")
        wdpa_pts = gpd.read_file(
@ -109,8 +115,15 @@ if __name__ == "__main__":
        wdpa_pts = wdpa_pts.set_geometry(
            wdpa_pts["geometry"].buffer(wdpa_pts["buffer_radius"])
        )
        # temporary file needed for parallelization
        with NamedTemporaryFile(suffix=".geojson", delete=False) as f:
            pts_tmp_fn = f.name
        if not wdpa_pts.empty:
-            excluder.add_geometry(wdpa_pts.geometry)
+            wdpa_pts[["geometry"]].to_file(pts_tmp_fn)
            while not os.path.exists(pts_tmp_fn):
                time.sleep(1)
            excluder.add_geometry(pts_tmp_fn)
    if "max_depth" in config:
        # lambda not supported for atlite + multiprocessing
@ -146,13 +159,9 @@ if __name__ == "__main__":
    else:
        availability = cutout.availabilitymatrix(regions, excluder, **kwargs)
-    regions_geometry = regions.to_crs(3035).geometry
+    for fn in [pts_tmp_fn, plg_tmp_fn]:
-    band, transform = shape_availability(regions_geometry, excluder)
+        if os.path.exists(fn):
-    fig, ax = plt.subplots(figsize=(4, 8))
+            os.remove(fn)
    gpd.GeoSeries(regions_geometry.union_all()).plot(ax=ax, color="none")
    show(band, transform=transform, cmap="Greens", ax=ax)
    plt.axis("off")
    plt.savefig(snakemake.output.availability_map, bbox_inches="tight", dpi=500)
    availability = availability.sel(bus=buses)