scenario-management: use list for including wildcards with shared resource

This commit is contained in:
Fabian 2023-08-24 13:17:44 +02:00
parent b91a7b9c51
commit 04a2bea176
5 changed files with 71 additions and 39 deletions

View File

@ -25,10 +25,10 @@ COSTS = f"data/costs_{config['costs']['year']}.csv"
ATLITE_NPROCESSES = config["atlite"].get("nprocesses", 4)
run = config["run"]
scenario = run.get("scenario", {})
scenarios = run.get("scenarios", {})
if run["name"]:
if scenario.get("enable"):
fn = Path(scenario["file"])
if scenarios.get("enable"):
fn = Path(scenarios["file"])
scenarios = yaml.safe_load(fn.read_text())
RDIR = "{run}/"
else:

View File

@ -8,7 +8,7 @@ run:
name:
- test-elec-no-offshore-wind
- test-elec-no-onshore-wind
scenario:
scenarios:
enable: true
file: "config/test/scenarios.electricity.yaml"
disable_progressbar: true

View File

@ -1,8 +1,8 @@
,Unit,Values,Description
name,--,str/list,"Specify a name for your run. Results will be stored under this name. If ``scenarios: enable`` is set to ``true``, the name must contain a subset of scenario names defined in ``scenarios: file``."
scenario,,,
scenarios,,,
-- enable,bool,"{true, false}","Switch to select whether workflow should generate scenarios based on ``file``."
-- file,str,,"Path to the scenario yaml file. The scenario file contains config overrides for each scenario. In order to be taken into account, ``run:scenarios:enable`` has to be set to ``true`` and ``run:name`` has to be a subset of top level keys given in the scenario file. In order to automatically create a ``scenario.yaml`` file based on a combination of settings, alter and use the ``create_scenarios.py`` script in ``scripts``."
disable_progressbar,bool,"{true, false}","Switch to select whether progressbar should be disabled."
shared_resources,bool/str,,"Switch to select whether resources should be shared across runs. If a string is passed, it is assumed to be a wildcard or 'base' that indicates the cutoff after which resources are no longer shared. If 'base' is passed, resources before creating the elec.nc file are shared."
shared_resources,bool/str/list,,"Switch to select whether resources should be shared across runs. If a string or list is passed, it is assumed to be wildcard(s) which indicates up to which set of wildcards the resource folder should be shared. If set to 'base', only resources before creating the elec.nc file are shared."
shared_cutouts,bool,"{true, false}","Switch to select whether cutouts should be shared across runs."

Can't render this file because it has a wrong number of fields in line 5.

View File

@ -58,7 +58,7 @@ def config_provider(*keys, default=None):
my_param=config_provider("key1", "key2", default="some_default_value")
"""
# Using functools.partial to freeze certain arguments in our getter functions.
if config["run"].get("scenarios", False):
if config["run"].get("scenarios", {}).get("enable", False):
return partial(dynamic_getter, keys=keys, default=default)
else:
return partial(static_getter, keys=keys, default=default)

View File

@ -8,6 +8,7 @@ import logging
import os
import re
import urllib
from functools import partial
from pathlib import Path
import pandas as pd
@ -21,41 +22,72 @@ logger = logging.getLogger(__name__)
REGION_COLS = ["geometry", "name", "x", "y", "country"]
def get_run_path(fn, dir, rdir, shared_resources):
    """
    Dynamically provide paths based on shared resources and filename.

    Use this function for snakemake rule inputs or outputs that should be
    optionally shared across runs or created individually for each run.

    Parameters
    ----------
    fn : str
        The filename for the path to be generated.
    dir : str
        The base directory.
    rdir : str
        Relative directory for non-shared resources.
    shared_resources : str, list, or bool
        Specifies which resources should be shared.
        - If string or list, assumed to be superset of wildcards for sharing.
        - If "base", special handling for shared "base" resources.
        - If boolean, directly specifies if the resource is shared.

    Returns
    -------
    str
        Full path where the resource should be stored: ``dir + fn`` when the
        resource is shared, ``dir + rdir + fn`` otherwise.

    Notes
    -----
    Special case for "base" allows no wildcards other than
    "technology" and excludes filenames starting with "networks/elec" or
    "add_electricity".
    """
    # Names of all ``{wildcard}`` placeholders that occur in the filename.
    wildcards = set(re.findall(r"\{([^{}]+)\}", fn))

    if shared_resources == "base":
        # Special case for shared "base" resources: only the "technology"
        # wildcard is tolerated, and outputs of the electricity rules are
        # never shared.
        is_shared = wildcards <= {"technology"} and not fn.startswith(
            ("networks/elec", "add_electricity")
        )
    elif isinstance(shared_resources, (str, list)):
        # A single wildcard name is treated like a one-element list. A local
        # is used so the caller-visible argument is not rebound.
        allowed = (
            [shared_resources]
            if isinstance(shared_resources, str)
            else shared_resources
        )
        # Shared only if every wildcard in the filename is in the allowed set;
        # a filename without wildcards is therefore always shared here.
        is_shared = wildcards.issubset(allowed)
    else:
        # Boolean (or other truthy/falsy) switch is used as-is.
        is_shared = shared_resources

    return f"{dir}{fn}" if is_shared else f"{dir}{rdir}{fn}"
def path_provider(dir, rdir, shared_resources):
    """
    Dynamically provide paths based on shared resources.

    Returns a partial function that dynamically provides paths based on shared
    resources and the filename.

    Use this function whenever there is an input or output to a
    snakemake rule that should, optionally, be either shared across runs
    or created individually for each run. The decision for each filename is
    delegated to ``get_run_path`` with ``dir``, ``rdir`` and
    ``shared_resources`` frozen to the values given here.

    Parameters
    ----------
    dir : str
        The base directory.
    rdir : str
        Relative directory for non-shared resources.
    shared_resources : str, list, or bool
        Passed through unchanged to ``get_run_path``.

    Returns
    -------
    partial function
        A partial function that takes a filename as input and
        returns the path to the file based on the shared_resources parameter.
    """
    # Single source of truth: the former nested ``path`` closure duplicated
    # (and disagreed with) the sharing rules in ``get_run_path`` and made the
    # delegating return below unreachable.
    return partial(get_run_path, dir=dir, rdir=rdir, shared_resources=shared_resources)
# Define a context manager to temporarily mute print statements
@ -67,7 +99,7 @@ def mute_print():
def set_scenario_config(snakemake):
scenario = snakemake.config["run"].get("scenario", {})
scenario = snakemake.config["run"].get("scenarios", {})
if scenario.get("enable") and "run" in snakemake.wildcards.keys():
try:
with open(scenario["file"], "r") as f: