diff --git a/scripts/build_biomass_potentials.py b/scripts/build_biomass_potentials.py index c80c6b46..69b886fc 100644 --- a/scripts/build_biomass_potentials.py +++ b/scripts/build_biomass_potentials.py @@ -196,7 +196,7 @@ def convert_nuts2_to_regions(bio_nuts2, regions): if __name__ == "__main__": if 'snakemake' not in globals(): from helper import mock_snakemake - snakemake = mock_snakemake('build_biomass_potentials') + snakemake = mock_snakemake('build_biomass_potentials', simpl='', clusters='5') config = snakemake.config['biomass'] year = config["year"] diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index 38a9895c..7ff5d216 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -1,6 +1,6 @@ from functools import partial from tqdm import tqdm -from helper import mute +from helper import mute_print import multiprocessing as mp import pandas as pd @@ -9,7 +9,6 @@ import numpy as np idx = pd.IndexSlice -mute() def cartesian(s1, s2): """Cartesian product of two pd.Series""" @@ -137,12 +136,13 @@ def build_eurostat(input_eurostat, countries, report_year, year): 2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx" } - dfs = pd.read_excel( - input_eurostat + filenames[report_year], - sheet_name=None, - skiprows=1, - index_col=list(range(4)), - ) + with mute_print(): + dfs = pd.read_excel( + input_eurostat + filenames[report_year], + sheet_name=None, + skiprows=1, + index_col=list(range(4)), + ) # sorted_index necessary for slicing lookup = eurostat_country_to_alpha2 @@ -379,11 +379,13 @@ def idees_per_country(ct, year): def build_idees(countries, year): nprocesses = snakemake.threads + func = partial(idees_per_country, year=year) tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries), desc='Build from IDEES database') - with mp.Pool(processes=nprocesses, initializer=mute) as pool: - totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) + with mute_print(): + with mp.Pool(processes=nprocesses) as pool: + totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) totals = pd.concat(totals_list, axis=1) @@ -568,7 +570,7 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"): # https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16 # downloaded 201228 (modified by EEA last on 201221) - df = pd.read_csv(input_co2, encoding="latin-1") + df = pd.read_csv(input_co2, encoding="latin-1", low_memory=False) df.replace(dict(Year="1985-1987"), 1986, inplace=True) df.Year = df.Year.astype(int) diff --git a/scripts/build_industrial_production_per_country.py b/scripts/build_industrial_production_per_country.py index c88919ab..a00e41ab 100644 --- a/scripts/build_industrial_production_per_country.py +++ b/scripts/build_industrial_production_per_country.py @@ -4,7 +4,7 @@ import pandas as pd import numpy as np import multiprocessing as mp from tqdm import tqdm -from helper import mute +from helper import mute_print tj_to_ktoe = 0.0238845 ktoe_to_twh = 0.01163 @@ -113,15 +113,17 @@ def get_energy_ratio(country): else: # estimate physical output, energy consumption in the sector and country fn = f"{eurostat_dir}/{eb_names[country]}.XLSX" - df = pd.read_excel(fn, sheet_name='2016', index_col=2, - header=0, skiprows=1).squeeze('columns') + with mute_print(): + df = pd.read_excel(fn, sheet_name='2016', index_col=2, + header=0, skiprows=1).squeeze('columns') e_country = df.loc[eb_sectors.keys( ), 'Total all products'].rename(eb_sectors) fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_EU28.xlsx' - df = pd.read_excel(fn, sheet_name='Ind_Summary', - index_col=0, header=0).squeeze('columns') + with mute_print(): + df = pd.read_excel(fn, sheet_name='Ind_Summary', + index_col=0, header=0).squeeze('columns') assert df.index[48] == "by sector" year_i = df.columns.get_loc(year) @@ -140,8 +142,9 @@ def industry_production_per_country(country): jrc_country = jrc_names.get(country, country) fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_{jrc_country}.xlsx' sheet = sub_sheet_name_dict[sector] - df = pd.read_excel(fn, sheet_name=sheet, - index_col=0, header=0).squeeze('columns') + with mute_print(): + df = pd.read_excel(fn, sheet_name=sheet, + index_col=0, header=0).squeeze('columns') year_i = df.columns.get_loc(year) df = df.iloc[find_physical_output(df), year_i] @@ -168,7 +171,7 @@ def industry_production(countries): func = industry_production_per_country tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries), desc="Build industry production") - with mp.Pool(processes=nprocesses, initializer=mute) as pool: + with mp.Pool(processes=nprocesses) as pool: demand_l = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) demand = pd.concat(demand_l, axis=1).T diff --git a/scripts/build_industry_sector_ratios.py b/scripts/build_industry_sector_ratios.py index a8c00941..32a2634e 100644 --- a/scripts/build_industry_sector_ratios.py +++ b/scripts/build_industry_sector_ratios.py @@ -1,9 +1,7 @@ """Build industry sector ratios.""" import pandas as pd -from helper import mute - -mute() +from helper import mute_print # GWh/ktoe OR MWh/toe toe_to_MWh = 11.630 @@ -77,13 +75,14 @@ def load_idees_data(sector, country="EU28"): def usecols(x): return isinstance(x, str) or x == year - idees = pd.read_excel( - f"{snakemake.input.idees}/JRC-IDEES-2015_Industry_{country}.xlsx", - sheet_name=list(sheets.values()), - index_col=0, - header=0, - usecols=usecols, - ) + with mute_print(): + idees = pd.read_excel( + f"{snakemake.input.idees}/JRC-IDEES-2015_Industry_{country}.xlsx", + sheet_name=list(sheets.values()), + index_col=0, + header=0, + usecols=usecols, + ) for k, v in sheets.items(): idees[k] = idees.pop(v).squeeze() diff --git a/scripts/helper.py b/scripts/helper.py index b099061c..ef3dd828 100644 --- a/scripts/helper.py +++ b/scripts/helper.py @@ -1,5 +1,6 @@ import os import sys +import contextlib import yaml import pytz import pandas as pd @@ -11,11 +12,15 @@ from pypsa.components import components, component_attrs import logging logger = logging.getLogger(__name__) -def mute(): - """hide irrelevant outputs of subprocess in multiprocessing pools. - also hide irrelevant outputs caused by pd.read_excel""" - sys.stdout = open(os.devnull, 'w') +# Define a context manager to temporarily mute print statements +@contextlib.contextmanager +def mute_print(): + with open(os.devnull, 'w') as devnull: + with contextlib.redirect_stdout(devnull): + yield + + def override_component_attrs(directory): """Tell PyPSA that links can have multiple outputs by overriding the component_attrs. This can be done for