Merge pull request #307 from PyPSA/read-excel-mute-follow-up

Mute stdout output while reading Excel files by using a context manager
This commit is contained in:
Fabian Hofmann 2023-02-22 07:24:25 +01:00 committed by GitHub
commit 2d5b832e66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 34 deletions

View File

@ -196,7 +196,7 @@ def convert_nuts2_to_regions(bio_nuts2, regions):
if __name__ == "__main__": if __name__ == "__main__":
if 'snakemake' not in globals(): if 'snakemake' not in globals():
from helper import mock_snakemake from helper import mock_snakemake
snakemake = mock_snakemake('build_biomass_potentials') snakemake = mock_snakemake('build_biomass_potentials', simpl='', clusters='5')
config = snakemake.config['biomass'] config = snakemake.config['biomass']
year = config["year"] year = config["year"]

View File

@ -1,6 +1,6 @@
from functools import partial from functools import partial
from tqdm import tqdm from tqdm import tqdm
from helper import mute from helper import mute_print
import multiprocessing as mp import multiprocessing as mp
import pandas as pd import pandas as pd
@ -9,7 +9,6 @@ import numpy as np
idx = pd.IndexSlice idx = pd.IndexSlice
mute()
def cartesian(s1, s2): def cartesian(s1, s2):
"""Cartesian product of two pd.Series""" """Cartesian product of two pd.Series"""
@ -137,6 +136,7 @@ def build_eurostat(input_eurostat, countries, report_year, year):
2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx" 2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx"
} }
with mute_print():
dfs = pd.read_excel( dfs = pd.read_excel(
input_eurostat + filenames[report_year], input_eurostat + filenames[report_year],
sheet_name=None, sheet_name=None,
@ -379,10 +379,12 @@ def idees_per_country(ct, year):
def build_idees(countries, year): def build_idees(countries, year):
nprocesses = snakemake.threads nprocesses = snakemake.threads
func = partial(idees_per_country, year=year) func = partial(idees_per_country, year=year)
tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries), tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries),
desc='Build from IDEES database') desc='Build from IDEES database')
with mp.Pool(processes=nprocesses, initializer=mute) as pool: with mute_print():
with mp.Pool(processes=nprocesses) as pool:
totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))
@ -568,7 +570,7 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
# https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16 # https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16
# downloaded 201228 (modified by EEA last on 201221) # downloaded 201228 (modified by EEA last on 201221)
df = pd.read_csv(input_co2, encoding="latin-1") df = pd.read_csv(input_co2, encoding="latin-1", low_memory=False)
df.replace(dict(Year="1985-1987"), 1986, inplace=True) df.replace(dict(Year="1985-1987"), 1986, inplace=True)
df.Year = df.Year.astype(int) df.Year = df.Year.astype(int)

View File

@ -4,7 +4,7 @@ import pandas as pd
import numpy as np import numpy as np
import multiprocessing as mp import multiprocessing as mp
from tqdm import tqdm from tqdm import tqdm
from helper import mute from helper import mute_print
tj_to_ktoe = 0.0238845 tj_to_ktoe = 0.0238845
ktoe_to_twh = 0.01163 ktoe_to_twh = 0.01163
@ -113,6 +113,7 @@ def get_energy_ratio(country):
else: else:
# estimate physical output, energy consumption in the sector and country # estimate physical output, energy consumption in the sector and country
fn = f"{eurostat_dir}/{eb_names[country]}.XLSX" fn = f"{eurostat_dir}/{eb_names[country]}.XLSX"
with mute_print():
df = pd.read_excel(fn, sheet_name='2016', index_col=2, df = pd.read_excel(fn, sheet_name='2016', index_col=2,
header=0, skiprows=1).squeeze('columns') header=0, skiprows=1).squeeze('columns')
e_country = df.loc[eb_sectors.keys( e_country = df.loc[eb_sectors.keys(
@ -120,6 +121,7 @@ def get_energy_ratio(country):
fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_EU28.xlsx' fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_EU28.xlsx'
with mute_print():
df = pd.read_excel(fn, sheet_name='Ind_Summary', df = pd.read_excel(fn, sheet_name='Ind_Summary',
index_col=0, header=0).squeeze('columns') index_col=0, header=0).squeeze('columns')
@ -140,6 +142,7 @@ def industry_production_per_country(country):
jrc_country = jrc_names.get(country, country) jrc_country = jrc_names.get(country, country)
fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_{jrc_country}.xlsx' fn = f'{jrc_dir}/JRC-IDEES-2015_Industry_{jrc_country}.xlsx'
sheet = sub_sheet_name_dict[sector] sheet = sub_sheet_name_dict[sector]
with mute_print():
df = pd.read_excel(fn, sheet_name=sheet, df = pd.read_excel(fn, sheet_name=sheet,
index_col=0, header=0).squeeze('columns') index_col=0, header=0).squeeze('columns')
@ -168,7 +171,7 @@ def industry_production(countries):
func = industry_production_per_country func = industry_production_per_country
tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries), tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries),
desc="Build industry production") desc="Build industry production")
with mp.Pool(processes=nprocesses, initializer=mute) as pool: with mp.Pool(processes=nprocesses) as pool:
demand_l = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) demand_l = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))
demand = pd.concat(demand_l, axis=1).T demand = pd.concat(demand_l, axis=1).T

View File

@ -1,9 +1,7 @@
"""Build industry sector ratios.""" """Build industry sector ratios."""
import pandas as pd import pandas as pd
from helper import mute from helper import mute_print
mute()
# GWh/ktoe OR MWh/toe # GWh/ktoe OR MWh/toe
toe_to_MWh = 11.630 toe_to_MWh = 11.630
@ -77,6 +75,7 @@ def load_idees_data(sector, country="EU28"):
def usecols(x): def usecols(x):
return isinstance(x, str) or x == year return isinstance(x, str) or x == year
with mute_print():
idees = pd.read_excel( idees = pd.read_excel(
f"{snakemake.input.idees}/JRC-IDEES-2015_Industry_{country}.xlsx", f"{snakemake.input.idees}/JRC-IDEES-2015_Industry_{country}.xlsx",
sheet_name=list(sheets.values()), sheet_name=list(sheets.values()),

View File

@ -1,5 +1,6 @@
import os import os
import sys import sys
import contextlib
import yaml import yaml
import pytz import pytz
import pandas as pd import pandas as pd
@ -11,10 +12,14 @@ from pypsa.components import components, component_attrs
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def mute():
"""hide irrelevant outputs of subprocess in multiprocessing pools. # Define a context manager to temporarily mute print statements
also hide irrelevant outputs caused by pd.read_excel""" @contextlib.contextmanager
sys.stdout = open(os.devnull, 'w') def mute_print():
with open(os.devnull, 'w') as devnull:
with contextlib.redirect_stdout(devnull):
yield
def override_component_attrs(directory): def override_component_attrs(directory):
"""Tell PyPSA that links can have multiple outputs by """Tell PyPSA that links can have multiple outputs by