From 703b206e1f878ce8d2d736a768c668fe0121af41 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 21 May 2024 20:16:57 +0200 Subject: [PATCH] tidy up update_residential_from_eurostat --- doc/release_notes.rst | 2 +- rules/retrieve.smk | 2 +- scripts/build_energy_totals.py | 64 +++++++++------------ scripts/retrieve_eurostat_household_data.py | 2 +- 4 files changed, 29 insertions(+), 41 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 551b8c48..ebb30609 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -316,7 +316,7 @@ Upcoming Release * Mark downloaded files as ``ancient`` rather than ``protected``. -* Fix file name enconding in optional rule :mod:`build_biomass_transport_costs` depending on the operating system. +* Fix file name encoding in optional rule :mod:`build_biomass_transport_costs` depending on the operating system. PyPSA-Eur 0.10.0 (19th February 2024) ===================================== diff --git a/rules/retrieve.smk b/rules/retrieve.smk index d1f35ba8..10ad9684 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -56,7 +56,7 @@ if config["enable"]["retrieve"] and config["enable"].get("retrieve_databundle", rule retrieve_eurostat_household_data: output: - "data/eurostat/eurostat-household_energy_balances-february_2024.csv" + "data/eurostat/eurostat-household_energy_balances-february_2024.csv", log: "logs/retrieve_eurostat_household_data.log", retries: 2 diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index dd5dfb64..60be44a5 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -964,54 +964,42 @@ def update_residential_from_eurostat(energy): Updates energy balances for residential from disaggregated data from Eurostat. """ - # Read disaggregated Eurostat's data - fn = snakemake.input.eurostat_households - eurostat_data = pd.read_csv(fn) + eurostat_households = pd.read_csv(snakemake.input.eurostat_households) # Column mapping for energy type nrg_type = { - "total residential": "FC_OTH_HH_E", - "total residential space": "FC_OTH_HH_E_SH", - "total residential water": "FC_OTH_HH_E_WH", - "total residential cooking": "FC_OTH_HH_E_CK", + "total residential": ("FC_OTH_HH_E", "TOTAL"), + "total residential space": ("FC_OTH_HH_E_SH", "TOTAL"), + "total residential water": ("FC_OTH_HH_E_WH", "TOTAL"), + "total residential cooking": ("FC_OTH_HH_E_CK", "TOTAL"), + "electricity residential": ("FC_OTH_HH_E", "E7000"), + "electricity residential space": ("FC_OTH_HH_E_SH", "E7000"), + "electricity residential water": ("FC_OTH_HH_E_WH", "E7000"), + "electricity residential cooking": ("FC_OTH_HH_E_CK", "E7000"), } - # Make temporary copy of energy_totals - energy_totals = energy.copy().reset_index() + for nrg_name, (code, siec) in nrg_type.items(): - for nrg_name, code in nrg_type.items(): - # Select energy balance type - nrg_data = eurostat_data.query("nrg_bal in @code").copy() - # Rename columns - nrg_data.rename( - columns={"geo": "country", "TIME_PERIOD": "year", "OBS_VALUE": nrg_name}, - inplace=True, - ) - # Convert TJ to TWh - nrg_data[nrg_name] = nrg_data[nrg_name] / 3.6e3 - # Select value, country, year columns - nrg_data = nrg_data[["country", "year", nrg_name]] - # To update energy data with Eurostat households data - # 1) Merge the two DataFrames on 'year' and 'country' - merged_df = energy_totals.merge( - nrg_data, - on=["year", "country"], - suffixes=("_energy_totals", "_nrg_data"), - how="left", - ) - # 2) Update the 'nrg_name' column in energy with the values from nrg_data - energy_totals[nrg_name] = merged_df[f"{nrg_name}_nrg_data"].combine_first( - merged_df[f"{nrg_name}_energy_totals"] - ) + # Select energy balance type, rename columns and countries to match IDEES data, + # convert TJ to TWh, and drop XK data already since included in RS data + col_to_rename = {"geo": "country", "TIME_PERIOD": "year", "OBS_VALUE": nrg_name} + idx_to_rename = {v: k for k, v in idees_rename.items()} + drop_geo = ["EU27_2020", "EA20", "XK"] + nrg_data = eurostat_households.query( + "nrg_bal == @code and siec == @siec and geo not in @drop_geo and OBS_VALUE > 0" + ).copy() + nrg_data.rename(columns=col_to_rename, inplace=True) + nrg_data = nrg_data.set_index(["country", "year"])[nrg_name] / 3.6e3 + nrg_data.rename(index=idx_to_rename, inplace=True) + + # update energy balance from household-specific eurostat data + idx = nrg_data.index.intersection(energy.index) + energy.loc[idx, nrg_name] = nrg_data[idx] - # Set indexes back - energy_totals.set_index(["country", "year"], inplace=True) logger.info( "Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat" ) - return energy_totals - if __name__ == "__main__": if "snakemake" not in globals(): @@ -1046,7 +1034,7 @@ if __name__ == "__main__": logger.info("Extrapolate IDEES data based on eurostat for years 2015-2021.") energy = rescale_idees_from_eurostat(idees_countries, energy, eurostat) - energy = update_residential_from_eurostat(energy) + update_residential_from_eurostat(energy) energy.to_csv(snakemake.output.energy_name) diff --git a/scripts/retrieve_eurostat_household_data.py b/scripts/retrieve_eurostat_household_data.py index af1f05bb..0ad9e34a 100644 --- a/scripts/retrieve_eurostat_household_data.py +++ b/scripts/retrieve_eurostat_household_data.py @@ -29,7 +29,7 @@ if __name__ == "__main__": disable_progress = snakemake.config["run"].get("disable_progressbar", False) - url_eurostat_household = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/3.0/data/dataflow/ESTAT/nrg_d_hhq/1.0/*.*.*.*.*?c[freq]=A&c[nrg_bal]=FC_OTH_HH_E,FC_OTH_HH_E_SH,FC_OTH_HH_E_WH,FC_OTH_HH_E_CK&c[siec]=TOTAL&c[unit]=TJ&c[geo]=EU27_2020,EA20,BE,BG,CZ,DK,DE,EE,IE,EL,ES,FR,HR,IT,CY,LV,LT,LU,HU,MT,NL,AT,PL,PT,RO,SI,SK,FI,SE,NO,UK,BA,MD,MK,AL,RS,UA,XK,GE&compress=true&format=csvdata&formatVersion=2.0&c[time]=2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010" + url_eurostat_household = "https://ec.europa.eu/eurostat/databrowser-backend/api/extraction/1.0/LIVE/false/sdmx/csv/nrg_d_hhq__custom_11480365?startPeriod=2013&endPeriod=2022&i&compressed=true" tarball_fn = Path(f"{rootpath}/data/eurostat/eurostat_household.gz") to_fn = Path( f"{rootpath}/data/eurostat/eurostat-household_energy_balances-february_2024.csv"