From 8b28f34f144d77df749ae042ff2800f8251033b2 Mon Sep 17 00:00:00 2001 From: yerbol-akhmetov Date: Sun, 14 Apr 2024 13:19:42 +0500 Subject: [PATCH 1/5] retrieve eurostat disagregated energy balances for household --- rules/retrieve.smk | 1 + scripts/retrieve_eurostat_data.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/rules/retrieve.smk b/rules/retrieve.smk index 4b244483..e4a897a2 100644 --- a/rules/retrieve.smk +++ b/rules/retrieve.smk @@ -148,6 +148,7 @@ if config["enable"]["retrieve"] and config["enable"].get( rule retrieve_eurostat_data: output: directory("data/eurostat/eurostat-energy_balances-april_2023_edition"), + "data/eurostat/eurostat-household_energy_balances-february_2024.csv", log: "logs/retrieve_eurostat_data.log", retries: 2 diff --git a/scripts/retrieve_eurostat_data.py b/scripts/retrieve_eurostat_data.py index 4b4cea4a..b00dd3ba 100644 --- a/scripts/retrieve_eurostat_data.py +++ b/scripts/retrieve_eurostat_data.py @@ -9,6 +9,8 @@ Retrieve and extract eurostat energy balances data. import logging import zipfile +import gzip +import shutil from pathlib import Path from _helpers import configure_logging, progress_retrieve, set_scenario_config @@ -41,3 +43,18 @@ if __name__ == "__main__": zip_ref.extractall(to_fn) logger.info(f"Eurostat data available in '{to_fn}'.") + + url_eurostat_household = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/3.0/data/dataflow/ESTAT/nrg_d_hhq/1.0/*.*.*.*.*?c[freq]=A&c[nrg_bal]=FC_OTH_HH_E,FC_OTH_HH_E_SH,FC_OTH_HH_E_WH,FC_OTH_HH_E_CK&c[siec]=TOTAL&c[unit]=TJ&c[geo]=EU27_2020,EA20,BE,BG,CZ,DK,DE,EE,IE,EL,ES,FR,HR,IT,CY,LV,LT,LU,HU,MT,NL,AT,PL,PT,RO,SI,SK,FI,SE,NO,UK,BA,MD,MK,AL,RS,UA,XK,GE&compress=true&format=csvdata&formatVersion=2.0&c[time]=2021,2020,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010" + tarball_fn = Path(f"{rootpath}/data/eurostat/eurostat_household.gz") + to_fn = Path( + f"{rootpath}/data/eurostat/eurostat-household_energy_balances-february_2024.csv" + ) + + logger.info(f"Downloading Eurostats' disaggregated household energy balances data from '{url_eurostat_household}'.") + progress_retrieve(url_eurostat_household, tarball_fn, disable=disable_progress) + + logger.info("Extracting Eurostat's disaggregated household energy balance data.") + with gzip.open(tarball_fn, 'rb') as f_in, open(to_fn, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + + logger.info(f"Eurostat's disaggregated household energy balance data available in '{to_fn}'.") From d2584d37b1e63490b61f1fc6d86c5605601b9205 Mon Sep 17 00:00:00 2001 From: yerbol-akhmetov Date: Sun, 14 Apr 2024 16:28:50 +0500 Subject: [PATCH 2/5] update energy totals from Eurostat residential data --- rules/build_sector.smk | 1 + scripts/build_energy_totals.py | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/rules/build_sector.smk b/rules/build_sector.smk index e1575a78..28540663 100644 --- a/rules/build_sector.smk +++ b/rules/build_sector.smk @@ -293,6 +293,7 @@ rule build_energy_totals: idees="data/bundle-sector/jrc-idees-2015", district_heat_share="data/district_heat_share.csv", eurostat="data/eurostat/eurostat-energy_balances-april_2023_edition", + eurostat_households="data/eurostat/eurostat-household_energy_balances-february_2024.csv", output: energy_name=resources("energy_totals.csv"), co2_name=resources("co2_totals.csv"), diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index b56d3294..95ddb6b7 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -943,6 +943,44 @@ def rescale_idees_from_eurostat( return energy +def update_residential_from_eurostat(energy): + """ + Updates energy balances for residential from disaggregated data from Eurostat + """ + # Read disaggregated Eurostat's data + fn = snakemake.input.eurostat_households + eurostat_data = pd.read_csv(fn) + + # Column mapping for energy type + nrg_type = {"total residential":"FC_OTH_HH_E", + "total residential space":"FC_OTH_HH_E_SH", + "total residential water":"FC_OTH_HH_E_WH", + "total residential cooking":"FC_OTH_HH_E_CK"} + + # Make temporary copy of energy_totals + energy_totals = energy.copy().reset_index() + + for nrg_name, code in nrg_type.items(): + # Select energy balance type + nrg_data = eurostat_data.query("nrg_bal in @code").copy() + # Rename columns + nrg_data.rename(columns={"geo":"country", "TIME_PERIOD":"year", "OBS_VALUE":nrg_name}, inplace=True) + # Convert TJ to TWh + nrg_data[nrg_name] = nrg_data[nrg_name] / 3.6e3 + # Select value, country, year columns + nrg_data = nrg_data[["country","year", nrg_name]] + # To update energy data with Eurostat households data + # 1) Merge the two DataFrames on 'year' and 'country' + merged_df = energy_totals.merge(nrg_data, on=['year', 'country'], suffixes=('_energy_totals', '_nrg_data'), how='left') + # 2) Update the 'nrg_name' column in energy with the values from nrg_data + energy_totals[nrg_name] = merged_df[f'{nrg_name}_nrg_data'].combine_first(merged_df[f'{nrg_name}_energy_totals']) + + # Set indexes back + energy_totals.set_index(['country', 'year'], inplace=True) + + return energy_totals + + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake @@ -976,6 +1014,8 @@ if __name__ == "__main__": logger.info("Extrapolate IDEES data based on eurostat for years 2015-2021.") energy = rescale_idees_from_eurostat(idees_countries, energy, eurostat) + energy = update_residential_from_eurostat(energy) + energy.to_csv(snakemake.output.energy_name) # use rescaled idees data to calculate district heat share From 7436634bf87a42f807ccecfb1ede9ac43da62b86 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Apr 2024 11:33:16 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_energy_totals.py | 43 ++++++++++++++++++++----------- scripts/retrieve_eurostat_data.py | 14 ++++++---- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index 95ddb6b7..f32815b1 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -945,18 +945,21 @@ def rescale_idees_from_eurostat( def update_residential_from_eurostat(energy): """ - Updates energy balances for residential from disaggregated data from Eurostat + Updates energy balances for residential from disaggregated data from + Eurostat. """ # Read disaggregated Eurostat's data fn = snakemake.input.eurostat_households eurostat_data = pd.read_csv(fn) - # Column mapping for energy type - nrg_type = {"total residential":"FC_OTH_HH_E", - "total residential space":"FC_OTH_HH_E_SH", - "total residential water":"FC_OTH_HH_E_WH", - "total residential cooking":"FC_OTH_HH_E_CK"} - + # Column mapping for energy type + nrg_type = { + "total residential": "FC_OTH_HH_E", + "total residential space": "FC_OTH_HH_E_SH", + "total residential water": "FC_OTH_HH_E_WH", + "total residential cooking": "FC_OTH_HH_E_CK", + } + # Make temporary copy of energy_totals energy_totals = energy.copy().reset_index() @@ -964,19 +967,29 @@ def update_residential_from_eurostat(energy): # Select energy balance type nrg_data = eurostat_data.query("nrg_bal in @code").copy() # Rename columns - nrg_data.rename(columns={"geo":"country", "TIME_PERIOD":"year", "OBS_VALUE":nrg_name}, inplace=True) + nrg_data.rename( + columns={"geo": "country", "TIME_PERIOD": "year", "OBS_VALUE": nrg_name}, + inplace=True, + ) # Convert TJ to TWh nrg_data[nrg_name] = nrg_data[nrg_name] / 3.6e3 - # Select value, country, year columns - nrg_data = nrg_data[["country","year", nrg_name]] - # To update energy data with Eurostat households data + # Select value, country, year columns + nrg_data = nrg_data[["country", "year", nrg_name]] + # To update energy data with Eurostat households data # 1) Merge the two DataFrames on 'year' and 'country' - merged_df = energy_totals.merge(nrg_data, on=['year', 'country'], suffixes=('_energy_totals', '_nrg_data'), how='left') + merged_df = energy_totals.merge( + nrg_data, + on=["year", "country"], + suffixes=("_energy_totals", "_nrg_data"), + how="left", + ) # 2) Update the 'nrg_name' column in energy with the values from nrg_data - energy_totals[nrg_name] = merged_df[f'{nrg_name}_nrg_data'].combine_first(merged_df[f'{nrg_name}_energy_totals']) - + energy_totals[nrg_name] = merged_df[f"{nrg_name}_nrg_data"].combine_first( + merged_df[f"{nrg_name}_energy_totals"] + ) + # Set indexes back - energy_totals.set_index(['country', 'year'], inplace=True) + energy_totals.set_index(["country", "year"], inplace=True) return energy_totals diff --git a/scripts/retrieve_eurostat_data.py b/scripts/retrieve_eurostat_data.py index b00dd3ba..77fb468e 100644 --- a/scripts/retrieve_eurostat_data.py +++ b/scripts/retrieve_eurostat_data.py @@ -7,10 +7,10 @@ Retrieve and extract eurostat energy balances data. """ -import logging -import zipfile import gzip +import logging import shutil +import zipfile from pathlib import Path from _helpers import configure_logging, progress_retrieve, set_scenario_config @@ -50,11 +50,15 @@ if __name__ == "__main__": f"{rootpath}/data/eurostat/eurostat-household_energy_balances-february_2024.csv" ) - logger.info(f"Downloading Eurostats' disaggregated household energy balances data from '{url_eurostat_household}'.") + logger.info( + f"Downloading Eurostats' disaggregated household energy balances data from '{url_eurostat_household}'." + ) progress_retrieve(url_eurostat_household, tarball_fn, disable=disable_progress) logger.info("Extracting Eurostat's disaggregated household energy balance data.") - with gzip.open(tarball_fn, 'rb') as f_in, open(to_fn, 'wb') as f_out: + with gzip.open(tarball_fn, "rb") as f_in, open(to_fn, "wb") as f_out: shutil.copyfileobj(f_in, f_out) - logger.info(f"Eurostat's disaggregated household energy balance data available in '{to_fn}'.") + logger.info( + f"Eurostat's disaggregated household energy balance data available in '{to_fn}'." + ) From 2113b6657f2adbce93cebb025b45bf01af5b5f80 Mon Sep 17 00:00:00 2001 From: yerbol-akhmetov Date: Thu, 18 Apr 2024 20:31:42 +0500 Subject: [PATCH 4/5] add logging --- scripts/build_energy_totals.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index f32815b1..70a8e51f 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -990,6 +990,7 @@ def update_residential_from_eurostat(energy): # Set indexes back energy_totals.set_index(["country", "year"], inplace=True) + logger.info("Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat") return energy_totals From bc786079268b6768bc7d832be7e58deb6679c4f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:32:26 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_energy_totals.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py index 70a8e51f..d80a8e85 100644 --- a/scripts/build_energy_totals.py +++ b/scripts/build_energy_totals.py @@ -990,7 +990,9 @@ def update_residential_from_eurostat(energy): # Set indexes back energy_totals.set_index(["country", "year"], inplace=True) - logger.info("Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat") + logger.info( + "Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat" + ) return energy_totals