Remove old Eurostat data reports as a configuration option and clean up the code

This commit is contained in:
toniseibold 2024-02-27 12:04:07 +01:00
parent 3298572ced
commit d363aeb57d
7 changed files with 152 additions and 234 deletions

View File

@ -316,7 +316,6 @@ pypsa_eur:
energy: energy:
energy_totals_year: 2019 energy_totals_year: 2019
base_emissions_year: 1990 base_emissions_year: 1990
eurostat_report_year: 2023
emissions: CO2 emissions: CO2
# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass

View File

@ -270,7 +270,7 @@ rule build_energy_totals:
swiss="data/switzerland-new_format-all_years.csv", swiss="data/switzerland-new_format-all_years.csv",
idees="data/bundle-sector/jrc-idees-2015", idees="data/bundle-sector/jrc-idees-2015",
district_heat_share="data/district_heat_share.csv", district_heat_share="data/district_heat_share.csv",
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
output: output:
energy_name=resources("energy_totals.csv"), energy_name=resources("energy_totals.csv"),
co2_name=resources("co2_totals.csv"), co2_name=resources("co2_totals.csv"),
@ -865,7 +865,7 @@ rule prepare_sector_network:
), ),
network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"), network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
energy_totals_name=resources("energy_totals.csv"), energy_totals_name=resources("energy_totals.csv"),
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
pop_weighted_energy_totals=resources( pop_weighted_energy_totals=resources(
"pop_weighted_energy_totals_s{simpl}_{clusters}.csv" "pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
), ),

View File

@ -129,14 +129,6 @@ def has_internet_access(url="www.zenodo.org") -> bool:
finally: finally:
conn.close() conn.close()
def input_eurostat(w):
if config["energy"]["eurostat_report_year"] != 2023:
report_year = config["energy"]["eurostat_report_year"]
return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
else:
return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
def solved_previous_horizon(w): def solved_previous_horizon(w):
planning_horizons = config_provider("scenario", "planning_horizons")(w) planning_horizons = config_provider("scenario", "planning_horizons")(w)
i = planning_horizons.index(int(w.planning_horizons)) i = planning_horizons.index(int(w.planning_horizons))

View File

@ -247,7 +247,7 @@ rule plot_summary:
costs=RESULTS + "csvs/costs.csv", costs=RESULTS + "csvs/costs.csv",
energy=RESULTS + "csvs/energy.csv", energy=RESULTS + "csvs/energy.csv",
balances=RESULTS + "csvs/supply_energy.csv", balances=RESULTS + "csvs/supply_energy.csv",
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
co2="data/bundle-sector/eea/UNFCCC_v23.csv", co2="data/bundle-sector/eea/UNFCCC_v23.csv",
output: output:
costs=RESULTS + "graphs/costs.pdf", costs=RESULTS + "graphs/costs.pdf",

View File

@ -142,6 +142,9 @@ if config["enable"]["retrieve"] and config["enable"].get(
protected( protected(
directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition") directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
), ),
protected(
directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
),
protected(directory("data/bundle-sector/jrc-idees-2015")), protected(directory("data/bundle-sector/jrc-idees-2015")),
] ]

View File

@ -117,42 +117,10 @@ to_ipcc = {
} }
def build_eurostat(input_eurostat, countries, report_year, year): def build_eurostat(input_eurostat, countries, year):
""" """
Return multi-index for all countries' energy data in TWh/a. Return multi-index for all countries' energy data in TWh/a.
""" """
if report_year != 2023:
filenames = {
2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
}
with mute_print():
dfs = pd.read_excel(
input_eurostat + filenames[report_year],
sheet_name=None,
skiprows=1,
index_col=list(range(4)),
)
# sorted_index necessary for slicing
lookup = eurostat_codes
labelled_dfs = {
lookup[df.columns[0]]: df
for df in dfs.values()
if lookup[df.columns[0]] in countries
}
df = pd.concat(labelled_dfs, sort=True).sort_index()
# drop non-numeric and country columns
non_numeric_cols = df.columns[df.dtypes != float]
country_cols = df.columns.intersection(lookup.keys())
to_drop = non_numeric_cols.union(country_cols)
df.drop(to_drop, axis=1, inplace=True)
# convert ktoe/a to TWh/a
df *= 11.63 / 1e3
else:
# read in every country file in countries # read in every country file in countries
eurostat = pd.DataFrame() eurostat = pd.DataFrame()
countries = [country if country != 'GB' else 'UK' for country in countries] countries = [country if country != 'GB' else 'UK' for country in countries]
@ -709,8 +677,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
return emissions / 1e3 return emissions / 1e3
def build_eurostat_co2(input_eurostat, countries, report_year, year=1990): def build_eurostat_co2(input_eurostat, countries, year=1990):
eurostat = build_eurostat(input_eurostat, countries, report_year, year) eurostat = build_eurostat(input_eurostat, countries, year)
specific_emissions = pd.Series(index=eurostat.columns, dtype=float) specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
@ -727,33 +695,10 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
return eurostat.multiply(specific_emissions).sum(axis=1) return eurostat.multiply(specific_emissions).sum(axis=1)
def build_co2_totals(countries, eea_co2, eurostat_co2, report_year): def build_co2_totals(countries, eea_co2, eurostat_co2):
co2 = eea_co2.reindex(countries) co2 = eea_co2.reindex(countries)
for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]): for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
if report_year != 2023:
mappings = {
"electricity": (
ct,
"+",
"Conventional Thermal Power Stations",
"of which From Coal",
),
"residential non-elec": (ct, "+", "+", "Residential"),
"services non-elec": (ct, "+", "+", "Services"),
"road non-elec": (ct, "+", "+", "Road"),
"rail non-elec": (ct, "+", "+", "Rail"),
"domestic navigation": (ct, "+", "+", "Domestic Navigation"),
"international navigation": (ct, "-", "Bunkers"),
"domestic aviation": (ct, "+", "+", "Domestic aviation"),
"international aviation": (ct, "+", "+", "International aviation"),
# does not include industrial process emissions or fuel processing/refining
"industrial non-elec": (ct, "+", "Industry"),
# does not include non-energy emissions
"agriculture": (eurostat_co2.index.get_level_values(0) == ct)
& eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
}
else:
mappings = { mappings = {
"electricity": (ct, "+", "Electricity & heat generation", np.nan), "electricity": (ct, "+", "Electricity & heat generation", np.nan),
"residential non-elec": (ct, "+", "+", "Residential"), "residential non-elec": (ct, "+", "+", "Residential"),
@ -820,76 +765,58 @@ def rescale(idees_countries, energy, eurostat):
''' '''
# read in the eurostat data for 2015 # read in the eurostat data for 2015
eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]] eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
# eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
eurostat_year = eurostat[["Total all products", "Electricity"]] eurostat_year = eurostat[["Total all products", "Electricity"]]
# calculate the ratio of the two data sets # calculate the ratio of the two data sets
ratio = eurostat_year / eurostat_2015 ratio = eurostat_year / eurostat_2015
ratio = ratio.droplevel([1,4]) ratio = ratio.droplevel([1,4])
ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True) ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
ratio = ratio.rename(index={"GB": "UK"}, level=0) ratio = ratio.rename(index={"EL": "GR"}, level=0)
residential_total = [ mappings = {
"total residential space", "Residential": {
"total": ["total residential space",
"total residential water", "total residential water",
"total residential cooking", "total residential cooking",
"total residential", "total residential",
"derived heat residential", "derived heat residential",
"thermal uses residential", "thermal uses residential",],
] "elec": ["electricity residential space",
residential_ele = [
"electricity residential space",
"electricity residential water", "electricity residential water",
"electricity residential cooking", "electricity residential cooking",
"electricity residential", "electricity residential",]},
] "Services": {
"total": ["total services space",
service_total = [
"total services space",
"total services water", "total services water",
"total services cooking", "total services cooking",
"total services", "total services",
"derived heat services", "derived heat services",
"thermal uses services", "thermal uses services",],
] "elec": ["electricity services space",
service_ele = [
"electricity services space",
"electricity services water", "electricity services water",
"electricity services cooking", "electricity services cooking",
"electricity services", "electricity services",]},
] "Agriculture & forestry": {
"total": ["total agriculture heat",
agri_total = [
"total agriculture heat",
"total agriculture machinery", "total agriculture machinery",
"total agriculture", "total agriculture",],
] "elec": ["total agriculture electricity",]},
agri_ele = [ "Road": {
"total agriculture electricity", "total": ["total road",
]
road_total = [
"total road",
"total passenger cars", "total passenger cars",
"total other road passenger", "total other road passenger",
"total light duty road freight", "total light duty road freight",],
] "elec": ["electricity road",
road_ele = [
"electricity road",
"electricity passenger cars", "electricity passenger cars",
"electricity other road passenger", "electricity other road passenger",
"electricity light duty road freight", "electricity light duty road freight",]},
] "Rail": {
"total": ["total rail",
rail_total = [
"total rail",
"total rail passenger", "total rail passenger",
"total rail freight", "total rail freight",],
] "elec": ["electricity rail",
rail_ele = [
"electricity rail",
"electricity rail passenger", "electricity rail passenger",
"electricity rail freight", "electricity rail freight",]},
] }
avia_inter = [ avia_inter = [
'total aviation passenger', 'total aviation passenger',
@ -907,28 +834,12 @@ def rescale(idees_countries, energy, eurostat):
"total domestic navigation", "total domestic navigation",
] ]
idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
for country in idees_countries: for country in idees_countries:
res = ratio.loc[(country, slice(None), 'Residential')] for sector, mapping in mappings.items():
energy.loc[country, residential_total] *= res[['total']].iloc[0,0] sector_ratio = ratio.loc[(country, slice(None), sector)]
energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
ser = ratio.loc[(country, slice(None), 'Services')] energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
energy.loc[country, service_total] *= ser[['total']].iloc[0,0] energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
road = ratio.loc[(country, slice(None), 'Road')]
energy.loc[country, road_total] *= road[['total']].iloc[0,0]
energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
rail = ratio.loc[(country, slice(None), 'Rail')]
energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')] avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
avi_i = ratio.loc[(country, 'International aviation', slice(None))] avi_i = ratio.loc[(country, 'International aviation', slice(None))]
@ -958,9 +869,8 @@ if __name__ == "__main__":
idees_countries = pd.Index(countries).intersection(eu28) idees_countries = pd.Index(countries).intersection(eu28)
data_year = params["energy_totals_year"] data_year = params["energy_totals_year"]
report_year = snakemake.params.energy["eurostat_report_year"]
input_eurostat = snakemake.input.eurostat input_eurostat = snakemake.input.eurostat
eurostat = build_eurostat(input_eurostat, countries, report_year, data_year) eurostat = build_eurostat(input_eurostat, countries, data_year)
swiss = build_swiss(data_year) swiss = build_swiss(data_year)
# data from idees only exists for 2015 # data from idees only exists for 2015
if data_year > 2015: if data_year > 2015:
@ -984,10 +894,10 @@ if __name__ == "__main__":
emissions_scope = snakemake.params.energy["emissions"] emissions_scope = snakemake.params.energy["emissions"]
eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope) eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
eurostat_co2 = build_eurostat_co2( eurostat_co2 = build_eurostat_co2(
input_eurostat, countries, report_year, base_year_emissions input_eurostat, countries, base_year_emissions
) )
co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year) co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
co2.to_csv(snakemake.output.co2_name) co2.to_csv(snakemake.output.co2_name)
transport = build_transport_data(countries, population, idees) transport = build_transport_data(countries, population, idees)

View File

@ -8,6 +8,7 @@ Retrieve and extract data bundle for sector-coupled studies.
import logging import logging
import tarfile import tarfile
import zipfile
from pathlib import Path from pathlib import Path
from _helpers import ( from _helpers import (
@ -47,3 +48,16 @@ if __name__ == "__main__":
tarball_fn.unlink() tarball_fn.unlink()
logger.info(f"Databundle available in '{to_fn}'.") logger.info(f"Databundle available in '{to_fn}'.")
url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
logger.info("Extracting Eurostat data.")
with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
zip_ref.extractall(to_fn)
logger.info(f"Eurostat data available in '{to_fn}'.")