removing old eurostat data reports as an option, cleaning up code

This commit is contained in:
toniseibold 2024-02-27 12:04:07 +01:00
parent 3298572ced
commit d363aeb57d
7 changed files with 152 additions and 234 deletions

View File

@ -316,7 +316,6 @@ pypsa_eur:
energy:
energy_totals_year: 2019
base_emissions_year: 1990
eurostat_report_year: 2023
emissions: CO2
# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass

View File

@ -270,7 +270,7 @@ rule build_energy_totals:
swiss="data/switzerland-new_format-all_years.csv",
idees="data/bundle-sector/jrc-idees-2015",
district_heat_share="data/district_heat_share.csv",
eurostat=input_eurostat,
eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
output:
energy_name=resources("energy_totals.csv"),
co2_name=resources("co2_totals.csv"),
@ -865,7 +865,7 @@ rule prepare_sector_network:
),
network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
energy_totals_name=resources("energy_totals.csv"),
eurostat=input_eurostat,
eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
pop_weighted_energy_totals=resources(
"pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
),

View File

@ -129,14 +129,6 @@ def has_internet_access(url="www.zenodo.org") -> bool:
finally:
conn.close()
def input_eurostat(w):
if config["energy"]["eurostat_report_year"] != 2023:
report_year = config["energy"]["eurostat_report_year"]
return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
else:
return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
def solved_previous_horizon(w):
planning_horizons = config_provider("scenario", "planning_horizons")(w)
i = planning_horizons.index(int(w.planning_horizons))

View File

@ -247,7 +247,7 @@ rule plot_summary:
costs=RESULTS + "csvs/costs.csv",
energy=RESULTS + "csvs/energy.csv",
balances=RESULTS + "csvs/supply_energy.csv",
eurostat=input_eurostat,
eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
co2="data/bundle-sector/eea/UNFCCC_v23.csv",
output:
costs=RESULTS + "graphs/costs.pdf",

View File

@ -142,6 +142,9 @@ if config["enable"]["retrieve"] and config["enable"].get(
protected(
directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
),
protected(
directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
),
protected(directory("data/bundle-sector/jrc-idees-2015")),
]

View File

@ -117,42 +117,10 @@ to_ipcc = {
}
def build_eurostat(input_eurostat, countries, report_year, year):
def build_eurostat(input_eurostat, countries, year):
"""
Return multi-index for all countries' energy data in TWh/a.
"""
if report_year != 2023:
filenames = {
2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
}
with mute_print():
dfs = pd.read_excel(
input_eurostat + filenames[report_year],
sheet_name=None,
skiprows=1,
index_col=list(range(4)),
)
# sorted_index necessary for slicing
lookup = eurostat_codes
labelled_dfs = {
lookup[df.columns[0]]: df
for df in dfs.values()
if lookup[df.columns[0]] in countries
}
df = pd.concat(labelled_dfs, sort=True).sort_index()
# drop non-numeric and country columns
non_numeric_cols = df.columns[df.dtypes != float]
country_cols = df.columns.intersection(lookup.keys())
to_drop = non_numeric_cols.union(country_cols)
df.drop(to_drop, axis=1, inplace=True)
# convert ktoe/a to TWh/a
df *= 11.63 / 1e3
else:
# read in every country file in countries
eurostat = pd.DataFrame()
countries = [country if country != 'GB' else 'UK' for country in countries]
@ -709,8 +677,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
return emissions / 1e3
def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
eurostat = build_eurostat(input_eurostat, countries, report_year, year)
def build_eurostat_co2(input_eurostat, countries, year=1990):
eurostat = build_eurostat(input_eurostat, countries, year)
specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
@ -727,33 +695,10 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
return eurostat.multiply(specific_emissions).sum(axis=1)
def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
def build_co2_totals(countries, eea_co2, eurostat_co2):
co2 = eea_co2.reindex(countries)
for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
if report_year != 2023:
mappings = {
"electricity": (
ct,
"+",
"Conventional Thermal Power Stations",
"of which From Coal",
),
"residential non-elec": (ct, "+", "+", "Residential"),
"services non-elec": (ct, "+", "+", "Services"),
"road non-elec": (ct, "+", "+", "Road"),
"rail non-elec": (ct, "+", "+", "Rail"),
"domestic navigation": (ct, "+", "+", "Domestic Navigation"),
"international navigation": (ct, "-", "Bunkers"),
"domestic aviation": (ct, "+", "+", "Domestic aviation"),
"international aviation": (ct, "+", "+", "International aviation"),
# does not include industrial process emissions or fuel processing/refining
"industrial non-elec": (ct, "+", "Industry"),
# does not include non-energy emissions
"agriculture": (eurostat_co2.index.get_level_values(0) == ct)
& eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
}
else:
mappings = {
"electricity": (ct, "+", "Electricity & heat generation", np.nan),
"residential non-elec": (ct, "+", "+", "Residential"),
@ -820,76 +765,58 @@ def rescale(idees_countries, energy, eurostat):
'''
# read in the eurostat data for 2015
eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
# eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
eurostat_year = eurostat[["Total all products", "Electricity"]]
# calculate the ratio of the two data sets
ratio = eurostat_year / eurostat_2015
ratio = ratio.droplevel([1,4])
ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
ratio = ratio.rename(index={"GB": "UK"}, level=0)
ratio = ratio.rename(index={"EL": "GR"}, level=0)
residential_total = [
"total residential space",
mappings = {
"Residential": {
"total": ["total residential space",
"total residential water",
"total residential cooking",
"total residential",
"derived heat residential",
"thermal uses residential",
]
residential_ele = [
"electricity residential space",
"thermal uses residential",],
"elec": ["electricity residential space",
"electricity residential water",
"electricity residential cooking",
"electricity residential",
]
service_total = [
"total services space",
"electricity residential",]},
"Services": {
"total": ["total services space",
"total services water",
"total services cooking",
"total services",
"derived heat services",
"thermal uses services",
]
service_ele = [
"electricity services space",
"thermal uses services",],
"elec": ["electricity services space",
"electricity services water",
"electricity services cooking",
"electricity services",
]
agri_total = [
"total agriculture heat",
"electricity services",]},
"Agriculture & forestry": {
"total": ["total agriculture heat",
"total agriculture machinery",
"total agriculture",
]
agri_ele = [
"total agriculture electricity",
]
road_total = [
"total road",
"total agriculture",],
"elec": ["total agriculture electricity",]},
"Road": {
"total": ["total road",
"total passenger cars",
"total other road passenger",
"total light duty road freight",
]
road_ele = [
"electricity road",
"total light duty road freight",],
"elec": ["electricity road",
"electricity passenger cars",
"electricity other road passenger",
"electricity light duty road freight",
]
rail_total = [
"total rail",
"electricity light duty road freight",]},
"Rail": {
"total": ["total rail",
"total rail passenger",
"total rail freight",
]
rail_ele = [
"electricity rail",
"total rail freight",],
"elec": ["electricity rail",
"electricity rail passenger",
"electricity rail freight",
]
"electricity rail freight",]},
}
avia_inter = [
'total aviation passenger',
@ -907,28 +834,12 @@ def rescale(idees_countries, energy, eurostat):
"total domestic navigation",
]
idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
for country in idees_countries:
res = ratio.loc[(country, slice(None), 'Residential')]
energy.loc[country, residential_total] *= res[['total']].iloc[0,0]
energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
for sector, mapping in mappings.items():
sector_ratio = ratio.loc[(country, slice(None), sector)]
ser = ratio.loc[(country, slice(None), 'Services')]
energy.loc[country, service_total] *= ser[['total']].iloc[0,0]
energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
road = ratio.loc[(country, slice(None), 'Road')]
energy.loc[country, road_total] *= road[['total']].iloc[0,0]
energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
rail = ratio.loc[(country, slice(None), 'Rail')]
energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
avi_i = ratio.loc[(country, 'International aviation', slice(None))]
@ -958,9 +869,8 @@ if __name__ == "__main__":
idees_countries = pd.Index(countries).intersection(eu28)
data_year = params["energy_totals_year"]
report_year = snakemake.params.energy["eurostat_report_year"]
input_eurostat = snakemake.input.eurostat
eurostat = build_eurostat(input_eurostat, countries, report_year, data_year)
eurostat = build_eurostat(input_eurostat, countries, data_year)
swiss = build_swiss(data_year)
# data from idees only exists for 2015
if data_year > 2015:
@ -984,10 +894,10 @@ if __name__ == "__main__":
emissions_scope = snakemake.params.energy["emissions"]
eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
eurostat_co2 = build_eurostat_co2(
input_eurostat, countries, report_year, base_year_emissions
input_eurostat, countries, base_year_emissions
)
co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year)
co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
co2.to_csv(snakemake.output.co2_name)
transport = build_transport_data(countries, population, idees)

View File

@ -8,6 +8,7 @@ Retrieve and extract data bundle for sector-coupled studies.
import logging
import tarfile
import zipfile
from pathlib import Path
from _helpers import (
@ -47,3 +48,16 @@ if __name__ == "__main__":
tarball_fn.unlink()
logger.info(f"Databundle available in '{to_fn}'.")
url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
logger.info("Extracting Eurostat data.")
with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
zip_ref.extractall(to_fn)
logger.info(f"Eurostat data available in '{to_fn}'.")