Remove the option to use older Eurostat energy-balance report editions (only the April 2023 edition remains); clean up code

This commit is contained in:
toniseibold 2024-02-27 12:04:07 +01:00
parent 3298572ced
commit d363aeb57d
7 changed files with 152 additions and 234 deletions

View File

@ -316,7 +316,6 @@ pypsa_eur:
energy: energy:
energy_totals_year: 2019 energy_totals_year: 2019
base_emissions_year: 1990 base_emissions_year: 1990
eurostat_report_year: 2023
emissions: CO2 emissions: CO2
# docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass

View File

@ -270,7 +270,7 @@ rule build_energy_totals:
swiss="data/switzerland-new_format-all_years.csv", swiss="data/switzerland-new_format-all_years.csv",
idees="data/bundle-sector/jrc-idees-2015", idees="data/bundle-sector/jrc-idees-2015",
district_heat_share="data/district_heat_share.csv", district_heat_share="data/district_heat_share.csv",
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
output: output:
energy_name=resources("energy_totals.csv"), energy_name=resources("energy_totals.csv"),
co2_name=resources("co2_totals.csv"), co2_name=resources("co2_totals.csv"),
@ -865,7 +865,7 @@ rule prepare_sector_network:
), ),
network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"), network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
energy_totals_name=resources("energy_totals.csv"), energy_totals_name=resources("energy_totals.csv"),
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
pop_weighted_energy_totals=resources( pop_weighted_energy_totals=resources(
"pop_weighted_energy_totals_s{simpl}_{clusters}.csv" "pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
), ),

View File

@ -129,14 +129,6 @@ def has_internet_access(url="www.zenodo.org") -> bool:
finally: finally:
conn.close() conn.close()
def input_eurostat(w):
if config["energy"]["eurostat_report_year"] != 2023:
report_year = config["energy"]["eurostat_report_year"]
return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
else:
return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
def solved_previous_horizon(w): def solved_previous_horizon(w):
planning_horizons = config_provider("scenario", "planning_horizons")(w) planning_horizons = config_provider("scenario", "planning_horizons")(w)
i = planning_horizons.index(int(w.planning_horizons)) i = planning_horizons.index(int(w.planning_horizons))

View File

@ -247,7 +247,7 @@ rule plot_summary:
costs=RESULTS + "csvs/costs.csv", costs=RESULTS + "csvs/costs.csv",
energy=RESULTS + "csvs/energy.csv", energy=RESULTS + "csvs/energy.csv",
balances=RESULTS + "csvs/supply_energy.csv", balances=RESULTS + "csvs/supply_energy.csv",
eurostat=input_eurostat, eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
co2="data/bundle-sector/eea/UNFCCC_v23.csv", co2="data/bundle-sector/eea/UNFCCC_v23.csv",
output: output:
costs=RESULTS + "graphs/costs.pdf", costs=RESULTS + "graphs/costs.pdf",

View File

@ -142,6 +142,9 @@ if config["enable"]["retrieve"] and config["enable"].get(
protected( protected(
directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition") directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
), ),
protected(
directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
),
protected(directory("data/bundle-sector/jrc-idees-2015")), protected(directory("data/bundle-sector/jrc-idees-2015")),
] ]

View File

@ -117,97 +117,65 @@ to_ipcc = {
} }
def build_eurostat(input_eurostat, countries, report_year, year): def build_eurostat(input_eurostat, countries, year):
""" """
Return multi-index for all countries' energy data in TWh/a. Return multi-index for all countries' energy data in TWh/a.
""" """
if report_year != 2023: # read in every country file in countries
filenames = { eurostat = pd.DataFrame()
2016: f"/{year}-Energy-Balances-June2016edition.xlsx", countries = [country if country != 'GB' else 'UK' for country in countries]
2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx", countries = [country if country != 'GR' else 'EL' for country in countries]
for country in countries:
filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
if os.path.exists(input_eurostat + filename):
df = pd.read_excel(
input_eurostat + filename,
engine='pyxlsb',
sheet_name=str(year),
skiprows=4,
index_col=list(range(4)))
# replace entry 'Z' with 0
df.replace('Z', 0, inplace=True)
# write 'International aviation' to the 2nd level of the multiindex
index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
modified_index = list(df.index)
modified_index[index_number] = new_index
df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
# drop the annoying subhead line
df.drop(df[df[year] == year].index, inplace=True)
# replace 'Z' with 0
df = df.replace('Z', 0)
# add country to the multiindex
new_tuple = [(country, *idx) for idx in df.index]
new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
df.index = new_mindex
# make numeric values where possible
df = df.apply(pd.to_numeric, errors='coerce')
# drop non-numeric columns
non_numeric_cols = df.columns[df.dtypes != float]
df.drop(non_numeric_cols, axis=1, inplace=True)
# concatenate the dataframes
eurostat = pd.concat([eurostat, df], axis=0)
eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
# Renaming some indices
rename = {
'Households': 'Residential',
'Commercial & public services': 'Services',
'Domestic navigation': 'Domestic Navigation'
} }
for name, rename in rename.items():
eurostat.index = eurostat.index.set_levels(
eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
level=3)
new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
eurostat.index = new_index
with mute_print(): eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
dfs = pd.read_excel( eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
input_eurostat + filenames[report_year],
sheet_name=None,
skiprows=1,
index_col=list(range(4)),
)
# sorted_index necessary for slicing df = eurostat * 11.63 / 1e3
lookup = eurostat_codes
labelled_dfs = {
lookup[df.columns[0]]: df
for df in dfs.values()
if lookup[df.columns[0]] in countries
}
df = pd.concat(labelled_dfs, sort=True).sort_index()
# drop non-numeric and country columns
non_numeric_cols = df.columns[df.dtypes != float]
country_cols = df.columns.intersection(lookup.keys())
to_drop = non_numeric_cols.union(country_cols)
df.drop(to_drop, axis=1, inplace=True)
# convert ktoe/a to TWh/a
df *= 11.63 / 1e3
else:
# read in every country file in countries
eurostat = pd.DataFrame()
countries = [country if country != 'GB' else 'UK' for country in countries]
countries = [country if country != 'GR' else 'EL' for country in countries]
for country in countries:
filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
if os.path.exists(input_eurostat + filename):
df = pd.read_excel(
input_eurostat + filename,
engine='pyxlsb',
sheet_name=str(year),
skiprows=4,
index_col=list(range(4)))
# replace entry 'Z' with 0
df.replace('Z', 0, inplace=True)
# write 'International aviation' to the 2nd level of the multiindex
index_number = (df.index.get_level_values(1) == 'International aviation').argmax()
new_index = ('-', 'International aviation', 'International aviation', 'ktoe')
modified_index = list(df.index)
modified_index[index_number] = new_index
df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
# drop the annoying subhead line
df.drop(df[df[year] == year].index, inplace=True)
# replace 'Z' with 0
df = df.replace('Z', 0)
# add country to the multiindex
new_tuple = [(country, *idx) for idx in df.index]
new_mindex = pd.MultiIndex.from_tuples(new_tuple, names=['country', None, 'name', None, 'unit'])
df.index = new_mindex
# make numeric values where possible
df = df.apply(pd.to_numeric, errors='coerce')
# drop non-numeric columns
non_numeric_cols = df.columns[df.dtypes != float]
df.drop(non_numeric_cols, axis=1, inplace=True)
# concatenate the dataframes
eurostat = pd.concat([eurostat, df], axis=0)
eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
# Renaming some indices
rename = {
'Households': 'Residential',
'Commercial & public services': 'Services',
'Domestic navigation': 'Domestic Navigation'
}
for name, rename in rename.items():
eurostat.index = eurostat.index.set_levels(
eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
level=3)
new_index = eurostat.index.set_levels(eurostat.index.levels[2].where(eurostat.index.levels[2] != 'International maritime bunkers', 'Bunkers'), level=2)
eurostat.index = new_index
eurostat.rename(columns={'Total': 'Total all products'}, inplace=True)
eurostat.index = eurostat.index.set_levels(eurostat.index.levels[0].where(eurostat.index.levels[0] != 'UK', 'GB'), level=0)
df = eurostat * 11.63 / 1e3
return df return df
@ -709,8 +677,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
return emissions / 1e3 return emissions / 1e3
def build_eurostat_co2(input_eurostat, countries, report_year, year=1990): def build_eurostat_co2(input_eurostat, countries, year=1990):
eurostat = build_eurostat(input_eurostat, countries, report_year, year) eurostat = build_eurostat(input_eurostat, countries, year)
specific_emissions = pd.Series(index=eurostat.columns, dtype=float) specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
@ -727,49 +695,26 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
return eurostat.multiply(specific_emissions).sum(axis=1) return eurostat.multiply(specific_emissions).sum(axis=1)
def build_co2_totals(countries, eea_co2, eurostat_co2, report_year): def build_co2_totals(countries, eea_co2, eurostat_co2):
co2 = eea_co2.reindex(countries) co2 = eea_co2.reindex(countries)
for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]): for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
if report_year != 2023: mappings = {
mappings = { "electricity": (ct, "+", "Electricity & heat generation", np.nan),
"electricity": ( "residential non-elec": (ct, "+", "+", "Residential"),
ct, "services non-elec": (ct, "+", "+", "Services"),
"+", "road non-elec": (ct, "+", "+", "Road"),
"Conventional Thermal Power Stations", "rail non-elec": (ct, "+", "+", "Rail"),
"of which From Coal", "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
), "international navigation": (ct, "-", "Bunkers"),
"residential non-elec": (ct, "+", "+", "Residential"), "domestic aviation": (ct, "+", "+", "Domestic aviation"),
"services non-elec": (ct, "+", "+", "Services"), "international aviation": (ct, "-", "International aviation"),
"road non-elec": (ct, "+", "+", "Road"), # does not include industrial process emissions or fuel processing/refining
"rail non-elec": (ct, "+", "+", "Rail"), "industrial non-elec": (ct, "+", "Industry sector"),
"domestic navigation": (ct, "+", "+", "Domestic Navigation"), # does not include non-energy emissions
"international navigation": (ct, "-", "Bunkers"), "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
"domestic aviation": (ct, "+", "+", "Domestic aviation"), & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
"international aviation": (ct, "+", "+", "International aviation"), }
# does not include industrial process emissions or fuel processing/refining
"industrial non-elec": (ct, "+", "Industry"),
# does not include non-energy emissions
"agriculture": (eurostat_co2.index.get_level_values(0) == ct)
& eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
}
else:
mappings = {
"electricity": (ct, "+", "Electricity & heat generation", np.nan),
"residential non-elec": (ct, "+", "+", "Residential"),
"services non-elec": (ct, "+", "+", "Services"),
"road non-elec": (ct, "+", "+", "Road"),
"rail non-elec": (ct, "+", "+", "Rail"),
"domestic navigation": (ct, "+", "+", "Domestic Navigation"),
"international navigation": (ct, "-", "Bunkers"),
"domestic aviation": (ct, "+", "+", "Domestic aviation"),
"international aviation": (ct, "-", "International aviation"),
# does not include industrial process emissions or fuel processing/refining
"industrial non-elec": (ct, "+", "Industry sector"),
# does not include non-energy emissions
"agriculture": (eurostat_co2.index.get_level_values(0) == ct)
& eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3),
}
for i, mi in mappings.items(): for i, mi in mappings.items():
co2.at[ct, i] = eurostat_co2.loc[mi].sum() co2.at[ct, i] = eurostat_co2.loc[mi].sum()
@ -820,83 +765,65 @@ def rescale(idees_countries, energy, eurostat):
''' '''
# read in the eurostat data for 2015 # read in the eurostat data for 2015
eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]] eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
# eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
eurostat_year = eurostat[["Total all products", "Electricity"]] eurostat_year = eurostat[["Total all products", "Electricity"]]
# calculate the ratio of the two data sets # calculate the ratio of the two data sets
ratio = eurostat_year / eurostat_2015 ratio = eurostat_year / eurostat_2015
ratio = ratio.droplevel([1,4]) ratio = ratio.droplevel([1,4])
ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True) ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
ratio = ratio.rename(index={"GB": "UK"}, level=0) ratio = ratio.rename(index={"EL": "GR"}, level=0)
residential_total = [ mappings = {
"total residential space", "Residential": {
"total residential water", "total": ["total residential space",
"total residential cooking", "total residential water",
"total residential", "total residential cooking",
"derived heat residential", "total residential",
"thermal uses residential", "derived heat residential",
] "thermal uses residential",],
residential_ele = [ "elec": ["electricity residential space",
"electricity residential space", "electricity residential water",
"electricity residential water", "electricity residential cooking",
"electricity residential cooking", "electricity residential",]},
"electricity residential", "Services": {
] "total": ["total services space",
"total services water",
service_total = [ "total services cooking",
"total services space", "total services",
"total services water", "derived heat services",
"total services cooking", "thermal uses services",],
"total services", "elec": ["electricity services space",
"derived heat services", "electricity services water",
"thermal uses services", "electricity services cooking",
] "electricity services",]},
service_ele = [ "Agriculture & forestry": {
"electricity services space", "total": ["total agriculture heat",
"electricity services water", "total agriculture machinery",
"electricity services cooking", "total agriculture",],
"electricity services", "elec": ["total agriculture electricity",]},
] "Road": {
"total": ["total road",
agri_total = [ "total passenger cars",
"total agriculture heat", "total other road passenger",
"total agriculture machinery", "total light duty road freight",],
"total agriculture", "elec": ["electricity road",
] "electricity passenger cars",
agri_ele = [ "electricity other road passenger",
"total agriculture electricity", "electricity light duty road freight",]},
] "Rail": {
"total": ["total rail",
road_total = [ "total rail passenger",
"total road", "total rail freight",],
"total passenger cars", "elec": ["electricity rail",
"total other road passenger", "electricity rail passenger",
"total light duty road freight", "electricity rail freight",]},
] }
road_ele = [
"electricity road",
"electricity passenger cars",
"electricity other road passenger",
"electricity light duty road freight",
]
rail_total = [
"total rail",
"total rail passenger",
"total rail freight",
]
rail_ele = [
"electricity rail",
"electricity rail passenger",
"electricity rail freight",
]
avia_inter = [ avia_inter = [
'total aviation passenger', 'total aviation passenger',
'total aviation freight', 'total aviation freight',
'total international aviation passenger', 'total international aviation passenger',
'total international aviation freight', 'total international aviation freight',
'total international aviation' 'total international aviation'
] ]
avia_domestic = [ avia_domestic = [
'total domestic aviation passenger', 'total domestic aviation passenger',
@ -907,28 +834,12 @@ def rescale(idees_countries, energy, eurostat):
"total domestic navigation", "total domestic navigation",
] ]
idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
for country in idees_countries: for country in idees_countries:
res = ratio.loc[(country, slice(None), 'Residential')] for sector, mapping in mappings.items():
energy.loc[country, residential_total] *= res[['total']].iloc[0,0] sector_ratio = ratio.loc[(country, slice(None), sector)]
energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
ser = ratio.loc[(country, slice(None), 'Services')] energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
energy.loc[country, service_total] *= ser[['total']].iloc[0,0] energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
road = ratio.loc[(country, slice(None), 'Road')]
energy.loc[country, road_total] *= road[['total']].iloc[0,0]
energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
rail = ratio.loc[(country, slice(None), 'Rail')]
energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')] avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
avi_i = ratio.loc[(country, 'International aviation', slice(None))] avi_i = ratio.loc[(country, 'International aviation', slice(None))]
@ -958,9 +869,8 @@ if __name__ == "__main__":
idees_countries = pd.Index(countries).intersection(eu28) idees_countries = pd.Index(countries).intersection(eu28)
data_year = params["energy_totals_year"] data_year = params["energy_totals_year"]
report_year = snakemake.params.energy["eurostat_report_year"]
input_eurostat = snakemake.input.eurostat input_eurostat = snakemake.input.eurostat
eurostat = build_eurostat(input_eurostat, countries, report_year, data_year) eurostat = build_eurostat(input_eurostat, countries, data_year)
swiss = build_swiss(data_year) swiss = build_swiss(data_year)
# data from idees only exists for 2015 # data from idees only exists for 2015
if data_year > 2015: if data_year > 2015:
@ -984,10 +894,10 @@ if __name__ == "__main__":
emissions_scope = snakemake.params.energy["emissions"] emissions_scope = snakemake.params.energy["emissions"]
eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope) eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
eurostat_co2 = build_eurostat_co2( eurostat_co2 = build_eurostat_co2(
input_eurostat, countries, report_year, base_year_emissions input_eurostat, countries, base_year_emissions
) )
co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year) co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
co2.to_csv(snakemake.output.co2_name) co2.to_csv(snakemake.output.co2_name)
transport = build_transport_data(countries, population, idees) transport = build_transport_data(countries, population, idees)

View File

@ -8,6 +8,7 @@ Retrieve and extract data bundle for sector-coupled studies.
import logging import logging
import tarfile import tarfile
import zipfile
from pathlib import Path from pathlib import Path
from _helpers import ( from _helpers import (
@ -47,3 +48,16 @@ if __name__ == "__main__":
tarball_fn.unlink() tarball_fn.unlink()
logger.info(f"Databundle available in '{to_fn}'.") logger.info(f"Databundle available in '{to_fn}'.")
url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
logger.info("Extracting Eurostat data.")
with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
zip_ref.extractall(to_fn)
logger.info(f"Eurostat data available in '{to_fn}'.")