energy_totals: update eurostat to 2021

This commit is contained in:
Fabian Neumann 2022-07-23 10:37:32 +02:00
parent 6e47c71195
commit d47a9aac72
2 changed files with 68 additions and 95 deletions

View File

@ -54,8 +54,7 @@ datafiles = [
"data/emobility/KFZ__count",
"data/emobility/Pkw__count",
"data/h2_salt_caverns_GWh_per_sqkm.geojson",
directory("data/eurostat-energy_balances-june_2016_edition"),
directory("data/eurostat-energy_balances-may_2018_edition"),
directory("data/eurostat-energy_balances-april_2022_edition"),
directory("data/jrc-idees-2015"),
]
@ -235,11 +234,6 @@ rule build_solar_thermal_profiles:
script: "scripts/build_solar_thermal_profiles.py"
def input_eurostat(w):
    """Return the Eurostat energy-balance data directory for the configured edition.

    The edition is selected through ``config["energy"]["eurostat_report_year"]``.
    ``w`` is accepted but never read here (presumably the Snakemake wildcards
    object handed to input functions -- confirm against the calling rule).
    """
    # 2016 includes BA, 2017 does not
    edition = config["energy"]["eurostat_report_year"]
    directory_name = f"data/eurostat-energy_balances-june_{edition}_edition"
    return directory_name
rule build_energy_totals:
input:
nuts3_shapes=pypsaeur('resources/nuts3_shapes.geojson'),
@ -247,7 +241,7 @@ rule build_energy_totals:
swiss="data/switzerland-sfoe/switzerland-new_format.csv",
idees="data/jrc-idees-2015",
district_heat_share='data/district_heat_share.csv',
eurostat=input_eurostat
eurostat=directory("data/eurostat-energy_balances-june_2021_edition"),
output:
energy_name='resources/energy_totals.csv',
co2_name='resources/co2_totals.csv',

View File

@ -17,55 +17,6 @@ def reverse(dictionary):
"""reverses a keys and values of a dictionary"""
return {v: k for k, v in dictionary.items()}
# Translations for Eurostat: maps the country label used in the Eurostat
# energy-balance sheets to a two-letter country code.
#
# Values are ISO 3166-1 alpha-2 codes, with these exceptions:
#   * "EU" / "EA" label the EU28 and EA19 aggregates,
#   * "XK" is the user-assigned code commonly used for Kosovo (no official
#     ISO code exists).
#
# Earlier revisions used "MA" (Malta), "TU" (Turkey), "KO" (Kosovo),
# "MO" (Moldova) and "UK" (Ukraine). Those are not the alpha-2 codes of these
# countries ("MA" is Morocco, "MO" is Macao, "UK" is the Eurostat label for
# the United Kingdom), so the countries could never be matched against a list
# of alpha-2 codes.
eurostat_country_to_alpha2 = {
    "EU28": "EU",
    "EA19": "EA",
    "Belgium": "BE",
    "Bulgaria": "BG",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Germany": "DE",
    "Estonia": "EE",
    "Ireland": "IE",
    "Greece": "GR",
    "Spain": "ES",
    "France": "FR",
    "Croatia": "HR",
    "Italy": "IT",
    "Cyprus": "CY",
    "Latvia": "LV",
    "Lithuania": "LT",
    "Luxembourg": "LU",
    "Hungary": "HU",
    "Malta": "MT",
    "Netherlands": "NL",
    "Austria": "AT",
    "Poland": "PL",
    "Portugal": "PT",
    "Romania": "RO",
    "Slovenia": "SI",
    "Slovakia": "SK",
    "Finland": "FI",
    "Sweden": "SE",
    "United Kingdom": "GB",
    "Iceland": "IS",
    "Norway": "NO",
    "Montenegro": "ME",
    "FYR of Macedonia": "MK",
    "Albania": "AL",
    "Serbia": "RS",
    "Turkey": "TR",
    "Bosnia and Herzegovina": "BA",
    "Kosovo\n(UNSCR 1244/99)": "XK",  # 2017 version
    # 2016 version
    "Kosovo\n(under United Nations Security Council Resolution 1244/99)": "XK",
    "Moldova": "MD",
    "Ukraine": "UA",
    "Switzerland": "CH",
}
non_EU = ["NO", "CH", "ME", "MK", "RS", "BA", "AL"]
idees_rename = {"GR": "EL", "GB": "UK"}
@ -127,47 +78,68 @@ to_ipcc = {
}
def build_eurostat(countries, year):
"""Return multi-index for all countries' energy data in TWh/a."""
def eurostat_per_country(country):
report_year = snakemake.config["energy"]["eurostat_report_year"]
filenames = {
2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx"
}
country_fn = idees_rename.get(country, country)
fn = snakemake.input.eurostat + f"/{country_fn}-Energy-balance-sheets-June-2021-edition.xlsb"
dfs = pd.read_excel(
snakemake.input.eurostat + filenames[report_year],
df = pd.read_excel(
fn,
sheet_name=None,
skiprows=1,
index_col=list(range(4)),
skiprows=4,
index_col=list(range(3)),
na_values=["+", "-", "=", "Z", ":"],
)
# sorted_index necessary for slicing
lookup = eurostat_country_to_alpha2
labelled_dfs = {lookup[df.columns[0]]: df
for df in dfs.values()
if lookup[df.columns[0]] in countries}
df = pd.concat(labelled_dfs, sort=True).sort_index()
df.pop("Cover")
# drop non-numeric and country columns
non_numeric_cols = df.columns[df.dtypes != float]
country_cols = df.columns.intersection(lookup.keys())
to_drop = non_numeric_cols.union(country_cols)
df.drop(to_drop, axis=1, inplace=True)
return pd.concat(df)
def build_eurostat(countries, year=None):
    """Return multi-index for all countries' energy data in TWh/a.

    One table per entry of ``countries`` is loaded via
    ``eurostat_per_country`` in a process pool sized by ``snakemake.threads``
    and the results are stacked under the index levels
    ``country, year, lvl1, lvl2, lvl3``.

    If ``year`` is truthy, only the cross-section for ``str(year)`` on the
    ``year`` level is returned; otherwise the table for all years is returned.
    """
    progress_options = {
        "ascii": False,
        "unit": ' country',
        "total": len(countries),
        "desc": 'Build from eurostat database',
    }

    with mp.Pool(processes=snakemake.threads) as pool:
        per_country = pool.imap(eurostat_per_country, countries)
        frames = list(tqdm(per_country, **progress_options))

    df = pd.concat(
        frames,
        keys=countries,
        names=['country', 'year', 'lvl1', 'lvl2', 'lvl3'],
    )

    # discard rows and columns that carry no data at all
    df = df.dropna(how='all', axis=0)
    df = df.dropna(how='all', axis=1)

    # discard rows whose lvl1 label is 'ktoe'
    # (presumably unit header rows of the sheets -- TODO confirm)
    df = df[df.index.get_level_values('lvl1') != 'ktoe']

    # rebuild the index labels: give the two special rows explicit labels,
    # then forward-fill the remaining gaps in the label columns
    labels = df.index.to_frame(index=False)
    labels.loc[labels.lvl2 == 'Primary production', ['lvl1', 'lvl3']] = 'Main'
    labels.loc[labels.lvl2 == 'Gross electricity production', 'lvl1'] = "Gross production"
    labels = labels.ffill()
    df.index = pd.MultiIndex.from_frame(labels)

    # remove the columns labelled 1990..2019 and the unnamed spill-over column
    df = df.drop(list(range(1990, 2020)), axis=1)
    df = df.drop("Unnamed: 7", axis=1)

    df = df.fillna(0.)

    # convert ktoe/a to TWh/a
    df = df * (11.63 / 1e3)

    if not year:
        return df

    return df.xs(str(year), level='year')
def build_swiss(year):
def build_swiss(year=None):
"""Return a pd.Series of Swiss energy data in TWh/a"""
fn = snakemake.input.swiss
df = pd.read_csv(fn, index_col=[0,1]).loc["CH", str(year)]
df = pd.read_csv(fn, index_col=[0,1]).loc["CH"]
if year:
df = df[str(year)]
# convert PJ/a to TWh/a
df /= 3.6
@ -406,8 +378,17 @@ def build_idees(countries, year):
def build_energy_totals(countries, eurostat, swiss, idees):
eurostat_fuels = {"electricity": "Electricity",
"total": "Total all products"}
eurostat_fuels = dict(
electricity="Electricity",
total="Total"
)
eurostat_sectors = dict(
residential="Households",
services="Commercial & public services",
road="Road",
rail="Rail"
)
to_drop = ["passenger cars", "passenger car efficiency"]
df = idees.reindex(countries).drop(to_drop, axis=1)
@ -417,8 +398,8 @@ def build_energy_totals(countries, eurostat, swiss, idees):
# add international navigation
slicer = idx[in_eurostat, :, "Bunkers", :]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
slicer = idx[in_eurostat, :, "International maritime bunkers", :]
fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
df.loc[in_eurostat, "total international navigation"] = fill_values
# add swiss energy data
@ -434,12 +415,10 @@ def build_energy_totals(countries, eurostat, swiss, idees):
for sector in ["residential", "services", "road", "rail"]:
eurostat_sector = sector.capitalize()
# fuel use
for fuel in ["electricity", "total"]:
slicer = idx[to_fill, :, :, eurostat_sector]
slicer = idx[to_fill, :, :, eurostat_sectors[sector]]
fill_values = eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
df.loc[to_fill, f"{fuel} {sector}"] = fill_values
@ -489,17 +468,17 @@ def build_energy_totals(countries, eurostat, swiss, idees):
# Missing aviation
slicer = idx[to_fill, :, :, "Domestic aviation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
df.loc[to_fill, "total domestic aviation"] = fill_values
slicer = idx[to_fill, :, :, "International aviation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
slicer = idx[to_fill, :, "International aviation", :]
fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
df.loc[to_fill, "total international aviation"] = fill_values
# missing domestic navigation
slicer = idx[to_fill, :, :, "Domestic Navigation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
slicer = idx[to_fill, :, :, "Domestic navigation"]
fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
df.loc[to_fill, "total domestic navigation"] = fill_values
# split road traffic for non-IDEES
@ -702,7 +681,7 @@ if __name__ == "__main__":
idees_countries = countries.intersection(eu28)
data_year = config["energy_totals_year"]
eurostat = build_eurostat(countries, data_year)
eurostat = build_eurostat(countries.difference(['CH']), data_year)
swiss = build_swiss(data_year)
idees = build_idees(idees_countries, data_year)