energy_totals: update eurostat to 2021

2022-07-23 10:37:32 +02:00 · 2022-07-23 10:37:32 +02:00 · d47a9aac72
commit d47a9aac72
parent 6e47c71195
2 changed files with 68 additions and 95 deletions
--- a/10
+++ b/10
@ -54,8 +54,7 @@ datafiles = [
    "data/emobility/KFZ__count",
    "data/emobility/Pkw__count",
    "data/h2_salt_caverns_GWh_per_sqkm.geojson",
-    directory("data/eurostat-energy_balances-june_2016_edition"),
+    directory("data/eurostat-energy_balances-april_2022_edition"),
    directory("data/eurostat-energy_balances-may_2018_edition"),
    directory("data/jrc-idees-2015"),
 ]
@ -235,11 +234,6 @@ rule build_solar_thermal_profiles:
    script: "scripts/build_solar_thermal_profiles.py"
 def input_eurostat(w):
    # 2016 includes BA, 2017 does not
    report_year = config["energy"]["eurostat_report_year"]
    return f"data/eurostat-energy_balances-june_{report_year}_edition"
 rule build_energy_totals:
    input:
        nuts3_shapes=pypsaeur('resources/nuts3_shapes.geojson'),
@ -247,7 +241,7 @@ rule build_energy_totals:
        swiss="data/switzerland-sfoe/switzerland-new_format.csv",
        idees="data/jrc-idees-2015",
        district_heat_share='data/district_heat_share.csv',
-        eurostat=input_eurostat
+        eurostat=directory("data/eurostat-energy_balances-june_2021_edition"),
    output:
        energy_name='resources/energy_totals.csv',
 	    co2_name='resources/co2_totals.csv',
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@ -17,55 +17,6 @@ def reverse(dictionary):
    """Reverse the keys and values of a dictionary."""
    return {v: k for k, v in dictionary.items()}
 # translations for Eurostat
 eurostat_country_to_alpha2 = {
    "EU28": "EU",
    "EA19": "EA",
    "Belgium": "BE",
    "Bulgaria": "BG",
    "Czech Republic": "CZ",
    "Denmark": "DK",
    "Germany": "DE",
    "Estonia": "EE",
    "Ireland": "IE",
    "Greece": "GR",
    "Spain": "ES",
    "France": "FR",
    "Croatia": "HR",
    "Italy": "IT",
    "Cyprus": "CY",
    "Latvia": "LV",
    "Lithuania": "LT",
    "Luxembourg": "LU",
    "Hungary": "HU",
    "Malta": "MA",
    "Netherlands": "NL",
    "Austria": "AT",
    "Poland": "PL",
    "Portugal": "PT",
    "Romania": "RO",
    "Slovenia": "SI",
    "Slovakia": "SK",
    "Finland": "FI",
    "Sweden": "SE",
    "United Kingdom": "GB",
    "Iceland": "IS",
    "Norway": "NO",
    "Montenegro": "ME",
    "FYR of Macedonia": "MK",
    "Albania": "AL",
    "Serbia": "RS",
    "Turkey": "TU",
    "Bosnia and Herzegovina": "BA",
    "Kosovo\n(UNSCR 1244/99)": "KO",  # 2017 version
    # 2016 version
    "Kosovo\n(under United Nations Security Council Resolution 1244/99)": "KO",
    "Moldova": "MO",
    "Ukraine": "UK",
    "Switzerland": "CH",
 }
 non_EU = ["NO", "CH", "ME", "MK", "RS", "BA", "AL"]
 idees_rename = {"GR": "EL", "GB": "UK"}
@ -127,47 +78,68 @@ to_ipcc = {
 }
-def build_eurostat(countries, year):
+def eurostat_per_country(country):
    """Return multi-index for all countries' energy data in TWh/a."""
-    report_year = snakemake.config["energy"]["eurostat_report_year"]
+    country_fn = idees_rename.get(country, country)
-    filenames = {
+    fn = snakemake.input.eurostat + f"/{country_fn}-Energy-balance-sheets-June-2021-edition.xlsb"
        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx"
    }
-    dfs = pd.read_excel(
+    df = pd.read_excel(
-        snakemake.input.eurostat + filenames[report_year],
+        fn,
        sheet_name=None,
-        skiprows=1,
+        skiprows=4,
-        index_col=list(range(4)),
+        index_col=list(range(3)),
        na_values=["+", "-", "=", "Z", ":"],
    )
-    # sorted_index necessary for slicing
+    df.pop("Cover")
    lookup = eurostat_country_to_alpha2
    labelled_dfs = {lookup[df.columns[0]]: df
                    for df in dfs.values()
                    if lookup[df.columns[0]] in countries}
    df = pd.concat(labelled_dfs, sort=True).sort_index()
-    # drop non-numeric and country columns
+    return pd.concat(df)
-    non_numeric_cols = df.columns[df.dtypes != float]
+
-    country_cols = df.columns.intersection(lookup.keys())
+
-    to_drop = non_numeric_cols.union(country_cols)
+def build_eurostat(countries, year=None):
-    df.drop(to_drop, axis=1, inplace=True)
+    """Return multi-index for all countries' energy data in TWh/a."""
    nprocesses = snakemake.threads
    tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries),
                    desc='Build from eurostat database')
    with mp.Pool(processes=nprocesses) as pool:
        dfs = list(tqdm(pool.imap(eurostat_per_country, countries), **tqdm_kwargs))
    index_names = ['country', 'year', 'lvl1', 'lvl2', 'lvl3']
    df = pd.concat(dfs, keys=countries, names=index_names)
    df.dropna(how='all', axis=0, inplace=True)
    df.dropna(how='all', axis=1, inplace=True)
    df = df[df.index.get_level_values('lvl1') != 'ktoe']
    i = df.index.to_frame(index=False)
    i.loc[i.lvl2 == 'Primary production', ['lvl1', 'lvl3']] = 'Main'
    i.loc[i.lvl2 == 'Gross electricity production', 'lvl1'] = "Gross production"
    i.ffill(inplace=True)
    df.index = pd.MultiIndex.from_frame(i)
    df.drop(list(range(1990, 2020)), axis=1, inplace=True)
    df.drop("Unnamed: 7", axis=1, inplace=True)
    df.fillna(0., inplace=True)
    # convert ktoe/a to TWh/a
    df *= 11.63 / 1e3
    if year:
        df = df.xs(str(year), level='year')
    return df
-def build_swiss(year):
+def build_swiss(year=None):
    """Return a pd.Series of Swiss energy data in TWh/a"""
    fn = snakemake.input.swiss
-    df = pd.read_csv(fn, index_col=[0,1]).loc["CH", str(year)]
+    df = pd.read_csv(fn, index_col=[0,1]).loc["CH"]
    if year:
        df = df[str(year)]
    # convert PJ/a to TWh/a
    df /= 3.6
@ -406,8 +378,17 @@ def build_idees(countries, year):
 def build_energy_totals(countries, eurostat, swiss, idees):
-    eurostat_fuels = {"electricity": "Electricity",
+    eurostat_fuels = dict(
-                      "total": "Total all products"}
+        electricity="Electricity",
        total="Total"
    )
    eurostat_sectors = dict(
        residential="Households",
        services="Commercial & public services",
        road="Road",
        rail="Rail"
    )
    to_drop = ["passenger cars", "passenger car efficiency"]
    df = idees.reindex(countries).drop(to_drop, axis=1)
@ -417,8 +398,8 @@ def build_energy_totals(countries, eurostat, swiss, idees):
    # add international navigation
-    slicer = idx[in_eurostat, :, "Bunkers", :]
+    slicer = idx[in_eurostat, :, "International maritime bunkers", :]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[in_eurostat, "total international navigation"] = fill_values
    # add swiss energy data
@ -434,12 +415,10 @@ def build_energy_totals(countries, eurostat, swiss, idees):
    for sector in ["residential", "services", "road", "rail"]:
        eurostat_sector = sector.capitalize()
        # fuel use
        for fuel in ["electricity", "total"]:
-            slicer = idx[to_fill, :, :, eurostat_sector]
+            slicer = idx[to_fill, :, :, eurostat_sectors[sector]]
            fill_values = eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values
@ -489,17 +468,17 @@ def build_energy_totals(countries, eurostat, swiss, idees):
    # Missing aviation
    slicer = idx[to_fill, :, :, "Domestic aviation"]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic aviation"] = fill_values
-    slicer = idx[to_fill, :, :, "International aviation"]
+    slicer = idx[to_fill, :, "International aviation", :]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total international aviation"] = fill_values
    # missing domestic navigation
-    slicer = idx[to_fill, :, :, "Domestic Navigation"]
+    slicer = idx[to_fill, :, :, "Domestic navigation"]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic navigation"] = fill_values
    # split road traffic for non-IDEES
@ -702,7 +681,7 @@ if __name__ == "__main__":
    idees_countries = countries.intersection(eu28)
    data_year = config["energy_totals_year"]
-    eurostat = build_eurostat(countries, data_year)
+    eurostat = build_eurostat(countries.difference(['CH']), data_year)
    swiss = build_swiss(data_year)
    idees = build_idees(idees_countries, data_year)