energy_totals: update eurostat to 2021

2022-07-23 10:37:32 +02:00 · 2022-07-23 10:37:32 +02:00 · d47a9aac72
commit d47a9aac72
parent 6e47c71195
2 changed files with 68 additions and 95 deletions
--- a/10
+++ b/10
@ -54,8 +54,7 @@ datafiles = [
    "data/emobility/KFZ__count",
    "data/emobility/Pkw__count",
    "data/h2_salt_caverns_GWh_per_sqkm.geojson",
-    directory("data/eurostat-energy_balances-june_2016_edition"),
-    directory("data/eurostat-energy_balances-may_2018_edition"),
+    directory("data/eurostat-energy_balances-june_2021_edition"),
    directory("data/jrc-idees-2015"),
 ]

@ -235,11 +234,6 @@ rule build_solar_thermal_profiles:
    script: "scripts/build_solar_thermal_profiles.py"


-def input_eurostat(w):
-    # 2016 includes BA, 2017 does not
-    report_year = config["energy"]["eurostat_report_year"]
-    return f"data/eurostat-energy_balances-june_{report_year}_edition"
-
 rule build_energy_totals:
    input:
        nuts3_shapes=pypsaeur('resources/nuts3_shapes.geojson'),
@ -247,7 +241,7 @@ rule build_energy_totals:
        swiss="data/switzerland-sfoe/switzerland-new_format.csv",
        idees="data/jrc-idees-2015",
        district_heat_share='data/district_heat_share.csv',
-        eurostat=input_eurostat
+        eurostat="data/eurostat-energy_balances-june_2021_edition",
    output:
        energy_name='resources/energy_totals.csv',
 	    co2_name='resources/co2_totals.csv',
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@ -17,55 +17,6 @@ def reverse(dictionary):
    """reverses a keys and values of a dictionary"""
    return {v: k for k, v in dictionary.items()}

-
-# translations for Eurostat
-eurostat_country_to_alpha2 = {
-    "EU28": "EU",
-    "EA19": "EA",
-    "Belgium": "BE",
-    "Bulgaria": "BG",
-    "Czech Republic": "CZ",
-    "Denmark": "DK",
-    "Germany": "DE",
-    "Estonia": "EE",
-    "Ireland": "IE",
-    "Greece": "GR",
-    "Spain": "ES",
-    "France": "FR",
-    "Croatia": "HR",
-    "Italy": "IT",
-    "Cyprus": "CY",
-    "Latvia": "LV",
-    "Lithuania": "LT",
-    "Luxembourg": "LU",
-    "Hungary": "HU",
-    "Malta": "MA",
-    "Netherlands": "NL",
-    "Austria": "AT",
-    "Poland": "PL",
-    "Portugal": "PT",
-    "Romania": "RO",
-    "Slovenia": "SI",
-    "Slovakia": "SK",
-    "Finland": "FI",
-    "Sweden": "SE",
-    "United Kingdom": "GB",
-    "Iceland": "IS",
-    "Norway": "NO",
-    "Montenegro": "ME",
-    "FYR of Macedonia": "MK",
-    "Albania": "AL",
-    "Serbia": "RS",
-    "Turkey": "TU",
-    "Bosnia and Herzegovina": "BA",
-    "Kosovo\n(UNSCR 1244/99)": "KO",  # 2017 version
-    # 2016 version
-    "Kosovo\n(under United Nations Security Council Resolution 1244/99)": "KO",
-    "Moldova": "MO",
-    "Ukraine": "UK",
-    "Switzerland": "CH",
-}
-
 non_EU = ["NO", "CH", "ME", "MK", "RS", "BA", "AL"]

 idees_rename = {"GR": "EL", "GB": "UK"}
@ -127,47 +78,68 @@ to_ipcc = {
 }


-def build_eurostat(countries, year):
-    """Return multi-index for all countries' energy data in TWh/a."""
+def eurostat_per_country(country):
+    """Read the Eurostat energy balance workbook of a single country.
+
+    The workbook is looked up inside ``snakemake.input.eurostat``; its file
+    name uses the country code after mapping through ``idees_rename``.
+    Every sheet is read with the first four rows skipped, the first three
+    columns as index and the symbols "+", "-", "=", "Z", ":" treated as
+    missing values. The "Cover" sheet is discarded and the remaining
+    sheets are concatenated, keyed by sheet name.
+    """
+    # workbook names use the codes from idees_rename (e.g. GR -> EL, GB -> UK)
+    country_fn = idees_rename.get(country, country)
+    fn = snakemake.input.eurostat + f"/{country_fn}-Energy-balance-sheets-June-2021-edition.xlsb"

-    report_year = snakemake.config["energy"]["eurostat_report_year"]
-    filenames = {
-        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
-        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx"
-    }
-
-    dfs = pd.read_excel(
-        snakemake.input.eurostat + filenames[report_year],
+    # sheet_name=None reads all sheets and returns a dict keyed by sheet name
+    df = pd.read_excel(
+        fn,
        sheet_name=None,
-        skiprows=1,
-        index_col=list(range(4)),
+        skiprows=4,
+        index_col=list(range(3)),
+        na_values=["+", "-", "=", "Z", ":"],
    )

-    # sorted_index necessary for slicing
-    lookup = eurostat_country_to_alpha2
-    labelled_dfs = {lookup[df.columns[0]]: df
-                    for df in dfs.values()
-                    if lookup[df.columns[0]] in countries}
-    df = pd.concat(labelled_dfs, sort=True).sort_index()
+    # discard the "Cover" sheet; raises KeyError if the workbook has none
+    df.pop("Cover")

-    # drop non-numeric and country columns
-    non_numeric_cols = df.columns[df.dtypes != float]
-    country_cols = df.columns.intersection(lookup.keys())
-    to_drop = non_numeric_cols.union(country_cols)
-    df.drop(to_drop, axis=1, inplace=True)
+    # stack the remaining sheets; the sheet name becomes the outermost index level
+    return pd.concat(df)
+
+
+def build_eurostat(countries, year=None):
+    """Return multi-index for all countries' energy data in TWh/a.
+
+    One workbook per entry of ``countries`` is read with
+    ``eurostat_per_country`` (in parallel, using ``snakemake.threads``
+    worker processes) and the results are stacked under the index levels
+    (country, year, lvl1, lvl2, lvl3). Values are multiplied by
+    11.63 / 1e3 to convert ktoe/a to TWh/a.
+
+    If ``year`` is truthy, only the cross-section whose ``year`` level
+    equals ``str(year)`` is returned; otherwise all years are kept.
+    """
+
+    # imap yields results in input order, so dfs lines up with countries
+    # when those are used as concat keys below
+    nprocesses = snakemake.threads
+    tqdm_kwargs = dict(ascii=False, unit=' country', total=len(countries),
+                    desc='Build from eurostat database')
+    with mp.Pool(processes=nprocesses) as pool:
+        dfs = list(tqdm(pool.imap(eurostat_per_country, countries), **tqdm_kwargs))
+
+    index_names = ['country', 'year', 'lvl1', 'lvl2', 'lvl3']
+    df = pd.concat(dfs, keys=countries, names=index_names)
+
+    # remove rows and columns without any data, and rows labelled 'ktoe' in lvl1
+    # (presumably unit header rows -- TODO confirm against the workbooks)
+    df.dropna(how='all', axis=0, inplace=True)
+    df.dropna(how='all', axis=1, inplace=True)
+    df = df[df.index.get_level_values('lvl1') != 'ktoe']
+
+    # rewrite index labels on a plain frame, then forward-fill missing labels
+    # before rebuilding the MultiIndex
+    i = df.index.to_frame(index=False)
+    i.loc[i.lvl2 == 'Primary production', ['lvl1', 'lvl3']] = 'Main'
+    i.loc[i.lvl2 == 'Gross electricity production', 'lvl1'] = "Gross production"
+    i.ffill(inplace=True)
+    df.index = pd.MultiIndex.from_frame(i)
+
+    # drop the integer-labelled columns 1990..2019 and the "Unnamed: 7" column;
+    # both drops raise KeyError if a label is absent. Remaining gaps become 0.
+    df.drop(list(range(1990, 2020)), axis=1, inplace=True)
+    df.drop("Unnamed: 7", axis=1, inplace=True)
+    df.fillna(0., inplace=True)

    # convert ktoe/a to TWh/a
    df *= 11.63 / 1e3

+    # the 'year' level holds the workbook sheet names, hence the str() lookup
+    if year:
+        df = df.xs(str(year), level='year')
+
    return df


-def build_swiss(year):
+def build_swiss(year=None):
    """Return a pd.Series of Swiss energy data in TWh/a"""

    fn = snakemake.input.swiss

-    df = pd.read_csv(fn, index_col=[0,1]).loc["CH", str(year)]
+    df = pd.read_csv(fn, index_col=[0,1]).loc["CH"]
+
+    if year:
+        df = df[str(year)]

    # convert PJ/a to TWh/a
    df /= 3.6
@ -406,8 +378,17 @@ def build_idees(countries, year):

 def build_energy_totals(countries, eurostat, swiss, idees):

-    eurostat_fuels = {"electricity": "Electricity",
-                      "total": "Total all products"}
+    eurostat_fuels = dict(
+        electricity="Electricity",
+        total="Total"
+    )
+
+    eurostat_sectors = dict(
+        residential="Households",
+        services="Commercial & public services",
+        road="Road",
+        rail="Rail"
+    )

    to_drop = ["passenger cars", "passenger car efficiency"]
    df = idees.reindex(countries).drop(to_drop, axis=1)
@ -417,8 +398,8 @@ def build_energy_totals(countries, eurostat, swiss, idees):

    # add international navigation

-    slicer = idx[in_eurostat, :, "Bunkers", :]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    slicer = idx[in_eurostat, :, "International maritime bunkers", :]
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[in_eurostat, "total international navigation"] = fill_values

    # add swiss energy data
@ -434,12 +415,10 @@ def build_energy_totals(countries, eurostat, swiss, idees):

    for sector in ["residential", "services", "road", "rail"]:

-        eurostat_sector = sector.capitalize()
-
        # fuel use

        for fuel in ["electricity", "total"]:
-            slicer = idx[to_fill, :, :, eurostat_sector]
+            slicer = idx[to_fill, :, :, eurostat_sectors[sector]]
            fill_values = eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values

@ -489,17 +468,17 @@ def build_energy_totals(countries, eurostat, swiss, idees):
    # Missing aviation

    slicer = idx[to_fill, :, :, "Domestic aviation"]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic aviation"] = fill_values

-    slicer = idx[to_fill, :, :, "International aviation"]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    slicer = idx[to_fill, :, "International aviation", :]
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total international aviation"] = fill_values

    # missing domestic navigation

-    slicer = idx[to_fill, :, :, "Domestic Navigation"]
-    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
+    slicer = idx[to_fill, :, :, "Domestic navigation"]
+    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic navigation"] = fill_values

    # split road traffic for non-IDEES
@ -702,7 +681,7 @@ if __name__ == "__main__":
    idees_countries = countries.intersection(eu28)

    data_year = config["energy_totals_year"]
-    eurostat = build_eurostat(countries, data_year)
+    eurostat = build_eurostat(countries.difference(['CH']), data_year)
    swiss = build_swiss(data_year)
    idees = build_idees(idees_countries, data_year)