removing old eurostat data reports as an option, cleaning up code

2024-02-27 12:04:07 +01:00 · 2024-02-27 12:04:07 +01:00 · d363aeb57d
commit d363aeb57d
parent 3298572ced
7 changed files with 152 additions and 234 deletions
--- a/config/config.default.yaml
+++ b/config/config.default.yaml
@@ -316,7 +316,6 @@ pypsa_eur:
 energy:
  energy_totals_year: 2019
  base_emissions_year: 1990
-  eurostat_report_year: 2023
  emissions: CO2

 # docs in https://pypsa-eur.readthedocs.io/en/latest/configuration.html#biomass
--- a/rules/build_sector.smk
+++ b/rules/build_sector.smk
@@ -270,7 +270,7 @@ rule build_energy_totals:
        swiss="data/switzerland-new_format-all_years.csv",
        idees="data/bundle-sector/jrc-idees-2015",
        district_heat_share="data/district_heat_share.csv",
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
    output:
        energy_name=resources("energy_totals.csv"),
        co2_name=resources("co2_totals.csv"),
@@ -865,7 +865,7 @@ rule prepare_sector_network:
        ),
        network=resources("networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc"),
        energy_totals_name=resources("energy_totals.csv"),
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
        pop_weighted_energy_totals=resources(
            "pop_weighted_energy_totals_s{simpl}_{clusters}.csv"
        ),
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -129,14 +129,6 @@ def has_internet_access(url="www.zenodo.org") -> bool:
    finally:
        conn.close()

-
-def input_eurostat(w):
-    if config["energy"]["eurostat_report_year"] != 2023:
-        report_year = config["energy"]["eurostat_report_year"]
-        return f"data/bundle-sector/eurostat-energy_balances-june_{report_year}_edition"
-    else:
-        return "data/bundle-sector/eurostat-energy_balances-april_2023_edition"
-
 def solved_previous_horizon(w):
    planning_horizons = config_provider("scenario", "planning_horizons")(w)
    i = planning_horizons.index(int(w.planning_horizons))
--- a/rules/postprocess.smk
+++ b/rules/postprocess.smk
@@ -247,7 +247,7 @@ rule plot_summary:
        costs=RESULTS + "csvs/costs.csv",
        energy=RESULTS + "csvs/energy.csv",
        balances=RESULTS + "csvs/supply_energy.csv",
-        eurostat=input_eurostat,
+        eurostat="data/bundle-sector/eurostat-energy_balances-april_2023_edition",
        co2="data/bundle-sector/eea/UNFCCC_v23.csv",
    output:
        costs=RESULTS + "graphs/costs.pdf",
--- a/rules/retrieve.smk
+++ b/rules/retrieve.smk
@@ -142,6 +142,9 @@ if config["enable"]["retrieve"] and config["enable"].get(
        protected(
            directory("data/bundle-sector/eurostat-energy_balances-may_2018_edition")
        ),
+        protected(
+            directory("data/bundle-sector/eurostat-energy_balances-april_2023_edition")
+        ),
        protected(directory("data/bundle-sector/jrc-idees-2015")),
    ]

--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -117,42 +117,10 @@ to_ipcc = {
 }


-def build_eurostat(input_eurostat, countries, report_year, year):
+def build_eurostat(input_eurostat, countries, year):
    """
    Return multi-index for all countries' energy data in TWh/a.
    """
-    if report_year != 2023:
-        filenames = {
-        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
-        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
-    }
-
-        with mute_print():
-            dfs = pd.read_excel(
-                input_eurostat + filenames[report_year],
-                sheet_name=None,
-                skiprows=1,
-                index_col=list(range(4)),
-            )
-
-        # sorted_index necessary for slicing
-        lookup = eurostat_codes
-        labelled_dfs = {
-            lookup[df.columns[0]]: df
-            for df in dfs.values()
-            if lookup[df.columns[0]] in countries
-        }
-        df = pd.concat(labelled_dfs, sort=True).sort_index()
-        # drop non-numeric and country columns
-        non_numeric_cols = df.columns[df.dtypes != float]
-        country_cols = df.columns.intersection(lookup.keys())
-        to_drop = non_numeric_cols.union(country_cols)
-        df.drop(to_drop, axis=1, inplace=True)
-
-        # convert ktoe/a to TWh/a
-        df *= 11.63 / 1e3
-    
-    else:
    # read in every country file in countries
    eurostat = pd.DataFrame()
    countries = [country if country != 'GB' else 'UK' for country in countries]
@@ -709,8 +677,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
    return emissions / 1e3


-def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
-    eurostat = build_eurostat(input_eurostat, countries, report_year, year)
+def build_eurostat_co2(input_eurostat, countries, year=1990):
+    eurostat = build_eurostat(input_eurostat, countries, year)

    specific_emissions = pd.Series(index=eurostat.columns, dtype=float)

@@ -727,33 +695,10 @@ def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
    return eurostat.multiply(specific_emissions).sum(axis=1)


-def build_co2_totals(countries, eea_co2, eurostat_co2, report_year):
+def build_co2_totals(countries, eea_co2, eurostat_co2):
    co2 = eea_co2.reindex(countries)

    for ct in pd.Index(countries).intersection(["BA", "RS", "AL", "ME", "MK"]):
-        if report_year != 2023:
-            mappings = {
-                "electricity": (
-                    ct,
-                    "+",
-                    "Conventional Thermal Power Stations",
-                    "of which From Coal",
-                ),
-                "residential non-elec": (ct, "+", "+", "Residential"),
-                "services non-elec": (ct, "+", "+", "Services"),
-                "road non-elec": (ct, "+", "+", "Road"),
-                "rail non-elec": (ct, "+", "+", "Rail"),
-                "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
-                "international navigation": (ct, "-", "Bunkers"),
-                "domestic aviation": (ct, "+", "+", "Domestic aviation"),
-                "international aviation": (ct, "+", "+", "International aviation"),
-                # does not include industrial process emissions or fuel processing/refining
-                "industrial non-elec": (ct, "+", "Industry"),
-                # does not include non-energy emissions
-                "agriculture": (eurostat_co2.index.get_level_values(0) == ct)
-                & eurostat_co2.index.isin(["Agriculture / Forestry", "Fishing"], level=3),
-            }
-        else:
        mappings = {
            "electricity": (ct, "+", "Electricity & heat generation", np.nan),
            "residential non-elec": (ct, "+", "+", "Residential"),
@@ -820,76 +765,58 @@ def rescale(idees_countries, energy, eurostat):
    '''
    # read in the eurostat data for 2015
    eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[["Total all products", "Electricity"]]
-    # eurostat_2015 = eurostat_2015.rename(index={'GB': 'UK'}, level=0)
    eurostat_year = eurostat[["Total all products", "Electricity"]]
    # calculate the ratio of the two data sets
    ratio = eurostat_year / eurostat_2015
    ratio = ratio.droplevel([1,4])
    ratio.rename(columns={"Total all products": "total", "Electricity": "ele"}, inplace=True)
-    ratio = ratio.rename(index={"GB": "UK"}, level=0)
+    ratio = ratio.rename(index={"EL": "GR"}, level=0)

-    residential_total = [
-        "total residential space",
+    mappings = {
+        "Residential": {
+            "total": ["total residential space",
                      "total residential water",
                      "total residential cooking",
                      "total residential",
                      "derived heat residential",
-        "thermal uses residential",
-    ]
-    residential_ele = [
-        "electricity residential space",
+                      "thermal uses residential",],
+            "elec": ["electricity residential space",
                     "electricity residential water",
                     "electricity residential cooking",
-        "electricity residential",
-    ]
-
-    service_total = [
-        "total services space",
+                     "electricity residential",]},
+        "Services": {
+            "total": ["total services space",
                      "total services water",
                      "total services cooking",
                      "total services",
                      "derived heat services",
-        "thermal uses services",
-    ]
-    service_ele = [
-        "electricity services space",
+                      "thermal uses services",],
+            "elec": ["electricity services space",
                     "electricity services water",
                     "electricity services cooking",
-        "electricity services",
-    ]
-
-    agri_total = [
-        "total agriculture heat",
+                     "electricity services",]},
+        "Agriculture & forestry": {
+            "total": ["total agriculture heat",
                      "total agriculture machinery",
-        "total agriculture",
-    ]
-    agri_ele = [
-        "total agriculture electricity",
-    ]
-
-    road_total = [
-        "total road",
+                      "total agriculture",],
+            "elec": ["total agriculture electricity",]},
+        "Road": {
+            "total": ["total road",
                      "total passenger cars",
                      "total other road passenger",
-        "total light duty road freight",
-    ]
-    road_ele = [
-        "electricity road",
+                      "total light duty road freight",],
+            "elec": ["electricity road",
                     "electricity passenger cars",
                     "electricity other road passenger",
-        "electricity light duty road freight",
-    ]
-
-    rail_total = [
-        "total rail",
+                     "electricity light duty road freight",]},
+        "Rail": {
+            "total": ["total rail",
                      "total rail passenger",
-        "total rail freight",
-    ]
-    rail_ele = [
-        "electricity rail",
+                      "total rail freight",],
+            "elec": ["electricity rail",
                     "electricity rail passenger",
-        "electricity rail freight",
-    ]
+                     "electricity rail freight",]},
+    }

    avia_inter = [
    'total aviation passenger',
@@ -907,28 +834,12 @@ def rescale(idees_countries, energy, eurostat):
        "total domestic navigation",
    ]

-    idees_countries = idees_countries.repalce({'GB': 'UK', 'GR': 'EL'})
-
    for country in idees_countries:
-        res = ratio.loc[(country, slice(None), 'Residential')]
-        energy.loc[country, residential_total] *= res[['total']].iloc[0,0]
-        energy.loc[country, residential_ele] *= res[['ele']].iloc[0,0]
+        for sector, mapping in mappings.items():
+            sector_ratio = ratio.loc[(country, slice(None), sector)]

-        ser = ratio.loc[(country, slice(None), 'Services')]
-        energy.loc[country, service_total] *= ser[['total']].iloc[0,0]
-        energy.loc[country, service_ele] *= ser[['ele']].iloc[0,0]
-
-        agri = ratio.loc[(country, slice(None), 'Agriculture & forestry')]
-        energy.loc[country, agri_total] *= agri[['total']].iloc[0,0]
-        energy.loc[country, agri_ele] *= agri[['ele']].iloc[0,0]
-
-        road = ratio.loc[(country, slice(None), 'Road')]
-        energy.loc[country, road_total] *= road[['total']].iloc[0,0]
-        energy.loc[country, road_ele] *= road[['ele']].iloc[0,0]
-
-        rail = ratio.loc[(country, slice(None), 'Rail')]
-        energy.loc[country, rail_total] *= rail[['total']].iloc[0,0]
-        energy.loc[country, rail_ele] *= rail[['ele']].iloc[0,0]
+            energy.loc[country, mapping["total"]] *= sector_ratio[['total']].iloc[0,0]
+            energy.loc[country, mapping["elec"]] *= sector_ratio[['ele']].iloc[0,0]
        
        avi_d = ratio.loc[(country, slice(None), 'Domestic aviation')]
        avi_i = ratio.loc[(country, 'International aviation', slice(None))]
@@ -958,9 +869,8 @@ if __name__ == "__main__":
    idees_countries = pd.Index(countries).intersection(eu28)

    data_year = params["energy_totals_year"]
-    report_year = snakemake.params.energy["eurostat_report_year"]
    input_eurostat = snakemake.input.eurostat
-    eurostat = build_eurostat(input_eurostat, countries, report_year, data_year)
+    eurostat = build_eurostat(input_eurostat, countries, data_year)
    swiss = build_swiss(data_year)
    # data from idees only exists for 2015
    if data_year > 2015:
@@ -984,10 +894,10 @@ if __name__ == "__main__":
    emissions_scope = snakemake.params.energy["emissions"]
    eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
    eurostat_co2 = build_eurostat_co2(
-        input_eurostat, countries, report_year, base_year_emissions
+        input_eurostat, countries, base_year_emissions
    )

-    co2 = build_co2_totals(countries, eea_co2, eurostat_co2, report_year)
+    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
    co2.to_csv(snakemake.output.co2_name)

    transport = build_transport_data(countries, population, idees)
--- a/scripts/retrieve_sector_databundle.py
+++ b/scripts/retrieve_sector_databundle.py
@@ -8,6 +8,7 @@ Retrieve and extract data bundle for sector-coupled studies.

 import logging
 import tarfile
+import zipfile
 from pathlib import Path

 from _helpers import (
@@ -47,3 +48,16 @@ if __name__ == "__main__":
    tarball_fn.unlink()

    logger.info(f"Databundle available in '{to_fn}'.")
+
+    url_eurostat = "https://ec.europa.eu/eurostat/documents/38154/4956218/Balances-December2022.zip/f7cf0d19-5c0f-60ad-4e48-098a5ddd6e48?t=1671184070589"
+    tarball_fn = Path(f"{rootpath}/data/bundle-sector/eurostat_2023.zip")
+    to_fn = Path(f"{rootpath}/data/bundle-sector/eurostat-energy_balances-april_2023_edition/")
+
+    logger.info(f"Downloading Eurostat data from '{url_eurostat}'.")
+    progress_retrieve(url_eurostat, tarball_fn, disable=disable_progress)
+
+    logger.info("Extracting Eurostat data.")
+    with zipfile.ZipFile(tarball_fn, 'r') as zip_ref:
+        zip_ref.extractall(to_fn)
+
+    logger.info(f"Eurostat data available in '{to_fn}'.")