From 7abbb47efda6a51d1d1ef1b6f66e5a04bb4919e6 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 6 Mar 2024 16:42:33 +0100
Subject: [PATCH] revert to master version of build_energy_totals

---
 scripts/build_energy_totals.py | 208 +++++++++++++--------------------
 1 file changed, 83 insertions(+), 125 deletions(-)

diff --git a/scripts/build_energy_totals.py b/scripts/build_energy_totals.py
index b14644d5..1ffc4ae2 100644
--- a/scripts/build_energy_totals.py
+++ b/scripts/build_energy_totals.py
@@ -122,27 +122,16 @@ def build_eurostat(input_eurostat, countries, year):
     # convert to TWh/a from ktoe/a
     df *= 11.63 / 1e3
 
-    df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1)
-
-    if year:
-        df = df.xs(year, level="year")
-
     return df
 
 
-def build_swiss(year=None):
+def build_swiss(year):
     """
-    Return a pd.DataFrame of Swiss energy data in TWh/a.
+    Return a pd.Series of Swiss energy data in TWh/a.
     """
-
     fn = snakemake.input.swiss
 
-    df = pd.read_csv(fn, index_col=[0, 1]).stack().unstack("item")
-    df.index.names = ["country", "year"]
-    df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1)
-
-    if year:
-        df = df.xs(year, level="year")
+    df = pd.read_csv(fn, index_col=[0, 1]).loc["CH", str(year)]
 
     # convert PJ/a to TWh/a
     df /= 3.6
@@ -151,7 +140,6 @@ def build_swiss(year=None):
 
 
 def idees_per_country(ct, year, base_dir):
-    ct_totals = {}
     ct_idees = idees_rename.get(ct, ct)
     fn_residential = f"{base_dir}/JRC-IDEES-2015_Residential_{ct_idees}.xlsx"
     fn_tertiary = f"{base_dir}/JRC-IDEES-2015_Tertiary_{ct_idees}.xlsx"
@@ -159,27 +147,27 @@ def idees_per_country(ct, year, base_dir):
 
     # residential
 
-    df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)
-
-    ct_totals["total residential space"] = df.loc["Space heating"]
+    df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)[year]
 
     rows = ["Advanced electric heating", "Conventional electric heating"]
-    ct_totals["electricity residential space"] = df.loc[rows].sum()
-
-    ct_totals["total residential water"] = df.loc["Water heating"]
+    ct_totals = {
+        "total residential space": df["Space heating"],
+        "electricity residential space": df[rows].sum(),
+    }
+    ct_totals["total residential water"] = df.at["Water heating"]
 
     assert df.index[23] == "Electricity"
     ct_totals["electricity residential water"] = df.iloc[23]
 
-    ct_totals["total residential cooking"] = df.loc["Cooking"]
+    ct_totals["total residential cooking"] = df["Cooking"]
 
     assert df.index[30] == "Electricity"
     ct_totals["electricity residential cooking"] = df.iloc[30]
 
-    df = pd.read_excel(fn_residential, "RES_summary", index_col=0)
+    df = pd.read_excel(fn_residential, "RES_summary", index_col=0)[year]
 
     row = "Energy consumption by fuel - Eurostat structure (ktoe)"
-    ct_totals["total residential"] = df.loc[row]
+    ct_totals["total residential"] = df[row]
 
     assert df.index[47] == "Electricity"
     ct_totals["electricity residential"] = df.iloc[47]
@@ -192,27 +180,27 @@ def idees_per_country(ct, year, base_dir):
 
     # services
 
-    df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)
+    df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)[year]
 
-    ct_totals["total services space"] = df.loc["Space heating"]
+    ct_totals["total services space"] = df["Space heating"]
 
     rows = ["Advanced electric heating", "Conventional electric heating"]
-    ct_totals["electricity services space"] = df.loc[rows].sum()
+    ct_totals["electricity services space"] = df[rows].sum()
 
-    ct_totals["total services water"] = df.loc["Hot water"]
+    ct_totals["total services water"] = df["Hot water"]
 
     assert df.index[24] == "Electricity"
     ct_totals["electricity services water"] = df.iloc[24]
 
-    ct_totals["total services cooking"] = df.loc["Catering"]
+    ct_totals["total services cooking"] = df["Catering"]
 
     assert df.index[31] == "Electricity"
     ct_totals["electricity services cooking"] = df.iloc[31]
 
-    df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)
+    df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)[year]
 
     row = "Energy consumption by fuel - Eurostat structure (ktoe)"
-    ct_totals["total services"] = df.loc[row]
+    ct_totals["total services"] = df[row]
 
     assert df.index[50] == "Electricity"
     ct_totals["electricity services"] = df.iloc[50]
@@ -228,7 +216,7 @@ def idees_per_country(ct, year, base_dir):
     start = "Detailed split of energy consumption (ktoe)"
     end = "Market shares of energy uses (%)"
 
-    df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end]
+    df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end, year]
 
     rows = [
         "Lighting",
@@ -236,30 +224,30 @@ def idees_per_country(ct, year, base_dir):
         "Specific electricity uses",
         "Pumping devices (electric)",
     ]
-    ct_totals["total agriculture electricity"] = df.loc[rows].sum()
+    ct_totals["total agriculture electricity"] = df[rows].sum()
 
     rows = ["Specific heat uses", "Low enthalpy heat"]
-    ct_totals["total agriculture heat"] = df.loc[rows].sum()
+    ct_totals["total agriculture heat"] = df[rows].sum()
 
     rows = [
         "Motor drives",
         "Farming machine drives (diesel oil incl. biofuels)",
         "Pumping devices (diesel oil incl. biofuels)",
     ]
-    ct_totals["total agriculture machinery"] = df.loc[rows].sum()
+    ct_totals["total agriculture machinery"] = df[rows].sum()
 
     row = "Agriculture, forestry and fishing"
-    ct_totals["total agriculture"] = df.loc[row]
+    ct_totals["total agriculture"] = df[row]
 
     # transport
 
-    df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)
+    df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)[year]
 
-    ct_totals["total road"] = df.loc["by fuel (EUROSTAT DATA)"]
+    ct_totals["total road"] = df["by fuel (EUROSTAT DATA)"]
 
-    ct_totals["electricity road"] = df.loc["Electricity"]
+    ct_totals["electricity road"] = df["Electricity"]
 
-    ct_totals["total two-wheel"] = df.loc["Powered 2-wheelers (Gasoline)"]
+    ct_totals["total two-wheel"] = df["Powered 2-wheelers (Gasoline)"]
 
     assert df.index[19] == "Passenger cars"
     ct_totals["total passenger cars"] = df.iloc[19]
@@ -280,16 +268,16 @@ def idees_per_country(ct, year, base_dir):
     ct_totals["electricity light duty road freight"] = df.iloc[49]
 
     row = "Heavy duty vehicles (Diesel oil incl. biofuels)"
-    ct_totals["total heavy duty road freight"] = df.loc[row]
+    ct_totals["total heavy duty road freight"] = df[row]
 
     assert df.index[61] == "Passenger cars"
     ct_totals["passenger car efficiency"] = df.iloc[61]
 
-    df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)
+    df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)[year]
 
-    ct_totals["total rail"] = df.loc["by fuel (EUROSTAT DATA)"]
+    ct_totals["total rail"] = df["by fuel (EUROSTAT DATA)"]
 
-    ct_totals["electricity rail"] = df.loc["Electricity"]
+    ct_totals["electricity rail"] = df["Electricity"]
 
     assert df.index[15] == "Passenger transport"
     ct_totals["total rail passenger"] = df.iloc[15]
@@ -305,7 +293,7 @@ def idees_per_country(ct, year, base_dir):
     assert df.index[23] == "Electric"
     ct_totals["electricity rail freight"] = df.iloc[23]
 
-    df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)
+    df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)[year]
 
     assert df.index[6] == "Passenger transport"
     ct_totals["total aviation passenger"] = df.iloc[6]
@@ -336,21 +324,20 @@ def idees_per_country(ct, year, base_dir):
         + ct_totals["total international aviation passenger"]
     )
 
-    df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)
+    df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)[year]
 
     # coastal and inland
-    ct_totals["total domestic navigation"] = df.loc["by fuel (EUROSTAT DATA)"]
+    ct_totals["total domestic navigation"] = df["by fuel (EUROSTAT DATA)"]
 
-    df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)
+    df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)[year]
 
     assert df.index[85] == "Passenger cars"
     ct_totals["passenger cars"] = df.iloc[85]
 
-    return pd.DataFrame(ct_totals)
+    return pd.Series(ct_totals, name=ct)
 
 
-def build_idees(countries, year=None):
-
+def build_idees(countries, year):
     nprocesses = snakemake.threads
     disable_progress = snakemake.config["run"].get("disable_progressbar", False)
 
@@ -362,49 +349,35 @@ def build_idees(countries, year=None):
         desc="Build from IDEES database",
         disable=disable_progress,
     )
-
     with mute_print():
         with mp.Pool(processes=nprocesses) as pool:
             totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))
 
-    totals = pd.concat(totals_list, keys=countries, names=["country", "year"])
+    totals = pd.concat(totals_list, axis=1)
 
     # convert ktoe to TWh
-    exclude = totals.columns.str.fullmatch("passenger cars")
-    totals.loc[:, ~exclude] *= 11.63 / 1e3
+    exclude = totals.index.str.fullmatch("passenger cars")
+    totals.loc[~exclude] *= 11.63 / 1e3
 
     # convert TWh/100km to kWh/km
-    totals.loc[:, "passenger car efficiency"] *= 10
+    totals.loc["passenger car efficiency"] *= 10
 
-    return totals
+    return totals.T
 
 
 def build_energy_totals(countries, eurostat, swiss, idees):
-
-    eurostat_fuels = dict(electricity="Electricity", total="Total")
-
-    eurostat_sectors = dict(
-        residential="Households",
-        services="Commercial & public services",
-        road="Road",
-        rail="Rail",
-    )
+    eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}
 
     to_drop = ["passenger cars", "passenger car efficiency"]
-
-    new_index = pd.MultiIndex.from_product(
-        [countries, eurostat.index.levels[1]], names=["country", "year"]
-    )
-
-    df = idees.reindex(new_index).drop(to_drop, axis=1)
+    df = idees.reindex(countries).drop(to_drop, axis=1)
 
     eurostat_countries = eurostat.index.levels[0]
-    in_eurostat = df.index.levels[0].intersection(eurostat_countries)
+    in_eurostat = df.index.intersection(eurostat_countries)
 
     # add international navigation
 
-    slicer = idx[in_eurostat, :, :, "International maritime bunkers", :]
-    fill_values = eurostat.loc[slicer, "Total"].groupby(level=[0, 1]).sum()
+    slicer = idx[in_eurostat, :, "Bunkers", :]
+    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
     df.loc[in_eurostat, "total international navigation"] = fill_values
 
     # add swiss energy data
@@ -414,24 +387,19 @@ def build_energy_totals(countries, eurostat, swiss, idees):
     # get values for missing countries based on Eurostat EnergyBalances
     # divide cooking/space/water according to averages in EU28
 
-    to_fill = df.index[
-        df["total residential"].isna()
-        & df.index.get_level_values("country").isin(eurostat_countries)
-    ]
+    missing = df.index[df["total residential"].isna()]
+    to_fill = missing.intersection(eurostat_countries)
     uses = ["space", "cooking", "water"]
 
-    c = to_fill.get_level_values("country")
-    y = to_fill.get_level_values("year")
-
     for sector in ["residential", "services", "road", "rail"]:
+        eurostat_sector = sector.capitalize()
 
         # fuel use
 
         for fuel in ["electricity", "total"]:
-
-            slicer = idx[c, y, :, :, eurostat_sectors[sector]]
+            slicer = idx[to_fill, :, :, eurostat_sector]
             fill_values = (
-                eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum()
+                eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
             )
             df.loc[to_fill, f"{fuel} {sector}"] = fill_values
 
@@ -493,32 +461,30 @@ def build_energy_totals(countries, eurostat, swiss, idees):
                     no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
                 )
                 fraction = nonelectric_use.div(nonelectric).mean()
-                df.loc["NO", f"total {sector} {use}"] = (
-                    total_heating * fraction
-                ).values
+                df.loc["NO", f"total {sector} {use}"] = total_heating * fraction
                 df.loc["NO", f"electricity {sector} {use}"] = (
                     total_heating * fraction * elec_fraction
-                ).values
+                )
 
     # Missing aviation
 
-    slicer = idx[c, y, :, :, "Domestic aviation"]
-    fill_values = eurostat.loc[slicer, "Total"].groupby(level=[0, 1]).sum()
+    slicer = idx[to_fill, :, :, "Domestic aviation"]
+    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
     df.loc[to_fill, "total domestic aviation"] = fill_values
 
-    slicer = idx[c, y, :, "International aviation", :]
-    fill_values = eurostat.loc[slicer, "Total"].groupby(level=[0, 1]).sum()
+    slicer = idx[to_fill, :, :, "International aviation"]
+    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
     df.loc[to_fill, "total international aviation"] = fill_values
 
     # missing domestic navigation
 
-    slicer = idx[c, y, :, :, "Domestic navigation"]
-    fill_values = eurostat.loc[slicer, "Total"].groupby(level=[0, 1]).sum()
+    slicer = idx[to_fill, :, :, "Domestic Navigation"]
+    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
     df.loc[to_fill, "total domestic navigation"] = fill_values
 
     # split road traffic for non-IDEES
     missing = df.index[df["total passenger cars"].isna()]
-    for fuel in ["electricity", "total"]:
+    for fuel in ["total", "electricity"]:
         selection = [
             f"{fuel} passenger cars",
             f"{fuel} other road passenger",
@@ -564,9 +530,11 @@ def build_energy_totals(countries, eurostat, swiss, idees):
         )
 
     if "BA" in df.index:
-        # fill missing data for BA proportional to RS
-        ratio = (df.loc["BA"].loc[2014:2020] / df.loc["RS"].loc[2014:2020]).mean()
-        df.loc["BA"] = (ratio * df.loc["RS"]).values
+        # fill missing data for BA (services and road energy data)
+        # proportional to RS with ratio of total residential demand
+        missing = df.loc["BA"] == 0.0
+        ratio = df.at["BA", "total residential"] / df.at["RS", "total residential"]
+        df.loc["BA", missing] = ratio * df.loc["RS", missing]
 
     return df
 
@@ -582,21 +550,18 @@ def build_district_heat_share(countries, idees):
 
     district_heat_share = district_heat / total_heat
 
-    district_heat_share = district_heat_share.reindex(countries, level="country")
+    district_heat_share = district_heat_share.reindex(countries)
 
     # Missing district heating share
-    dh_share = pd.read_csv(
-        snakemake.input.district_heat_share, index_col=0, usecols=[0, 1]
+    dh_share = (
+        pd.read_csv(snakemake.input.district_heat_share, index_col=0, usecols=[0, 1])
+        .div(100)
+        .squeeze()
     )
-
-    dh_share = pd.concat(
-        {y: dh_share for y in range(1990, 2021)}, names=["year", "country"]
-    ).swaplevel()
-    dh_share = dh_share.div(100)
-    dh_share = dh_share.reindex(district_heat_share.index)
-
     # make conservative assumption and take minimum from both data sets
-    district_heat_share = pd.concat([district_heat_share, dh_share], axis=1).min(axis=1)
+    district_heat_share = pd.concat(
+        [district_heat_share, dh_share.reindex_like(district_heat_share)], axis=1
+    ).min(axis=1)
 
     district_heat_share.name = "district heat share"
 
@@ -613,6 +578,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
     index_col = ["Country_code", "Pollutant_name", "Year", "Sector_name"]
     df = df.set_index(index_col).sort_index()
 
+    emissions_scope = emissions_scope
+
     cts = ["CH", "EUA", "NO"] + eu28_eea
 
     slicer = idx[cts, emissions_scope, year, to_ipcc.values()]
@@ -658,26 +625,19 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
 def build_eurostat_co2(input_eurostat, countries, year=1990):
     eurostat = build_eurostat(input_eurostat, countries, year)
 
-    if eurostat is None:
-        df = build_eurostat(countries, year)
-    else:
-        df = eurostat.xs(year, level="year")
-
-    specific_emissions = pd.Series(index=df.columns, dtype=float)
+    specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
 
     # emissions in tCO2_equiv per MWh_th
-    specific_emissions["Solid fossil fuels"] = 0.36  # Approximates coal
-    specific_emissions["Oil and petroleum products"] = (
-        0.285  # Average of distillate and residue
-    )
-    specific_emissions["Natural gas"] = 0.2  # For natural gas
+    specific_emissions["Solid fuels"] = 0.36  # Approximates coal
+    specific_emissions["Oil (total)"] = 0.285  # Average of distillate and residue
+    specific_emissions["Gas"] = 0.2  # For natural gas
 
     # oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
     # Distillate oil (No. 2)  0.276
     # Residual oil (No. 6)  0.298
     # https://www.eia.gov/electricity/annual/html/epa_a_03.html
 
-    return df.multiply(specific_emissions).sum(axis=1)
+    return eurostat.multiply(specific_emissions).sum(axis=1)
 
 
 def build_co2_totals(countries, eea_co2, eurostat_co2):
@@ -884,7 +844,6 @@ if __name__ == "__main__":
 
     countries = snakemake.params.countries
     idees_countries = pd.Index(countries).intersection(eu28)
-    countries_without_ch = pd.Index(countries).difference(["CH"])
 
     data_year = params["energy_totals_year"]
     input_eurostat = snakemake.input.eurostat
@@ -917,6 +876,5 @@ if __name__ == "__main__":
     co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
     co2.to_csv(snakemake.output.co2_name)
 
-    idees_transport = idees.xs(data_year, level="year")
-    transport = build_transport_data(countries, population, idees_transport)
+    transport = build_transport_data(countries, population, idees)
     transport.to_csv(snakemake.output.transport_name)