# -*- coding: utf-8 -*- # SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors # # SPDX-License-Identifier: MIT """ Build total energy demands and carbon emissions per country using JRC IDEES, eurostat, and EEA data. - Country-specific data is read in :func:`build_eurostat`, :func:`build_idees` and `build_swiss`. - :func:`build_energy_totals` then combines energy data from Eurostat, Swiss, and IDEES data and :func:`rescale_idees_from_eurostat` rescales IDEES data to match Eurostat data. - :func:`build_district_heat_share` calculates the share of district heating for each country from IDEES data. - Historical CO2 emissions are calculated in :func:`build_eea_co2` and :func:`build_eurostat_co2` and combined in :func:`build_co2_totals`. Relevant Settings ----------------- .. code:: yaml countries: energy: Inputs ------ - `resources//nuts3_shapes.gejson`: NUTS3 shapes. - `data/bundle/eea_UNFCCC_v23.csv`: CO2 emissions data from EEA. - `data/switzerland-new_format-all_years.csv`: Swiss energy data. - `data/gr-e-11.03.02.01.01-cc.csv`: Swiss transport data - `data/jrc-idees`: JRC IDEES data. - `data/district_heat_share.csv`: District heating shares. - `data/eurostat/Balances-April2023`: Eurostat energy balances. - `data/eurostat/eurostat-household_energy_balances-february_2024.csv`: Eurostat household energy balances. Outputs ------- - `resources//energy_totals.csv`: Energy totals per country, sector and year. - `resources//co2_totals.csv`: CO2 emissions per country, sector and year. - `resources//transport_data.csv`: Transport data per country and year. - `resources//district_heat_share.csv`: District heating share per by country and year. """ import logging import multiprocessing as mp from functools import partial from typing import List import country_converter as coco import geopandas as gpd import numpy as np import pandas as pd from _helpers import configure_logging, mute_print, set_scenario_config from tqdm import tqdm cc = coco.CountryConverter() logger = logging.getLogger(__name__) idx = pd.IndexSlice def cartesian(s1: pd.Series, s2: pd.Series) -> pd.DataFrame: """ Compute the Cartesian product of two pandas Series. Parameters ---------- s1: pd.Series The first pandas Series s2: pd.Series: The second pandas Series. Returns ---------- pd.DataFrame A DataFrame representing the Cartesian product of s1 and s2. Examples -------- >>> s1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) >>> s2 = pd.Series([4, 5, 6], index=["d", "e", "f"]) >>> cartesian(s1, s2) d e f a 4 5 6 b 8 10 12 c 12 15 18 """ return pd.DataFrame(np.outer(s1, s2), index=s1.index, columns=s2.index) def reverse(dictionary: dict) -> dict: """ Reverses the keys and values of a dictionary. Parameters ---------- dictionary : dict The dictionary to be reversed. Returns ------- dict A new dictionary with the keys and values reversed. Examples -------- >>> d = {"a": 1, "b": 2, "c": 3} >>> reverse(d) {1: 'a', 2: 'b', 3: 'c'} """ return {v: k for k, v in dictionary.items()} idees_rename = {"GR": "EL", "GB": "UK"} eu28 = cc.EU28as("ISO2").ISO2.tolist() eu27 = cc.EU27as("ISO2").ISO2.tolist() eu28_eea = eu28.copy() eu28_eea.remove("GB") eu28_eea.append("UK") to_ipcc = { "electricity": "1.A.1.a - Public Electricity and Heat Production", "residential non-elec": "1.A.4.b - Residential", "services non-elec": "1.A.4.a - Commercial/Institutional", "rail non-elec": "1.A.3.c - Railways", "road non-elec": "1.A.3.b - Road Transportation", "domestic navigation": "1.A.3.d - Domestic Navigation", "international navigation": "1.D.1.b - International Navigation", "domestic aviation": "1.A.3.a - Domestic Aviation", "international aviation": "1.D.1.a - International Aviation", "total energy": "1 - Energy", "industrial processes": "2 - Industrial Processes and Product Use", "agriculture": "3 - Agriculture", "agriculture, forestry and fishing": "1.A.4.c - Agriculture/Forestry/Fishing", "LULUCF": "4 - Land Use, Land-Use Change and Forestry", "waste management": "5 - Waste management", "other": "6 - Other Sector", "indirect": "ind_CO2 - Indirect CO2", "total wL": "Total (with LULUCF)", "total woL": "Total (without LULUCF)", } def eurostat_per_country(input_eurostat: str, country: str) -> pd.DataFrame: """ Read energy balance data for a specific country from Eurostat. Parameters ---------- input_eurostat : str Path to the directory containing Eurostat data files. country : str Country code for the specific country. Returns ------- pd.DataFrame Concatenated energy balance data for the specified country. Notes ----- - The function reads `/.-Energy-balance-sheets-April-2023-edition.xlsb` - It removes the "Cover" sheet from the data and concatenates all the remaining sheets into a single DataFrame. """ filename = ( f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb" ) sheet = pd.read_excel( filename, engine="pyxlsb", sheet_name=None, skiprows=4, index_col=list(range(4)), na_values=":", ) sheet.pop("Cover") return pd.concat(sheet) def build_eurostat( input_eurostat: str, countries: List[str], nprocesses: int = 1, disable_progressbar: bool = False, ) -> pd.DataFrame: """ Return multi-index for all countries' energy data in TWh/a. Parameters: ----------- input_eurostat : str Path to the Eurostat database. countries : List[str] List of countries for which energy data is to be retrieved. nprocesses : int, optional Number of processes to use for parallel execution, by default 1. disable_progressbar : bool, optional Whether to disable the progress bar, by default False. Returns: -------- pd.DataFrame Multi-index DataFrame containing energy data for all countries in TWh/a. Notes: ------ - The function first renames the countries in the input list using the `idees_rename` mapping and removes "CH". - It then reads country-wise data using :func:`eurostat_per_country` into a single DataFrame. - The data is reordered, converted to TWh/a, and missing values are filled. """ countries = {idees_rename.get(country, country) for country in countries} - {"CH"} func = partial(eurostat_per_country, input_eurostat) tqdm_kwargs = dict( ascii=False, unit=" country", total=len(countries), desc="Build from eurostat database", disable=disable_progressbar, ) with mute_print(): with mp.Pool(processes=nprocesses) as pool: dfs = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) index_names = ["country", "year", "lvl1", "lvl2", "lvl3", "lvl4"] df = pd.concat(dfs, keys=countries, names=index_names) df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1) # drop columns with all NaNs unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")] df.drop(unnamed_cols, axis=1, inplace=True) df.drop(list(range(1990, 2022)), axis=1, inplace=True, errors="ignore") # make numeric values where possible df.replace("Z", 0, inplace=True) df = df.apply(pd.to_numeric, errors="coerce") df = df.select_dtypes(include=[np.number]) # write 'International aviation' to the lower level of the multiindex int_avia = df.index.get_level_values(3) == "International aviation" temp = df.loc[int_avia] temp.index = pd.MultiIndex.from_frame( temp.index.to_frame().fillna("International aviation") ) df = pd.concat([temp, df.loc[~int_avia]]).sort_index() # Fill in missing data on "Domestic aviation" for each country. for country in countries: slicer = idx[country, :, :, :, "Domestic aviation"] # For the Total and Fossil energy columns, fill in zeros with # the closest non-zero value in the year index. for col in ["Total", "Fossil energy"]: df.loc[slicer, col] = ( df.loc[slicer, col].replace(0.0, np.nan).ffill().bfill() ) # Renaming some indices index_rename = { "Households": "Residential", "Commercial & public services": "Services", "Domestic navigation": "Domestic Navigation", "International maritime bunkers": "Bunkers", "UK": "GB", "EL": "GR", } columns_rename = {"Total": "Total all products"} df.rename(index=index_rename, columns=columns_rename, inplace=True) df.sort_index(inplace=True) # convert to TWh/a from ktoe/a df *= 11.63 / 1e3 return df def build_swiss() -> pd.DataFrame: """ Return a pd.DataFrame of Swiss energy data in TWh/a. Returns -------- pd.DataFrame Swiss energy data in TWh/a. Notes ----- - Reads Swiss energy data from `data/switzerland-new_format-all_years.csv`. - Reshapes and renames data. - Converts energy units from PJ/a to TWh/a. """ fn = snakemake.input.swiss df = pd.read_csv(fn, index_col=[0, 1]) df.columns = df.columns.astype(int) df.columns.name = "year" df = df.stack().unstack("item") df.columns.name = None # convert PJ/a to TWh/a df /= 3.6 return df def idees_per_country(ct: str, base_dir: str) -> pd.DataFrame: """ Calculate energy totals per country using JRC-IDEES data. Parameters ---------- ct : str The country code. base_dir : str The base directory where the JRC-IDEES data files are located. Returns ------- pd.DataFrame A DataFrame containing the energy totals per country. Columns are energy uses. Notes ----- - Retrieves JRC-IDEES data for the specified country from `base_dir` for residential, tertiary, and transport sectors. - Calculates energy totals for each sector, stores them in a dictionary and returns them as data frame. - Assertions ensure indices of JRC-IDEES data are as expected. """ ct_idees = idees_rename.get(ct, ct) fn_residential = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx" fn_tertiary = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx" fn_transport = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx" ct_totals = {} # residential df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0) rows = ["Advanced electric heating", "Conventional electric heating"] ct_totals["electricity residential space"] = df.loc[rows].sum() ct_totals["total residential space"] = df.loc["Space heating"] ct_totals["total residential water"] = df.loc["Water heating"] assert df.index[23] == "Electricity" ct_totals["electricity residential water"] = df.iloc[23] ct_totals["total residential cooking"] = df.loc["Cooking"] assert df.index[30] == "Electricity" ct_totals["electricity residential cooking"] = df.iloc[30] df = pd.read_excel(fn_residential, "RES_summary", index_col=0) row = "Energy consumption by fuel - Eurostat structure (ktoe)" ct_totals["total residential"] = df.loc[row] assert df.index[40] == "Electricity" ct_totals["electricity residential"] = df.iloc[40] assert df.index[39] == "Distributed heat" ct_totals["distributed heat residential"] = df.iloc[39] assert df.index[43] == "Thermal uses" ct_totals["thermal uses residential"] = df.iloc[43] df = pd.read_excel(fn_residential, "RES_hh_eff", index_col=0) ct_totals["total residential space efficiency"] = df.loc["Space heating"] assert df.index[5] == "Diesel oil" ct_totals["oil residential space efficiency"] = df.iloc[5] assert df.index[6] == "Natural gas" ct_totals["gas residential space efficiency"] = df.iloc[6] ct_totals["total residential water efficiency"] = df.loc["Water heating"] assert df.index[18] == "Diesel oil" ct_totals["oil residential water efficiency"] = df.iloc[18] assert df.index[19] == "Natural gas" ct_totals["gas residential water efficiency"] = df.iloc[19] # services df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0) ct_totals["total services space"] = df.loc["Space heating"] rows = ["Advanced electric heating", "Conventional electric heating"] ct_totals["electricity services space"] = df.loc[rows].sum() ct_totals["total services water"] = df.loc["Hot water"] assert df.index[24] == "Electricity" ct_totals["electricity services water"] = df.iloc[24] ct_totals["total services cooking"] = df.loc["Catering"] assert df.index[31] == "Electricity" ct_totals["electricity services cooking"] = df.iloc[31] df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0) row = "Energy consumption by fuel - Eurostat structure (ktoe)" ct_totals["total services"] = df.loc[row] assert df.index[43] == "Electricity" ct_totals["electricity services"] = df.iloc[43] assert df.index[42] == "Distributed heat" ct_totals["distributed heat services"] = df.iloc[42] assert df.index[46] == "Thermal uses" ct_totals["thermal uses services"] = df.iloc[46] df = pd.read_excel(fn_tertiary, "SER_hh_eff", index_col=0) ct_totals["total services space efficiency"] = df.loc["Space heating"] assert df.index[5] == "Diesel oil" ct_totals["oil services space efficiency"] = df.iloc[5] assert df.index[7] == "Conventional gas heaters" ct_totals["gas services space efficiency"] = df.iloc[7] ct_totals["total services water efficiency"] = df.loc["Hot water"] assert df.index[20] == "Diesel oil" ct_totals["oil services water efficiency"] = df.iloc[20] assert df.index[21] == "Natural gas" ct_totals["gas services water efficiency"] = df.iloc[21] # agriculture, forestry and fishing start = "Detailed split of energy consumption (ktoe)" end = "Market shares of energy uses (%)" df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end] rows = [ "Lighting", "Ventilation", "Specific electricity uses", "Pumping devices (electricity)", ] ct_totals["total agriculture electricity"] = df.loc[rows].sum() rows = ["Specific heat uses", "Low enthalpy heat"] ct_totals["total agriculture heat"] = df.loc[rows].sum() rows = [ "Motor drives", "Farming machine drives (diesel oil and liquid biofuels)", "Pumping devices (diesel oil and liquid biofuels)", ] ct_totals["total agriculture machinery"] = df.loc[rows].sum() row = "Agriculture, forestry and fishing" ct_totals["total agriculture"] = df.loc[row] # transport df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0) ct_totals["total road"] = df.loc["by fuel (EUROSTAT DATA)"] ct_totals["electricity road"] = df.loc["Electricity"] ct_totals["total two-wheel"] = df.loc["Powered two-wheelers (Gasoline)"] assert df.index[19] == "Passenger cars" ct_totals["total passenger cars"] = df.iloc[19] assert df.index[30] == "Battery electric vehicles" ct_totals["electricity passenger cars"] = df.iloc[30] assert df.index[31] == "Motor coaches, buses and trolley buses" ct_totals["total other road passenger"] = df.iloc[31] assert df.index[39] == "Battery electric vehicles" ct_totals["electricity other road passenger"] = df.iloc[39] assert df.index[41] == "Light commercial vehicles" ct_totals["total light duty road freight"] = df.iloc[41] assert df.index[49] == "Battery electric vehicles" ct_totals["electricity light duty road freight"] = df.iloc[49] row = "Heavy goods vehicles (Diesel oil incl. biofuels)" ct_totals["total heavy duty road freight"] = df.loc[row] assert df.index[61] == "Passenger cars" ct_totals["passenger car efficiency"] = df.iloc[61] df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0) ct_totals["total rail"] = df.loc["by fuel"] ct_totals["electricity rail"] = df.loc["Electricity"] assert df.index[9] == "Passenger transport" ct_totals["total rail passenger"] = df.iloc[9] assert df.index[10] == "Metro and tram, urban light rail" assert df.index[13] == "Electric" assert df.index[14] == "High speed passenger trains" ct_totals["electricity rail passenger"] = df.iloc[[10, 13, 14]].sum() assert df.index[15] == "Freight transport" ct_totals["total rail freight"] = df.iloc[15] assert df.index[17] == "Electric" ct_totals["electricity rail freight"] = df.iloc[17] df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0) assert df.index[4] == "Passenger transport" ct_totals["total aviation passenger"] = df.iloc[4] assert df.index[8] == "Freight transport" ct_totals["total aviation freight"] = df.iloc[8] assert df.index[2] == "Domestic" ct_totals["total domestic aviation passenger"] = df.iloc[2] assert df.index[6] == "International - Intra-EEAwUK" assert df.index[7] == "International - Extra-EEAwUK" ct_totals["total international aviation passenger"] = df.iloc[[6, 7]].sum() assert df.index[9] == "Domestic" assert df.index[10] == "International - Intra-EEAwUK" ct_totals["total domestic aviation freight"] = df.iloc[[9, 10]].sum() assert df.index[11] == "International - Extra-EEAwUK" ct_totals["total international aviation freight"] = df.iloc[11] ct_totals["total domestic aviation"] = ( ct_totals["total domestic aviation freight"] + ct_totals["total domestic aviation passenger"] ) ct_totals["total international aviation"] = ( ct_totals["total international aviation freight"] + ct_totals["total international aviation passenger"] ) df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0) # coastal and inland ct_totals["total domestic navigation"] = df.loc["Energy consumption (ktoe)"] df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0) assert df.index[85] == "Passenger cars" ct_totals["passenger cars"] = df.iloc[85] return pd.DataFrame(ct_totals) def build_idees(countries: List[str]) -> pd.DataFrame: """ Build energy totals from IDEES database for the given list of countries using :func:`idees_per_country`. Parameters ---------- countries : List[str] List of country names for which energy totals need to be built. Returns ------- pd.DataFrame Energy totals for the given countries. Notes ----- - Retrieves energy totals per country and year using :func:`idees_per_country`. - Returns a DataFrame with columns: country, year, and energy totals for different categories. """ nprocesses = snakemake.threads disable_progress = snakemake.config["run"].get("disable_progressbar", False) func = partial(idees_per_country, base_dir=snakemake.input.idees) tqdm_kwargs = dict( ascii=False, unit=" country", total=len(countries), desc="Build from IDEES database", disable=disable_progress, ) with mute_print(): with mp.Pool(processes=nprocesses) as pool: totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) totals = pd.concat( totals_list, keys=countries, names=["country", "year"], ) # clean up dataframe years = np.arange(2000, 2022) totals = totals[totals.index.get_level_values(1).isin(years)] # efficiency kgoe/100km -> ktoe/100km so that after conversion TWh/100km totals.loc[:, "passenger car efficiency"] /= 1e6 # convert ktoe to TWh patterns = ["passenger cars", ".*space efficiency", ".*water efficiency"] exclude = totals.columns.str.fullmatch("|".join(patterns)) totals = totals.copy() totals.loc[:, ~exclude] *= 11.63 / 1e3 return totals def fill_missing_years(fill_values: pd.Series) -> pd.Series: """ Fill missing years for some countries by first using forward fill (ffill) and then backward fill (bfill). Parameters ---------- fill_values : pd.Series A pandas Series with a MultiIndex (levels: country and year) representing energy values, where some values may be zero and need to be filled. Returns ------- pd.Series A pandas Series with zero values replaced by the forward-filled and backward-filled values of the corresponding country. Notes ----- - The function groups the data by the 'country' level and performs forward fill and backward fill to fill zero values. - Zero values in the original Series are replaced by the ffilled and bfilled value of their respective country group. """ # Forward fill and then backward fill within each country group fill_values = fill_values.groupby(level="country").ffill().bfill() return fill_values def build_energy_totals( countries: List[str], eurostat: pd.DataFrame, swiss: pd.DataFrame, idees: pd.DataFrame, ) -> pd.DataFrame: """ Combine energy totals for the specified countries from Eurostat, Swiss, and IDEES data. Parameters ---------- countries : List[str] List of country codes for which energy totals are to be calculated. eurostat : pd.DataFrame Eurostat energy balances dataframe. swiss : pd.DataFrame Swiss energy data dataframe. idees : pd.DataFrame IDEES energy data dataframe. Returns ------- pd.DataFrame Energy totals dataframe for the given countries. Notes ----- - Missing values are filled based on Eurostat energy balances and average values in EU28. - The function also performs specific calculations for Norway and splits road, rail, and aviation traffic for non-IDEES data. References ---------- - `Norway heating data `_ """ eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"} eurostat_countries = eurostat.index.unique(0) eurostat_years = eurostat.index.unique(1) new_index = pd.MultiIndex.from_product( [countries, eurostat_years], names=["country", "year"] ) efficiency_keywords = ["space efficiency", "water efficiency"] to_drop = idees.columns[idees.columns.str.contains("|".join(efficiency_keywords))] to_drop = to_drop.append(pd.Index(["passenger cars", "passenger car efficiency"])) df = idees.reindex(new_index).drop(to_drop, axis=1) in_eurostat = df.index.levels[0].intersection(eurostat_countries) # add international navigation slicer = idx[in_eurostat, :, :, "Bunkers", :] fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum() # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[in_eurostat, "total international navigation"] = fill_values # add swiss energy data df = pd.concat([df.drop("CH", errors="ignore"), swiss]).sort_index() # get values for missing countries based on Eurostat EnergyBalances # agriculture to_fill = df.index[ df["total agriculture"].isna() & df.index.get_level_values("country").isin(eurostat_countries) ] c = to_fill.get_level_values("country") y = to_fill.get_level_values("year") # take total final energy consumption from Eurostat eurostat_sector = "Agriculture & forestry" slicer = idx[c, y, :, :, eurostat_sector] fill_values = eurostat.loc[slicer]["Total all products"].groupby(level=[0, 1]).sum() # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[to_fill, "total agriculture"] = fill_values # split into end uses by average EU data from IDEES uses = ["electricity", "heat", "machinery"] for use in uses: avg = ( idees["total agriculture electricity"] / idees["total agriculture"] ).mean() df.loc[to_fill, f"total agriculture {use}"] = ( df.loc[to_fill, "total agriculture"] * avg ) # divide cooking/space/water according to averages in EU28 uses = ["space", "cooking", "water"] to_fill = df.index[ df["total residential"].isna() & df.index.get_level_values("country").isin(eurostat_countries) ] c = to_fill.get_level_values("country") y = to_fill.get_level_values("year") for sector in ["residential", "services", "road", "rail"]: eurostat_sector = sector.capitalize() # fuel use for fuel in ["electricity", "total"]: slicer = idx[c, y, :, :, eurostat_sector] fill_values = ( eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum() ) # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[to_fill, f"{fuel} {sector}"] = fill_values for sector in ["residential", "services"]: # electric use for use in uses: fuel_use = df[f"electricity {sector} {use}"] fuel = ( df[f"electricity {sector}"].replace(0, np.nan).infer_objects(copy=False) ) avg = fuel_use.div(fuel).mean() logger.debug( f"{sector}: average fraction of electricity for {use} is {avg:.3f}" ) df.loc[to_fill, f"electricity {sector} {use}"] = ( avg * df.loc[to_fill, f"electricity {sector}"] ) # non-electric use for use in uses: nonelectric_use = ( df[f"total {sector} {use}"] - df[f"electricity {sector} {use}"] ) nonelectric = df[f"total {sector}"] - df[f"electricity {sector}"] nonelectric = nonelectric.copy().replace(0, np.nan) avg = nonelectric_use.div(nonelectric).mean() logger.debug( f"{sector}: average fraction of non-electric for {use} is {avg:.3f}" ) electric_use = df.loc[to_fill, f"electricity {sector} {use}"] nonelectric = ( df.loc[to_fill, f"total {sector}"] - df.loc[to_fill, f"electricity {sector}"] ) df.loc[to_fill, f"total {sector} {use}"] = electric_use + avg * nonelectric # Fix Norway space and water heating fractions # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14 # The main heating source for about 73 per cent of the households is based on electricity # => 26% is non-electric if "NO" in df.index: elec_fraction = 0.73 no_norway = df.drop("NO") for sector in ["residential", "services"]: # assume non-electric is heating nonelectric = ( df.loc["NO", f"total {sector}"] - df.loc["NO", f"electricity {sector}"] ) total_heating = nonelectric / (1 - elec_fraction) for use in uses: nonelectric_use = ( no_norway[f"total {sector} {use}"] - no_norway[f"electricity {sector} {use}"] ) nonelectric = ( no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"] ) nonelectric = nonelectric.copy().replace(0, np.nan) fraction = nonelectric_use.div(nonelectric).mean() df.loc["NO", f"total {sector} {use}"] = ( total_heating * fraction ).values df.loc["NO", f"electricity {sector} {use}"] = ( total_heating * fraction * elec_fraction ).values # Missing aviation slicer = idx[c, y, :, :, "Domestic aviation"] fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum() # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[to_fill, "total domestic aviation"] = fill_values slicer = idx[c, y, :, :, "International aviation"] fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum() # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[to_fill, "total international aviation"] = fill_values # missing domestic navigation slicer = idx[c, y, :, :, "Domestic Navigation"] fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum() # fill missing years for some countries by mean over the other years fill_values = fill_missing_years(fill_values) df.loc[to_fill, "total domestic navigation"] = fill_values # split road traffic for non-IDEES missing = df.index[df["total passenger cars"].isna()] for fuel in ["total", "electricity"]: selection = [ f"{fuel} passenger cars", f"{fuel} other road passenger", f"{fuel} light duty road freight", ] if fuel == "total": selection.extend([f"{fuel} two-wheel", f"{fuel} heavy duty road freight"]) road = df[selection].sum() road_fraction = road / road.sum() fill_values = cartesian(df.loc[missing, f"{fuel} road"], road_fraction) df.loc[missing, road_fraction.index] = fill_values # split rail traffic for non-IDEES missing = df.index[df["total rail passenger"].isna()] for fuel in ["total", "electricity"]: selection = [f"{fuel} rail passenger", f"{fuel} rail freight"] rail = df[selection].sum() rail_fraction = rail / rail.sum() fill_values = cartesian(df.loc[missing, f"{fuel} rail"], rail_fraction) df.loc[missing, rail_fraction.index] = fill_values # split aviation traffic for non-IDEES missing = df.index[df["total domestic aviation passenger"].isna()] for destination in ["domestic", "international"]: selection = [ f"total {destination} aviation passenger", f"total {destination} aviation freight", ] aviation = df[selection].sum() aviation_fraction = aviation / aviation.sum() fill_values = cartesian( df.loc[missing, f"total {destination} aviation"], aviation_fraction ) df.loc[missing, aviation_fraction.index] = fill_values for purpose in ["passenger", "freight"]: attrs = [ f"total domestic aviation {purpose}", f"total international aviation {purpose}", ] df.loc[missing, f"total aviation {purpose}"] = df.loc[missing, attrs].sum( axis=1 ) if "BA" in df.index: # fill missing data for BA (services and road energy data) # proportional to RS with ratio of total residential demand mean_BA = df.loc["BA"].loc[2014:2021, "total residential"].mean() mean_RS = df.loc["RS"].loc[2014:2021, "total residential"].mean() ratio = mean_BA / mean_RS df.loc["BA"] = ( df.loc["BA"].replace(0.0, np.nan).infer_objects(copy=False).values ) df.loc["BA"] = df.loc["BA"].combine_first(ratio * df.loc["RS"]).values return df def build_district_heat_share(countries: List[str], idees: pd.DataFrame) -> pd.Series: """ Calculate the share of district heating for each country. Parameters ---------- countries : List[str] List of country codes for which to calculate district heating share. idees : pd.DataFrame IDEES energy data dataframe. Returns ------- pd.Series Series with the district heating share for each country. Notes ----- - The function calculates the district heating share as the sum of residential and services distributed heat, divided by the sum of residential and services thermal uses. - The district heating share is then reindexed to match the provided list of countries. - Missing district heating shares are filled from `data/district_heat_share.csv`. - The function makes a conservative assumption and takes the minimum district heating share from both the IDEES data and `data/district_heat_share.csv`. """ # district heating share district_heat = idees[ ["distributed heat residential", "distributed heat services"] ].sum(axis=1) total_heat = ( idees[["thermal uses residential", "thermal uses services"]] .sum(axis=1) .replace(0, np.nan) ) district_heat_share = district_heat / total_heat district_heat_share = district_heat_share.reindex(countries, level="country") # Missing district heating share dh_share = ( pd.read_csv(snakemake.input.district_heat_share, index_col=0, usecols=[0, 1]) .div(100) .squeeze() ) # make conservative assumption and take minimum from both data sets new_index = pd.MultiIndex.from_product( [dh_share.index, district_heat_share.index.get_level_values(1).unique()] ) district_heat_share = pd.concat( [district_heat_share, dh_share.reindex(new_index, level=0)], axis=1 ).min(axis=1) district_heat_share = district_heat_share.reindex(countries, level=0) district_heat_share.name = "district heat share" # restrict to available years district_heat_share = ( district_heat_share.unstack().dropna(how="all", axis=1).ffill(axis=1) ) return district_heat_share def build_eea_co2( input_co2: str, year: int = 1990, emissions_scope: str = "CO2" ) -> pd.DataFrame: """ Calculate CO2 emissions for a given year based on EEA data in Mt. Parameters ---------- input_co2 : str Path to the input CSV file with CO2 data. year : int, optional Year for which to calculate emissions, by default 1990. emissions_scope : str, optional Scope of the emissions to consider, by default "CO2". Returns ------- pd.DataFrame DataFrame with CO2 emissions for the given year. Notes ----- - The function reads the `input_co2` data and for a specific `year` and `emission scope` - It calculates "industrial non-elec" and "agriculture" emissions from that data - It drops unneeded columns and converts the emissions to Mt. References --------- - `EEA CO2 data `_ (downloaded 201228, modified by EEA last on 201221) """ df = pd.read_csv(input_co2, encoding="latin-1", low_memory=False) df.replace(dict(Year="1985-1987"), 1986, inplace=True) df.Year = df.Year.astype(int) index_col = ["Country_code", "Pollutant_name", "Year", "Sector_name"] df = df.set_index(index_col).sort_index() cts = ["CH", "EUA", "NO"] + eu28_eea slicer = idx[cts, emissions_scope, year, to_ipcc.values()] emissions = ( df.loc[slicer, "emissions"] .unstack("Sector_name") .rename(columns=reverse(to_ipcc)) .droplevel([1, 2]) ) emissions.rename(index={"EUA": "EU28", "UK": "GB"}, inplace=True) to_subtract = [ "electricity", "services non-elec", "residential non-elec", "road non-elec", "rail non-elec", "domestic aviation", "international aviation", "domestic navigation", "international navigation", "agriculture, forestry and fishing", ] emissions["industrial non-elec"] = emissions["total energy"] - emissions[ to_subtract ].sum(axis=1) emissions["agriculture"] += emissions["agriculture, forestry and fishing"] to_drop = [ "total energy", "total wL", "total woL", "agriculture, forestry and fishing", ] emissions.drop(columns=to_drop, inplace=True) # convert from Gt to Mt return emissions / 1e3 def build_eurostat_co2(eurostat: pd.DataFrame, year: int = 1990) -> pd.Series: """ Calculate CO2 emissions for a given year based on Eurostat fuel consumption data and fuel-specific emissions. Parameters ---------- eurostat : pd.DataFrame DataFrame with Eurostat data. year : int, optional Year for which to calculate emissions, by default 1990. Returns ------- pd.Series Series with CO2 emissions for the given year. Notes ----- - The function hard-sets fuel-specific emissions: - solid fuels: 0.36 tCO2_equi/MW_th (approximates coal) - oil: 0.285 tCO2_equi/MW_th (average of distillate and residue) - natural gas: 0.2 tCO2_equi/MW_th - It then multiplies the Eurostat fuel consumption data for `year` by the specific emissions and sums the result. References ---------- - Oil values from `EIA `_ - Distillate oil (No. 2) 0.276 - Residual oil (No. 6) 0.298 - `EIA Electricity Annual `_ """ eurostat_year = eurostat.xs(year, level="year") specific_emissions = pd.Series(index=eurostat.columns, dtype=float) # emissions in tCO2_equiv per MWh_th specific_emissions["Solid fossil fuels"] = 0.36 # Approximates coal specific_emissions["Oil and petroleum products"] = ( 0.285 # Average of distillate and residue ) specific_emissions["Natural gas"] = 0.2 # For natural gas return eurostat_year.multiply(specific_emissions).sum(axis=1) def build_co2_totals( countries: List[str], eea_co2: pd.DataFrame, eurostat_co2: pd.DataFrame ) -> pd.DataFrame: """ Combine CO2 emissions data from EEA and Eurostat for a list of countries. Parameters ---------- countries : List[str] List of country codes for which CO2 totals need to be built. eea_co2 : pd.DataFrame DataFrame with EEA CO2 emissions data. eurostat_co2 : pd.DataFrame DataFrame with Eurostat CO2 emissions data. Returns ------- pd.DataFrame Combined CO2 emissions data for the given countries. Notes ----- - The function combines the CO2 emissions from EEA and Eurostat into a single DataFrame for the given countries. """ co2 = eea_co2.reindex(countries) for ct in pd.Index(countries).intersection( ["BA", "RS", "XK", "AL", "ME", "MK", "UA", "MD"] ): mappings = { "electricity": (ct, "+", "Electricity & heat generation", np.nan), "residential non-elec": (ct, "+", "+", "Residential"), "services non-elec": (ct, "+", "+", "Services"), "road non-elec": (ct, "+", "+", "Road"), "rail non-elec": (ct, "+", "+", "Rail"), "domestic navigation": (ct, "+", "+", "Domestic Navigation"), "international navigation": (ct, "-", "Bunkers"), "domestic aviation": (ct, "+", "+", "Domestic aviation"), "international aviation": (ct, "-", "International aviation"), # does not include industrial process emissions or fuel processing/refining "industrial non-elec": (ct, "+", "Industry sector"), # does not include non-energy emissions "agriculture": (eurostat_co2.index.get_level_values(0) == ct) & eurostat_co2.index.isin(["Agriculture & forestry", "Fishing"], level=3), } for i, mi in mappings.items(): co2.at[ct, i] = eurostat_co2.loc[mi].sum() return co2 def build_transport_data( countries: List[str], population: pd.DataFrame, idees: pd.DataFrame ) -> pd.DataFrame: """ Build transport data for a set of countries based on IDEES data. Parameters ---------- countries : List[str] List of country codes. population : pd.DataFrame DataFrame with population data. idees : pd.DataFrame DataFrame with IDEES data. Returns ------- pd.DataFrame DataFrame with transport data. Notes ----- - The function first collects the number of passenger cars. - For Switzerland, it reads the data from `data/gr-e-11.03.02.01.01-cc.csv`. - It fills missing data on the number of cars and fuel efficiency with average data. References ---------- - Swiss transport data: `BFS `_ """ years = np.arange(2000, 2022) # first collect number of cars transport_data = pd.DataFrame(idees["passenger cars"]) countries_without_ch = set(countries) - {"CH"} new_index = pd.MultiIndex.from_product( [countries_without_ch, transport_data.index.unique(1)], names=["country", "year"], ) transport_data = transport_data.reindex(index=new_index) if "CH" in countries: fn = snakemake.input.swiss_transport swiss_cars = pd.read_csv(fn, index_col=0).loc[years, ["passenger cars"]] swiss_cars.index = pd.MultiIndex.from_product( [["CH"], swiss_cars.index], names=["country", "year"] ) transport_data = pd.concat([transport_data, swiss_cars]).sort_index() transport_data = transport_data.rename(columns={"passenger cars": "number cars"}) # clean up dataframe transport_data = transport_data[ transport_data.index.get_level_values(1).isin(years) ] missing = transport_data.index[transport_data["number cars"].isna()] if not missing.empty: logger.info( f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data." ) cars_pp = transport_data["number cars"] / population fill_values = { year: cars_pp.mean() * population for year in transport_data.index.unique(1) } fill_values = pd.DataFrame(fill_values).stack() fill_values = pd.DataFrame(fill_values, columns=["number cars"]) fill_values.index.names = ["country", "year"] fill_values = fill_values.reindex(transport_data.index) transport_data = transport_data.combine_first(fill_values) # collect average fuel efficiency in MWh/100km, taking passengar car efficiency in TWh/100km transport_data["average fuel efficiency"] = idees["passenger car efficiency"] * 1e6 missing = transport_data.index[transport_data["average fuel efficiency"].isna()] if not missing.empty: logger.info( f"Missing data on fuel efficiency from:\n{list(missing)}\nFilling gaps with averaged data." ) fill_values = transport_data["average fuel efficiency"].mean() transport_data.loc[missing, "average fuel efficiency"] = fill_values return transport_data def rescale_idees_from_eurostat( idees_countries: List[str], energy: pd.DataFrame, eurostat: pd.DataFrame ) -> pd.DataFrame: """ Takes JRC IDEES data from 2021 and rescales it by the ratio of the Eurostat data and the 2021 Eurostat data. Missing data: ['passenger car efficiency', 'passenger cars'] Parameters ---------- idees_countries : List[str] List of IDEES country codes. energy : pd.DataFrame DataFrame with JRC IDEES data. eurostat : pd.DataFrame DataFrame with Eurostat data. Returns ------- pd.DataFrame DataFrame with rescaled IDEES data. Notes ----- - The function first reads in the Eurostat data for 2015 and calculates the ratio of that data with other Eurostat data. - This ratio is mapped to the IDEES data. References ---------- - JRC IDEES data: `JRC IDEES `_ - Eurostat data: `Eurostat `_ """ main_cols = ["Total all products", "Electricity"] # read in the eurostat data for 2015 eurostat_2021 = eurostat.xs(2021, level="year")[main_cols] # calculate the ratio of the two data sets ratio = eurostat[main_cols] / eurostat_2021 ratio = ratio.droplevel([2, 5]) cols_rename = {"Total all products": "total", "Electricity": "ele"} index_rename = {v: k for k, v in idees_rename.items()} ratio.rename(columns=cols_rename, index=index_rename, inplace=True) mappings = { "Residential": { "total": [ "total residential space", "total residential water", "total residential cooking", "total residential", "distributed heat residential", "thermal uses residential", ], "elec": [ "electricity residential space", "electricity residential water", "electricity residential cooking", "electricity residential", ], }, "Services": { "total": [ "total services space", "total services water", "total services cooking", "total services", "distributed heat services", "thermal uses services", ], "elec": [ "electricity services space", "electricity services water", "electricity services cooking", "electricity services", ], }, "Agriculture & forestry": { "total": [ "total agriculture heat", "total agriculture machinery", "total agriculture", ], "elec": [ "total agriculture electricity", ], }, "Road": { "total": [ "total road", "total passenger cars", "total other road passenger", "total light duty road freight", "total heavy duty road freight", ], "elec": [ "electricity road", "electricity passenger cars", "electricity other road passenger", "electricity light duty road freight", ], }, "Rail": { "total": [ "total rail", "total rail passenger", "total rail freight", ], "elec": [ "electricity rail", "electricity rail passenger", "electricity rail freight", ], }, } avia_inter = [ "total aviation passenger", "total aviation freight", "total international aviation passenger", "total international aviation freight", "total international aviation", ] avia_domestic = [ "total domestic aviation passenger", "total domestic aviation freight", "total domestic aviation", ] navigation = [ "total domestic navigation", ] # international navigation is already read in from the eurostat data directly for country in idees_countries: filling_years = [(2015, slice(2016, 2021)), (2000, slice(1990, 1999))] for source_year, target_years in filling_years: slicer_source = idx[country, source_year, :, :] slicer_target = idx[country, target_years, :, :] for sector, mapping in mappings.items(): sector_ratio = ratio.loc[ (country, slice(None), slice(None), sector) ].droplevel("lvl2") energy.loc[slicer_target, mapping["total"]] = cartesian( sector_ratio.loc[target_years, "total"], energy.loc[slicer_source, mapping["total"]].squeeze(axis=0), ).values energy.loc[slicer_target, mapping["elec"]] = cartesian( sector_ratio.loc[target_years, "ele"], energy.loc[slicer_source, mapping["elec"]].squeeze(axis=0), ).values level_drops = ["country", "lvl2", "lvl3"] slicer = idx[country, :, :, "Domestic aviation"] avi_d = ratio.loc[slicer, "total"].droplevel(level_drops) slicer = idx[country, :, :, "International aviation"] avi_i = ratio.loc[slicer, "total"].droplevel(level_drops) slicer = idx[country, :, :, "Domestic Navigation"] nav = ratio.loc[slicer, "total"].droplevel(level_drops) energy.loc[slicer_target, avia_inter] = cartesian( avi_i.loc[target_years], energy.loc[slicer_source, avia_inter].squeeze(axis=0), ).values energy.loc[slicer_target, avia_domestic] = cartesian( avi_d.loc[target_years], energy.loc[slicer_source, avia_domestic].squeeze(axis=0), ).values energy.loc[slicer_target, navigation] = cartesian( nav.loc[target_years], energy.loc[slicer_source, navigation].squeeze(axis=0), ).values # set the total of agriculture/road to the sum of all agriculture/road categories (corresponding to the IDEES data) rows = idx[country, :] cols = [ "total agriculture electricity", "total agriculture heat", "total agriculture machinery", ] energy.loc[rows, "total agriculture"] = energy.loc[rows, cols].sum(axis=1) cols = [ "total passenger cars", "total other road passenger", "total light duty road freight", "total heavy duty road freight", ] energy.loc[rows, "total road"] = energy.loc[rows, cols].sum(axis=1) return energy def update_residential_from_eurostat(energy: pd.DataFrame) -> pd.DataFrame: """ Updates energy balances for residential from disaggregated data from Eurostat by mutating input data DataFrame. Parameters ---------- energy : pd.DataFrame DataFrame with energy data. Returns ------- pd.DataFrame DataFrame with updated energy balances. Notes ----- - The function first reads in the Eurostat data for households and maps the energy types to the corresponding Eurostat codes. - For each energy type, it selects the corresponding data, converts units, and drops unnecessary data. """ eurostat_households = pd.read_csv(snakemake.input.eurostat_households) # Column mapping for energy type nrg_type = { "total residential": ("FC_OTH_HH_E", "TOTAL"), "total residential space": ("FC_OTH_HH_E_SH", "TOTAL"), "total residential water": ("FC_OTH_HH_E_WH", "TOTAL"), "total residential cooking": ("FC_OTH_HH_E_CK", "TOTAL"), "electricity residential": ("FC_OTH_HH_E", "E7000"), "electricity residential space": ("FC_OTH_HH_E_SH", "E7000"), "electricity residential water": ("FC_OTH_HH_E_WH", "E7000"), "electricity residential cooking": ("FC_OTH_HH_E_CK", "E7000"), } for nrg_name, (code, siec) in nrg_type.items(): # Select energy balance type, rename columns and countries to match IDEES data, # convert TJ to TWh col_to_rename = {"geo": "country", "TIME_PERIOD": "year", "OBS_VALUE": nrg_name} idx_to_rename = {v: k for k, v in idees_rename.items()} drop_geo = ["EU27_2020", "EA20"] nrg_data = eurostat_households.query( "nrg_bal == @code and siec == @siec and geo not in @drop_geo and OBS_VALUE > 0" ).copy() nrg_data.rename(columns=col_to_rename, inplace=True) nrg_data = nrg_data.set_index(["country", "year"])[nrg_name] / 3.6e3 nrg_data.rename(index=idx_to_rename, inplace=True) # update energy balance from household-specific eurostat data idx = nrg_data.index.intersection(energy.index) energy.loc[idx, nrg_name] = nrg_data[idx] logger.info( "Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat" ) def build_transformation_output_coke(eurostat, fn): """ Extracts and builds the transformation output data for coke ovens from the Eurostat dataset. This function specifically filters the Eurostat data to extract transformation output related to coke ovens. Since the transformation output for coke ovens is not included in the final energy consumption of the iron and steel sector, it needs to be processed and added separately. The filtered data is saved as a CSV file. Parameters: eurostat (pd.DataFrame): A pandas DataFrame containing Eurostat data with a multi-level index fn (str): The file path where the resulting CSV file should be saved. Output: The resulting transformation output data for coke ovens is saved as a CSV file at the path specified in fn. """ slicer = pd.IndexSlice[:, :, :, "Coke ovens", "Other sources", :] df = eurostat.loc[slicer, :].droplevel(level=[2, 3, 4, 5]) df.to_csv(fn) def build_heating_efficiencies( countries: List[str], idees: pd.DataFrame ) -> pd.DataFrame: """ Build heating efficiencies for a set of countries based on IDEES data. Parameters ---------- countries : List[str] List of country codes. idees : pd.DataFrame DataFrame with IDEES data. Returns ------- pd.DataFrame DataFrame with heating efficiencies. Notes ----- - It fills missing data with average data. """ years = np.arange(2000, 2022) cols = idees.columns[ idees.columns.str.contains("space efficiency") ^ idees.columns.str.contains("water efficiency") ] heating_efficiencies = pd.DataFrame(idees[cols]) new_index = pd.MultiIndex.from_product( [countries, heating_efficiencies.index.unique(1)], names=["country", "year"], ) heating_efficiencies = heating_efficiencies.reindex(index=new_index) for col in cols: unstacked = heating_efficiencies[col].unstack() fillvalue = unstacked.mean() for ct in unstacked.index: mask = unstacked.loc[ct].isna() unstacked.loc[ct, mask] = fillvalue[mask] heating_efficiencies[col] = unstacked.stack() return heating_efficiencies # %% if __name__ == "__main__": if "snakemake" not in globals(): from _helpers import mock_snakemake snakemake = mock_snakemake("build_energy_totals") configure_logging(snakemake) set_scenario_config(snakemake) params = snakemake.params.energy nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index("index") population = nuts3["pop"].groupby(nuts3.country).sum() countries = snakemake.params.countries idees_countries = pd.Index(countries).intersection(eu27) input_eurostat = snakemake.input.eurostat eurostat = build_eurostat( input_eurostat, countries, nprocesses=snakemake.threads, disable_progressbar=snakemake.config["run"].get("disable_progressbar", False), ) build_transformation_output_coke( eurostat, snakemake.output.transformation_output_coke ) swiss = build_swiss() idees = build_idees(idees_countries) energy = build_energy_totals(countries, eurostat, swiss, idees) update_residential_from_eurostat(energy) energy.to_csv(snakemake.output.energy_name) # use rescaled idees data to calculate district heat share district_heat_share = build_district_heat_share( countries, energy.loc[idees_countries] ) district_heat_share.to_csv(snakemake.output.district_heat_share) base_year_emissions = params["base_emissions_year"] emissions_scope = snakemake.params.energy["emissions"] eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope) eurostat_co2 = build_eurostat_co2(eurostat, base_year_emissions) co2 = build_co2_totals(countries, eea_co2, eurostat_co2) co2.to_csv(snakemake.output.co2_name) transport = build_transport_data(countries, population, idees) transport.to_csv(snakemake.output.transport_name) heating_efficiencies = build_heating_efficiencies(countries, idees) heating_efficiencies.to_csv(snakemake.output.heating_efficiencies)