pypsa-eur/scripts/build_energy_totals.py
2024-09-11 14:22:15 +02:00

1655 lines
56 KiB
Python

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
"""
Build total energy demands and carbon emissions per country using JRC IDEES,
eurostat, and EEA data.
- Country-specific data is read in :func:`build_eurostat`, :func:`build_idees` and :func:`build_swiss`.
- :func:`build_energy_totals` then combines energy data from Eurostat, Swiss, and IDEES data and :func:`rescale_idees_from_eurostat` rescales IDEES data to match Eurostat data.
- :func:`build_district_heat_share` calculates the share of district heating for each country from IDEES data.
- Historical CO2 emissions are calculated in :func:`build_eea_co2` and :func:`build_eurostat_co2` and combined in :func:`build_co2_totals`.
Relevant Settings
-----------------
.. code:: yaml
countries:
energy:
Inputs
------
- `resources/<run_name>/nuts3_shapes.geojson`: NUTS3 shapes.
- `data/bundle/eea_UNFCCC_v23.csv`: CO2 emissions data from EEA.
- `data/switzerland-new_format-all_years.csv`: Swiss energy data.
- `data/gr-e-11.03.02.01.01-cc.csv`: Swiss transport data
- `data/jrc-idees`: JRC IDEES data.
- `data/district_heat_share.csv`: District heating shares.
- `data/eurostat/Balances-April2023`: Eurostat energy balances.
- `data/eurostat/eurostat-household_energy_balances-february_2024.csv`: Eurostat household energy balances.
Outputs
-------
- `resources/<run_name>/energy_totals.csv`: Energy totals per country, sector and year.
- `resources/<run_name>/co2_totals.csv`: CO2 emissions per country, sector and year.
- `resources/<run_name>/transport_data.csv`: Transport data per country and year.
- `resources/<run_name>/district_heat_share.csv`: District heating share per country and year.
"""
import logging
import multiprocessing as mp
from functools import partial
from typing import List
import country_converter as coco
import geopandas as gpd
import numpy as np
import pandas as pd
from _helpers import configure_logging, mute_print, set_scenario_config
from tqdm import tqdm
# Country-code converter instance; used below to obtain the EU28/EU27 ISO2 lists.
cc = coco.CountryConverter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Shorthand for building MultiIndex slicers passed to ``.loc``.
idx = pd.IndexSlice
def cartesian(s1: pd.Series, s2: pd.Series) -> pd.DataFrame:
    """
    Build the outer-product table of two pandas Series.

    Parameters
    ----------
    s1 : pd.Series
        Series whose index becomes the row labels of the result.
    s2 : pd.Series
        Series whose index becomes the column labels of the result.

    Returns
    -------
    pd.DataFrame
        Frame whose entry at row ``i`` and column ``j`` is ``s1[i] * s2[j]``.

    Examples
    --------
    >>> s1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    >>> s2 = pd.Series([4, 5, 6], index=["d", "e", "f"])
    >>> cartesian(s1, s2)
        d   e   f
    a   4   5   6
    b   8  10  12
    c  12  15  18
    """
    row_labels = s1.index
    column_labels = s2.index
    # element-wise products of every pair (s1 value, s2 value)
    products = np.outer(s1, s2)
    return pd.DataFrame(products, index=row_labels, columns=column_labels)
def reverse(dictionary: dict) -> dict:
    """
    Swap the roles of keys and values in a dictionary.

    Parameters
    ----------
    dictionary : dict
        Mapping to invert. Its values must be hashable.

    Returns
    -------
    dict
        New dictionary mapping each original value to its key. If several
        keys share a value, the key that comes last in iteration order wins.

    Examples
    --------
    >>> d = {"a": 1, "b": 2, "c": 3}
    >>> reverse(d)
    {1: 'a', 2: 'b', 3: 'c'}
    """
    swapped = {}
    for key, value in dictionary.items():
        swapped[value] = key
    return swapped
# ISO2 codes whose spelling differs in the JRC-IDEES / Eurostat file names
# (applied via ``idees_rename.get(ct, ct)`` before building file paths).
idees_rename = {"GR": "EL", "GB": "UK"}

# ISO2 country lists taken from the country converter.
eu28 = cc.EU28as("ISO2").ISO2.tolist()
eu27 = cc.EU27as("ISO2").ISO2.tolist()

# Same as ``eu28`` but with "UK" instead of "GB"; used to select rows of the
# EEA emissions table, where "UK" is later renamed back to "GB".
eu28_eea = eu28.copy()
eu28_eea.remove("GB")
eu28_eea.append("UK")

# Maps the sector names used in this module (keys) to the ``Sector_name``
# labels looked up in the EEA emissions data (values).
to_ipcc = {
    "electricity": "1.A.1.a - Public Electricity and Heat Production",
    "residential non-elec": "1.A.4.b - Residential",
    "services non-elec": "1.A.4.a - Commercial/Institutional",
    "rail non-elec": "1.A.3.c - Railways",
    "road non-elec": "1.A.3.b - Road Transportation",
    "domestic navigation": "1.A.3.d - Domestic Navigation",
    "international navigation": "1.D.1.b - International Navigation",
    "domestic aviation": "1.A.3.a - Domestic Aviation",
    "international aviation": "1.D.1.a - International Aviation",
    "total energy": "1 - Energy",
    "industrial processes": "2 - Industrial Processes and Product Use",
    "agriculture": "3 - Agriculture",
    "agriculture, forestry and fishing": "1.A.4.c - Agriculture/Forestry/Fishing",
    "LULUCF": "4 - Land Use, Land-Use Change and Forestry",
    "waste management": "5 - Waste management",
    "other": "6 - Other Sector",
    "indirect": "ind_CO2 - Indirect CO2",
    "total wL": "Total (with LULUCF)",
    "total woL": "Total (without LULUCF)",
}
def eurostat_per_country(input_eurostat: str, country: str) -> pd.DataFrame:
    """
    Load the Eurostat energy balance workbook of a single country.

    Parameters
    ----------
    input_eurostat : str
        Directory that holds the Eurostat ``.xlsb`` workbooks.
    country : str
        Country code as used in the workbook file name.

    Returns
    -------
    pd.DataFrame
        All sheets of the workbook except "Cover", concatenated with the
        sheet name as outermost index level.

    Raises
    ------
    KeyError
        If the workbook has no sheet called "Cover".

    Notes
    -----
    - The file read is `<input_eurostat>/<country>-Energy-balance-sheets-April-2023-edition.xlsb`.
    - The first four rows of every sheet are skipped, the first four columns
      form the row index and ":" is interpreted as a missing value.
    """
    workbook = (
        f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
    )
    read_options = dict(
        engine="pyxlsb",
        sheet_name=None,  # None -> read every sheet into a dict
        skiprows=4,
        index_col=[0, 1, 2, 3],
        na_values=":",
    )
    sheets_by_name = pd.read_excel(workbook, **read_options)

    # the cover sheet carries no balance data
    del sheets_by_name["Cover"]

    return pd.concat(sheets_by_name)
def build_eurostat(
    input_eurostat: str,
    countries: List[str],
    nprocesses: int = 1,
    disable_progressbar: bool = False,
) -> pd.DataFrame:
    """
    Return multi-index for all countries' energy data in TWh/a.

    Parameters
    ----------
    input_eurostat : str
        Path to the Eurostat database.
    countries : List[str]
        List of countries for which energy data is to be retrieved.
    nprocesses : int, optional
        Number of processes to use for parallel execution, by default 1.
    disable_progressbar : bool, optional
        Whether to disable the progress bar, by default False.

    Returns
    -------
    pd.DataFrame
        Multi-index DataFrame containing energy data for all countries in
        TWh/a. Index levels are ``country, year, lvl1, lvl2, lvl3, lvl4``.

    Notes
    -----
    - The function first renames the countries in the input list using the
      `idees_rename` mapping and removes "CH".
    - It then reads country-wise data using :func:`eurostat_per_country` into
      a single DataFrame.
    - The data is reordered, converted to TWh/a, and zero entries for
      "Domestic aviation" are filled from neighbouring rows.
    - Country codes "UK"/"EL" are renamed back to "GB"/"GR" in the result.
    """
    # Use a sorted list instead of a set: the same collection is consumed by
    # ``pool.imap`` and passed as ``keys`` to ``pd.concat``, which expects an
    # ordered sequence. Sorting also makes the read order deterministic.
    countries = sorted(
        {idees_rename.get(country, country) for country in countries} - {"CH"}
    )

    func = partial(eurostat_per_country, input_eurostat)
    tqdm_kwargs = dict(
        ascii=False,
        unit=" country",
        total=len(countries),
        desc="Build from eurostat database",
        disable=disable_progressbar,
    )
    with mute_print():
        with mp.Pool(processes=nprocesses) as pool:
            dfs = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))

    index_names = ["country", "year", "lvl1", "lvl2", "lvl3", "lvl4"]
    df = pd.concat(dfs, keys=countries, names=index_names)
    # the "year" level stems from the sheet names; cast it to integers
    df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1)

    # drop columns without a header ("Unnamed...") and columns labelled
    # with a year between 1990 and 2021
    unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
    df.drop(unnamed_cols, axis=1, inplace=True)
    df.drop(list(range(1990, 2022)), axis=1, inplace=True, errors="ignore")

    # make numeric values where possible ("Z" entries are treated as zero)
    df.replace("Z", 0, inplace=True)
    df = df.apply(pd.to_numeric, errors="coerce")
    df = df.select_dtypes(include=[np.number])

    # write 'International aviation' to the lower level of the multiindex
    int_avia = df.index.get_level_values(3) == "International aviation"
    temp = df.loc[int_avia]
    temp.index = pd.MultiIndex.from_frame(
        temp.index.to_frame().fillna("International aviation")
    )
    df = pd.concat([temp, df.loc[~int_avia]]).sort_index()

    # Fill in missing data on "Domestic aviation" for each country.
    for country in countries:
        slicer = idx[country, :, :, :, "Domestic aviation"]
        # For the Total and Fossil energy columns, replace zeros with the
        # previous non-zero value along the sorted index (or the next one
        # where no previous value exists).
        for col in ["Total", "Fossil energy"]:
            df.loc[slicer, col] = (
                df.loc[slicer, col].replace(0.0, np.nan).ffill().bfill()
            )

    # Renaming some indices
    index_rename = {
        "Households": "Residential",
        "Commercial & public services": "Services",
        "Domestic navigation": "Domestic Navigation",
        "International maritime bunkers": "Bunkers",
        "UK": "GB",
        "EL": "GR",
    }
    columns_rename = {"Total": "Total all products"}
    df.rename(index=index_rename, columns=columns_rename, inplace=True)
    df.sort_index(inplace=True)

    # convert to TWh/a from ktoe/a
    df *= 11.63 / 1e3

    return df
def build_swiss() -> pd.DataFrame:
    """
    Load the Swiss energy statistics and return them in TWh/a.

    Returns
    -------
    pd.DataFrame
        Swiss energy data in TWh/a, with one column per entry of the "item"
        index level of the input file and the year as innermost row level.

    Notes
    -----
    - Reads the CSV file given by ``snakemake.input.swiss``; its first two
      columns form the row index and the remaining column labels are years.
    - Moves the years into the row index and the "item" level into the columns.
    - Converts energy units from PJ/a to TWh/a.
    """
    raw = pd.read_csv(snakemake.input.swiss, index_col=[0, 1])

    # column labels are years stored as text
    raw.columns = raw.columns.astype(int).rename("year")

    reshaped = raw.stack().unstack("item")
    reshaped.columns.name = None

    # 1 TWh = 3.6 PJ
    return reshaped / 3.6
def idees_per_country(ct: str, base_dir: str) -> pd.DataFrame:
    """
    Collect energy totals for one country from the JRC-IDEES workbooks.

    Parameters
    ----------
    ct : str
        The country code. It is translated with ``idees_rename`` before the
        file names are built.
    base_dir : str
        The base directory where the JRC-IDEES data files are located. The
        workbooks are expected at
        ``<base_dir>/<code>/JRC-IDEES-2021_<Sector>_<code>.xlsx``.

    Returns
    -------
    pd.DataFrame
        One column per extracted quantity (the keys of ``ct_totals``). The
        rows are the columns of the source sheets (presumably years - verify
        against the workbooks).

    Raises
    ------
    AssertionError
        If a row that is addressed by position does not carry the expected
        label.

    Notes
    -----
    - Reads the residential, tertiary and transport workbooks of the country.
    - Rows with a unique label are selected with ``.loc``; labels that occur
      several times in a sheet (e.g. "Electricity") are selected by position
      with ``.iloc`` and guarded by an assertion on the label at that position.
    - Values are returned as stored in the workbooks; no unit conversion is
      done here.
    """
    ct_idees = idees_rename.get(ct, ct)
    fn_residential = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
    fn_tertiary = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
    fn_transport = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"

    # maps output column name -> row taken from one of the sheets
    ct_totals = {}

    # residential

    # end-use split of residential consumption
    df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)

    rows = ["Advanced electric heating", "Conventional electric heating"]
    ct_totals["electricity residential space"] = df.loc[rows].sum()
    ct_totals["total residential space"] = df.loc["Space heating"]
    ct_totals["total residential water"] = df.loc["Water heating"]

    # "Electricity" appears more than once in this sheet -> select by position
    assert df.index[23] == "Electricity"
    ct_totals["electricity residential water"] = df.iloc[23]

    ct_totals["total residential cooking"] = df.loc["Cooking"]

    assert df.index[30] == "Electricity"
    ct_totals["electricity residential cooking"] = df.iloc[30]

    # sector summary
    df = pd.read_excel(fn_residential, "RES_summary", index_col=0)

    row = "Energy consumption by fuel - Eurostat structure (ktoe)"
    ct_totals["total residential"] = df.loc[row]

    assert df.index[40] == "Electricity"
    ct_totals["electricity residential"] = df.iloc[40]

    assert df.index[39] == "Distributed heat"
    ct_totals["distributed heat residential"] = df.iloc[39]

    assert df.index[43] == "Thermal uses"
    ct_totals["thermal uses residential"] = df.iloc[43]

    # values stored under the "... efficiency" keys
    df = pd.read_excel(fn_residential, "RES_hh_eff", index_col=0)

    ct_totals["total residential space efficiency"] = df.loc["Space heating"]

    assert df.index[5] == "Diesel oil"
    ct_totals["oil residential space efficiency"] = df.iloc[5]

    assert df.index[6] == "Natural gas"
    ct_totals["gas residential space efficiency"] = df.iloc[6]

    ct_totals["total residential water efficiency"] = df.loc["Water heating"]

    assert df.index[18] == "Diesel oil"
    ct_totals["oil residential water efficiency"] = df.iloc[18]

    assert df.index[19] == "Natural gas"
    ct_totals["gas residential water efficiency"] = df.iloc[19]

    # services

    df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)

    ct_totals["total services space"] = df.loc["Space heating"]

    rows = ["Advanced electric heating", "Conventional electric heating"]
    ct_totals["electricity services space"] = df.loc[rows].sum()

    # the services sheets label water heating "Hot water" and cooking "Catering"
    ct_totals["total services water"] = df.loc["Hot water"]

    assert df.index[24] == "Electricity"
    ct_totals["electricity services water"] = df.iloc[24]

    ct_totals["total services cooking"] = df.loc["Catering"]

    assert df.index[31] == "Electricity"
    ct_totals["electricity services cooking"] = df.iloc[31]

    df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)

    row = "Energy consumption by fuel - Eurostat structure (ktoe)"
    ct_totals["total services"] = df.loc[row]

    assert df.index[43] == "Electricity"
    ct_totals["electricity services"] = df.iloc[43]

    assert df.index[42] == "Distributed heat"
    ct_totals["distributed heat services"] = df.iloc[42]

    assert df.index[46] == "Thermal uses"
    ct_totals["thermal uses services"] = df.iloc[46]

    df = pd.read_excel(fn_tertiary, "SER_hh_eff", index_col=0)

    ct_totals["total services space efficiency"] = df.loc["Space heating"]

    assert df.index[5] == "Diesel oil"
    ct_totals["oil services space efficiency"] = df.iloc[5]

    # NOTE(review): the gas entry for services space heating is taken from the
    # "Conventional gas heaters" row, unlike the residential case which uses
    # "Natural gas" - confirm this is intended.
    assert df.index[7] == "Conventional gas heaters"
    ct_totals["gas services space efficiency"] = df.iloc[7]

    ct_totals["total services water efficiency"] = df.loc["Hot water"]

    assert df.index[20] == "Diesel oil"
    ct_totals["oil services water efficiency"] = df.iloc[20]

    assert df.index[21] == "Natural gas"
    ct_totals["gas services water efficiency"] = df.iloc[21]

    # agriculture, forestry and fishing

    # keep only the rows between the two section headings (both inclusive)
    start = "Detailed split of energy consumption (ktoe)"
    end = "Market shares of energy uses (%)"

    df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end]

    rows = [
        "Lighting",
        "Ventilation",
        "Specific electricity uses",
        "Pumping devices (electricity)",
    ]
    ct_totals["total agriculture electricity"] = df.loc[rows].sum()

    rows = ["Specific heat uses", "Low enthalpy heat"]
    ct_totals["total agriculture heat"] = df.loc[rows].sum()

    rows = [
        "Motor drives",
        "Farming machine drives (diesel oil and liquid biofuels)",
        "Pumping devices (diesel oil and liquid biofuels)",
    ]
    ct_totals["total agriculture machinery"] = df.loc[rows].sum()

    row = "Agriculture, forestry and fishing"
    ct_totals["total agriculture"] = df.loc[row]

    # transport

    df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)

    ct_totals["total road"] = df.loc["by fuel (EUROSTAT DATA)"]

    ct_totals["electricity road"] = df.loc["Electricity"]

    ct_totals["total two-wheel"] = df.loc["Powered two-wheelers (Gasoline)"]

    assert df.index[19] == "Passenger cars"
    ct_totals["total passenger cars"] = df.iloc[19]

    assert df.index[30] == "Battery electric vehicles"
    ct_totals["electricity passenger cars"] = df.iloc[30]

    assert df.index[31] == "Motor coaches, buses and trolley buses"
    ct_totals["total other road passenger"] = df.iloc[31]

    assert df.index[39] == "Battery electric vehicles"
    ct_totals["electricity other road passenger"] = df.iloc[39]

    assert df.index[41] == "Light commercial vehicles"
    ct_totals["total light duty road freight"] = df.iloc[41]

    assert df.index[49] == "Battery electric vehicles"
    ct_totals["electricity light duty road freight"] = df.iloc[49]

    row = "Heavy goods vehicles (Diesel oil incl. biofuels)"
    ct_totals["total heavy duty road freight"] = df.loc[row]

    # second "Passenger cars" row of the sheet, stored as the car efficiency
    assert df.index[61] == "Passenger cars"
    ct_totals["passenger car efficiency"] = df.iloc[61]

    df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)

    ct_totals["total rail"] = df.loc["by fuel"]

    ct_totals["electricity rail"] = df.loc["Electricity"]

    assert df.index[9] == "Passenger transport"
    ct_totals["total rail passenger"] = df.iloc[9]

    # electric passenger rail = metro/tram + electric trains + high speed trains
    assert df.index[10] == "Metro and tram, urban light rail"
    assert df.index[13] == "Electric"
    assert df.index[14] == "High speed passenger trains"
    ct_totals["electricity rail passenger"] = df.iloc[[10, 13, 14]].sum()

    assert df.index[15] == "Freight transport"
    ct_totals["total rail freight"] = df.iloc[15]

    assert df.index[17] == "Electric"
    ct_totals["electricity rail freight"] = df.iloc[17]

    df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)

    assert df.index[4] == "Passenger transport"
    ct_totals["total aviation passenger"] = df.iloc[4]

    assert df.index[8] == "Freight transport"
    ct_totals["total aviation freight"] = df.iloc[8]

    assert df.index[2] == "Domestic"
    ct_totals["total domestic aviation passenger"] = df.iloc[2]

    assert df.index[6] == "International - Intra-EEAwUK"
    assert df.index[7] == "International - Extra-EEAwUK"
    ct_totals["total international aviation passenger"] = df.iloc[[6, 7]].sum()

    # NOTE(review): for freight, intra-EEA international flights are counted
    # as "domestic", whereas for passengers they are counted as
    # "international" - confirm this asymmetry is intended.
    assert df.index[9] == "Domestic"
    assert df.index[10] == "International - Intra-EEAwUK"
    ct_totals["total domestic aviation freight"] = df.iloc[[9, 10]].sum()

    assert df.index[11] == "International - Extra-EEAwUK"
    ct_totals["total international aviation freight"] = df.iloc[11]

    ct_totals["total domestic aviation"] = (
        ct_totals["total domestic aviation freight"]
        + ct_totals["total domestic aviation passenger"]
    )

    ct_totals["total international aviation"] = (
        ct_totals["total international aviation freight"]
        + ct_totals["total international aviation passenger"]
    )

    df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)

    # coastal and inland
    ct_totals["total domestic navigation"] = df.loc["Energy consumption (ktoe)"]

    # activity sheet of road transport; row 85 is stored as "passenger cars"
    df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)

    assert df.index[85] == "Passenger cars"
    ct_totals["passenger cars"] = df.iloc[85]

    return pd.DataFrame(ct_totals)
def build_idees(countries: List[str]) -> pd.DataFrame:
    """
    Build energy totals from IDEES database for the given list of countries
    using :func:`idees_per_country`.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which energy totals need to be built.

    Returns
    -------
    pd.DataFrame
        Energy totals for the given countries, indexed by ``(country, year)``
        and restricted to the years 2000-2021.

    Notes
    -----
    - Reads ``snakemake.threads``, ``snakemake.config["run"]`` and
      ``snakemake.input.idees`` from the global ``snakemake`` object.
    - Retrieves energy totals per country and year using
      :func:`idees_per_country` in a process pool.
    - All columns are converted from ktoe to TWh except "passenger cars" and
      the "... space efficiency" / "... water efficiency" columns.
    """
    nprocesses = snakemake.threads
    disable_progress = snakemake.config["run"].get("disable_progressbar", False)

    func = partial(idees_per_country, base_dir=snakemake.input.idees)
    tqdm_kwargs = dict(
        ascii=False,
        unit=" country",
        total=len(countries),
        desc="Build from IDEES database",
        disable=disable_progress,
    )
    with mute_print():
        with mp.Pool(processes=nprocesses) as pool:
            totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))

    totals = pd.concat(
        totals_list,
        keys=countries,
        names=["country", "year"],
    )

    # clean up dataframe
    years = np.arange(2000, 2022)
    # Copy right after filtering so that the in-place updates below operate on
    # an independent frame and not on a slice of the concatenated data.
    totals = totals[totals.index.get_level_values(1).isin(years)].copy()

    # efficiency kgoe/100km -> ktoe/100km so that after conversion TWh/100km
    totals.loc[:, "passenger car efficiency"] /= 1e6

    # convert ktoe to TWh
    patterns = ["passenger cars", ".*space efficiency", ".*water efficiency"]
    exclude = totals.columns.str.fullmatch("|".join(patterns))
    totals.loc[:, ~exclude] *= 11.63 / 1e3

    return totals
def fill_missing_years(fill_values: pd.Series) -> pd.Series:
    """
    Fill missing years per country by forward fill (ffill) followed by
    backward fill (bfill).

    Parameters
    ----------
    fill_values : pd.Series
        A pandas Series with a MultiIndex that has a level named "country"
        (levels: country and year) representing energy values, where some
        values may be missing (NaN).

    Returns
    -------
    pd.Series
        A pandas Series in which missing values are replaced by the previous
        available value of the same country, or by the next available value
        of the same country where no previous one exists.

    Notes
    -----
    - Both the forward and the backward fill are applied within each
      'country' group, so values are never carried over from one country to
      another. A country without any data stays missing.
    - Only NaN entries are filled. NOTE(review): zeros are left untouched;
      if zeros are meant to count as missing they have to be converted to
      NaN by the caller - confirm the intended behaviour.
    """
    # forward fill within each country ...
    forward_filled = fill_values.groupby(level="country").ffill()
    # ... then backward fill the remaining leading gaps, again per country
    return forward_filled.groupby(level="country").bfill()
def build_energy_totals(
    countries: List[str],
    eurostat: pd.DataFrame,
    swiss: pd.DataFrame,
    idees: pd.DataFrame,
) -> pd.DataFrame:
    """
    Combine energy totals for the specified countries from Eurostat, Swiss, and
    IDEES data.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which energy totals are to be calculated.
    eurostat : pd.DataFrame
        Eurostat energy balances dataframe.
    swiss : pd.DataFrame
        Swiss energy data dataframe.
    idees : pd.DataFrame
        IDEES energy data dataframe.

    Returns
    -------
    pd.DataFrame
        Energy totals dataframe for the given countries, indexed by
        ``(country, year)``.

    Notes
    -----
    - Missing values are filled based on Eurostat energy balances and on
      average shares computed from the countries with data.
    - Agricultural demand of countries without IDEES data is split into
      electricity, heat and machinery using the average share of each end use
      in the IDEES data.
    - The function also performs specific calculations for Norway and splits
      road, rail, and aviation traffic for non-IDEES data.
    - Data for "BA" is completed proportionally to "RS".

    References
    ----------
    - `Norway heating data <http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14>`_
    """
    eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}
    eurostat_countries = eurostat.index.unique(0)
    eurostat_years = eurostat.index.unique(1)

    new_index = pd.MultiIndex.from_product(
        [countries, eurostat_years], names=["country", "year"]
    )

    # efficiency and activity columns are not energy totals -> drop them
    efficiency_keywords = ["space efficiency", "water efficiency"]
    to_drop = idees.columns[idees.columns.str.contains("|".join(efficiency_keywords))]
    to_drop = to_drop.append(pd.Index(["passenger cars", "passenger car efficiency"]))

    df = idees.reindex(new_index).drop(to_drop, axis=1)

    in_eurostat = df.index.levels[0].intersection(eurostat_countries)

    # add international navigation

    slicer = idx[in_eurostat, :, :, "Bunkers", :]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    # fill missing years per country by forward/backward filling
    fill_values = fill_missing_years(fill_values)
    df.loc[in_eurostat, "total international navigation"] = fill_values

    # add swiss energy data

    df = pd.concat([df.drop("CH", errors="ignore"), swiss]).sort_index()

    # get values for missing countries based on Eurostat EnergyBalances

    # agriculture

    to_fill = df.index[
        df["total agriculture"].isna()
        & df.index.get_level_values("country").isin(eurostat_countries)
    ]
    c = to_fill.get_level_values("country")
    y = to_fill.get_level_values("year")

    # take total final energy consumption from Eurostat
    eurostat_sector = "Agriculture & forestry"
    slicer = idx[c, y, :, :, eurostat_sector]

    fill_values = eurostat.loc[slicer]["Total all products"].groupby(level=[0, 1]).sum()
    # fill missing years per country by forward/backward filling
    fill_values = fill_missing_years(fill_values)
    df.loc[to_fill, "total agriculture"] = fill_values

    # split into end uses by average EU data from IDEES
    uses = ["electricity", "heat", "machinery"]

    for use in uses:
        # average share of *this* end use in total agricultural demand
        avg = (
            idees[f"total agriculture {use}"] / idees["total agriculture"]
        ).mean()
        df.loc[to_fill, f"total agriculture {use}"] = (
            df.loc[to_fill, "total agriculture"] * avg
        )

    # divide cooking/space/water according to averages in EU28

    uses = ["space", "cooking", "water"]

    to_fill = df.index[
        df["total residential"].isna()
        & df.index.get_level_values("country").isin(eurostat_countries)
    ]
    c = to_fill.get_level_values("country")
    y = to_fill.get_level_values("year")

    for sector in ["residential", "services", "road", "rail"]:
        eurostat_sector = sector.capitalize()

        # fuel use

        for fuel in ["electricity", "total"]:
            slicer = idx[c, y, :, :, eurostat_sector]
            fill_values = (
                eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum()
            )
            # fill missing years per country by forward/backward filling
            fill_values = fill_missing_years(fill_values)
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values

    for sector in ["residential", "services"]:
        # electric use

        for use in uses:
            fuel_use = df[f"electricity {sector} {use}"]
            fuel = (
                df[f"electricity {sector}"].replace(0, np.nan).infer_objects(copy=False)
            )
            avg = fuel_use.div(fuel).mean()
            logger.debug(
                f"{sector}: average fraction of electricity for {use} is {avg:.3f}"
            )
            df.loc[to_fill, f"electricity {sector} {use}"] = (
                avg * df.loc[to_fill, f"electricity {sector}"]
            )

        # non-electric use

        for use in uses:
            nonelectric_use = (
                df[f"total {sector} {use}"] - df[f"electricity {sector} {use}"]
            )
            nonelectric = df[f"total {sector}"] - df[f"electricity {sector}"]
            nonelectric = nonelectric.copy().replace(0, np.nan)
            avg = nonelectric_use.div(nonelectric).mean()
            logger.debug(
                f"{sector}: average fraction of non-electric for {use} is {avg:.3f}"
            )
            electric_use = df.loc[to_fill, f"electricity {sector} {use}"]
            nonelectric = (
                df.loc[to_fill, f"total {sector}"]
                - df.loc[to_fill, f"electricity {sector}"]
            )
            df.loc[to_fill, f"total {sector} {use}"] = electric_use + avg * nonelectric

    # Fix Norway space and water heating fractions
    # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    # The main heating source for about 73 per cent of the households is based on electricity
    # => 27% is non-electric

    if "NO" in df.index:
        elec_fraction = 0.73

        no_norway = df.drop("NO")

        for sector in ["residential", "services"]:
            # assume non-electric is heating
            nonelectric = (
                df.loc["NO", f"total {sector}"] - df.loc["NO", f"electricity {sector}"]
            )
            total_heating = nonelectric / (1 - elec_fraction)

            for use in uses:
                # share of this end use, averaged over all other countries
                nonelectric_use = (
                    no_norway[f"total {sector} {use}"]
                    - no_norway[f"electricity {sector} {use}"]
                )
                nonelectric = (
                    no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
                )
                nonelectric = nonelectric.copy().replace(0, np.nan)
                fraction = nonelectric_use.div(nonelectric).mean()
                df.loc["NO", f"total {sector} {use}"] = (
                    total_heating * fraction
                ).values
                df.loc["NO", f"electricity {sector} {use}"] = (
                    total_heating * fraction * elec_fraction
                ).values

    # Missing aviation

    slicer = idx[c, y, :, :, "Domestic aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    # fill missing years per country by forward/backward filling
    fill_values = fill_missing_years(fill_values)
    df.loc[to_fill, "total domestic aviation"] = fill_values

    slicer = idx[c, y, :, :, "International aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    # fill missing years per country by forward/backward filling
    fill_values = fill_missing_years(fill_values)
    df.loc[to_fill, "total international aviation"] = fill_values

    # missing domestic navigation

    slicer = idx[c, y, :, :, "Domestic Navigation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    # fill missing years per country by forward/backward filling
    fill_values = fill_missing_years(fill_values)
    df.loc[to_fill, "total domestic navigation"] = fill_values

    # split road traffic for non-IDEES
    missing = df.index[df["total passenger cars"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [
            f"{fuel} passenger cars",
            f"{fuel} other road passenger",
            f"{fuel} light duty road freight",
        ]
        if fuel == "total":
            selection.extend([f"{fuel} two-wheel", f"{fuel} heavy duty road freight"])
        # shares of the vehicle classes summed over all rows with data
        road = df[selection].sum()
        road_fraction = road / road.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} road"], road_fraction)
        df.loc[missing, road_fraction.index] = fill_values

    # split rail traffic for non-IDEES
    missing = df.index[df["total rail passenger"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [f"{fuel} rail passenger", f"{fuel} rail freight"]
        rail = df[selection].sum()
        rail_fraction = rail / rail.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} rail"], rail_fraction)
        df.loc[missing, rail_fraction.index] = fill_values

    # split aviation traffic for non-IDEES
    missing = df.index[df["total domestic aviation passenger"].isna()]
    for destination in ["domestic", "international"]:
        selection = [
            f"total {destination} aviation passenger",
            f"total {destination} aviation freight",
        ]
        aviation = df[selection].sum()
        aviation_fraction = aviation / aviation.sum()
        fill_values = cartesian(
            df.loc[missing, f"total {destination} aviation"], aviation_fraction
        )
        df.loc[missing, aviation_fraction.index] = fill_values

    for purpose in ["passenger", "freight"]:
        attrs = [
            f"total domestic aviation {purpose}",
            f"total international aviation {purpose}",
        ]
        df.loc[missing, f"total aviation {purpose}"] = df.loc[missing, attrs].sum(
            axis=1
        )

    if "BA" in df.index:
        # fill missing data for BA (services and road energy data)
        # proportional to RS with ratio of total residential demand
        mean_BA = df.loc["BA"].loc[2014:2021, "total residential"].mean()
        mean_RS = df.loc["RS"].loc[2014:2021, "total residential"].mean()
        ratio = mean_BA / mean_RS

        # treat zeros as missing so that they are replaced by scaled RS values
        df.loc["BA"] = (
            df.loc["BA"].replace(0.0, np.nan).infer_objects(copy=False).values
        )
        df.loc["BA"] = df.loc["BA"].combine_first(ratio * df.loc["RS"]).values

    return df
def build_district_heat_share(
    countries: List[str], idees: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate the share of district heating for each country.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which to calculate district heating share.
    idees : pd.DataFrame
        IDEES energy data dataframe, indexed by ``(country, year)``.

    Returns
    -------
    pd.DataFrame
        District heating share with one row per country and one column per
        year (the per-country, per-year series is unstacked before it is
        returned).

    Notes
    -----
    - The function calculates the district heating share as the sum of
      residential and services distributed heat, divided by the sum of
      residential and services thermal uses.
    - The district heating share is then reindexed to match the provided list
      of countries.
    - Missing district heating shares are filled from the file given by
      ``snakemake.input.district_heat_share``, whose values are divided by 100.
    - The function makes a conservative assumption and takes the minimum
      district heating share from both data sets.
    - Years without any value are dropped and remaining gaps are forward
      filled along the years.
    """
    # district heating share
    district_heat = idees[
        ["distributed heat residential", "distributed heat services"]
    ].sum(axis=1)
    # zero thermal demand would give a division by zero -> treat as missing
    total_heat = (
        idees[["thermal uses residential", "thermal uses services"]]
        .sum(axis=1)
        .replace(0, np.nan)
    )

    district_heat_share = district_heat / total_heat

    district_heat_share = district_heat_share.reindex(countries, level="country")

    # Missing district heating share
    dh_share = (
        pd.read_csv(snakemake.input.district_heat_share, index_col=0, usecols=[0, 1])
        .div(100)
        .squeeze()
    )
    # make conservative assumption and take minimum from both data sets
    # (the per-country value is repeated for every available year)
    new_index = pd.MultiIndex.from_product(
        [dh_share.index, district_heat_share.index.get_level_values(1).unique()]
    )
    district_heat_share = pd.concat(
        [district_heat_share, dh_share.reindex(new_index, level=0)], axis=1
    ).min(axis=1)

    district_heat_share = district_heat_share.reindex(countries, level=0)

    district_heat_share.name = "district heat share"

    # restrict to available years
    district_heat_share = (
        district_heat_share.unstack().dropna(how="all", axis=1).ffill(axis=1)
    )

    return district_heat_share
def build_eea_co2(
    input_co2: str, year: int = 1990, emissions_scope: str = "CO2"
) -> pd.DataFrame:
    """
    Extract sectoral emissions of one year from the EEA inventory, in Mt.

    Parameters
    ----------
    input_co2 : str
        Path to the input CSV file with CO2 data.
    year : int, optional
        Year for which to calculate emissions, by default 1990.
    emissions_scope : str, optional
        Value of the ``Pollutant_name`` column to select, by default "CO2".

    Returns
    -------
    pd.DataFrame
        Emissions of the given year with one row per country and one column
        per sector. "EUA" is reported as "EU28" and "UK" as "GB".

    Notes
    -----
    - Rows are selected for "CH", "EUA", "NO" and the EU28 countries, for the
      sectors listed in ``to_ipcc``.
    - "industrial non-elec" is computed as total energy emissions minus the
      electricity, buildings, transport and agriculture/forestry/fishing
      energy sectors.
    - Emissions of "agriculture, forestry and fishing" are added to
      "agriculture".
    - Total columns that are no longer needed are dropped and the values are
      scaled to Mt.

    References
    ----------
    - `EEA CO2 data <https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16>`_ (downloaded 201228, modified by EEA last on 201221)
    """
    inventory = pd.read_csv(input_co2, encoding="latin-1", low_memory=False)

    # the period label "1985-1987" is mapped to its middle year
    inventory = inventory.replace({"Year": "1985-1987"}, 1986)
    inventory["Year"] = inventory["Year"].astype(int)

    levels = ["Country_code", "Pollutant_name", "Year", "Sector_name"]
    inventory = inventory.set_index(levels).sort_index()

    selected_countries = ["CH", "EUA", "NO"] + eu28_eea
    selection = idx[selected_countries, emissions_scope, year, to_ipcc.values()]

    by_sector = inventory.loc[selection, "emissions"].unstack("Sector_name")
    by_sector = by_sector.rename(columns=reverse(to_ipcc))
    # pollutant and year are fixed by the selection -> keep the country only
    emissions = by_sector.droplevel([1, 2])
    emissions = emissions.rename(index={"EUA": "EU28", "UK": "GB"})

    energy_sectors = [
        "electricity",
        "services non-elec",
        "residential non-elec",
        "road non-elec",
        "rail non-elec",
        "domestic aviation",
        "international aviation",
        "domestic navigation",
        "international navigation",
        "agriculture, forestry and fishing",
    ]
    # remainder of the energy emissions is attributed to industry
    other_energy = emissions[energy_sectors].sum(axis=1)
    emissions["industrial non-elec"] = emissions["total energy"] - other_energy

    emissions["agriculture"] = (
        emissions["agriculture"] + emissions["agriculture, forestry and fishing"]
    )

    obsolete = [
        "total energy",
        "total wL",
        "total woL",
        "agriculture, forestry and fishing",
    ]
    emissions = emissions.drop(columns=obsolete)

    # scale to Mt; NOTE(review): dividing by 1e3 to reach Mt implies the
    # input is in Gg (kt) rather than Gt - confirm against the EEA data
    return emissions / 1e3
def build_eurostat_co2(eurostat: pd.DataFrame, year: int = 1990) -> pd.Series:
    """
    Estimate CO2 emissions for one year from Eurostat fuel consumption.

    Parameters
    ----------
    eurostat : pd.DataFrame
        Eurostat energy balances. The index must contain a level named
        ``year``; the columns are fuel names.
    year : int, optional
        Year for which to calculate emissions, by default 1990.

    Returns
    -------
    pd.Series
        One value per remaining index entry: the fuel consumption of `year`
        weighted with the fuel-specific emission factors and summed over all
        fuels.

    Notes
    -----
    - Emission factors are hard-coded in tCO2_equiv per MWh_th:
        - solid fossil fuels: 0.36 (approximates coal)
        - oil and petroleum products: 0.285 (average of distillate and residue)
        - natural gas: 0.2
    - Every other column keeps a NaN factor, so its product is NaN and it is
      skipped by the (NaN-ignoring) row sum.

    References
    ----------
    - Oil values from `EIA <https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11>`_
        - Distillate oil (No. 2)  0.276
        - Residual oil (No. 6)  0.298
    - `EIA Electricity Annual <https://www.eia.gov/electricity/annual/html/epa_a_03.html>`_
    """
    consumption = eurostat.xs(year, level="year")

    # emissions in tCO2_equiv per MWh_th
    emission_factors = {
        "Solid fossil fuels": 0.36,  # Approximates coal
        "Oil and petroleum products": 0.285,  # Average of distillate and residue
        "Natural gas": 0.2,  # For natural gas
    }

    # Start from an all-NaN weight per Eurostat column, then fill in the
    # fuels for which a factor is known.
    weights = pd.Series(index=eurostat.columns, dtype=float)
    for fuel, factor in emission_factors.items():
        weights[fuel] = factor

    return consumption.mul(weights).sum(axis=1)
def build_co2_totals(
    countries: List[str], eea_co2: pd.DataFrame, eurostat_co2: pd.DataFrame
) -> pd.DataFrame:
    """
    Combine CO2 emissions data from EEA and Eurostat for a list of countries.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which CO2 totals need to be built.
    eea_co2 : pd.DataFrame
        EEA CO2 emissions, indexed by country with one column per sector.
    eurostat_co2 : pd.DataFrame
        Eurostat-based CO2 emissions with a multi-level index whose first
        level is the country. In this script it is the output of
        :func:`build_eurostat_co2`.

    Returns
    -------
    pd.DataFrame
        `eea_co2` reindexed to `countries`, with the sector values of
        selected countries overwritten by sums of Eurostat-based emissions.

    Notes
    -----
    - Only countries from the hard-coded list below (BA, RS, XK, AL, ME, MK,
      UA, MD) that are also in `countries` are overwritten.
    - Tuple selectors address the leading index levels of `eurostat_co2`;
      "agriculture" uses a boolean mask over index level 0 and level 3.
    """
    countries_from_eurostat = ["BA", "RS", "XK", "AL", "ME", "MK", "UA", "MD"]

    co2 = eea_co2.reindex(countries)

    for ct in pd.Index(countries).intersection(countries_from_eurostat):
        in_country = eurostat_co2.index.get_level_values(0) == ct
        in_agriculture = eurostat_co2.index.isin(
            ["Agriculture & forestry", "Fishing"], level=3
        )

        selectors = {
            "electricity": (ct, "+", "Electricity & heat generation", np.nan),
            "residential non-elec": (ct, "+", "+", "Residential"),
            "services non-elec": (ct, "+", "+", "Services"),
            "road non-elec": (ct, "+", "+", "Road"),
            "rail non-elec": (ct, "+", "+", "Rail"),
            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
            "international navigation": (ct, "-", "Bunkers"),
            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
            "international aviation": (ct, "-", "International aviation"),
            # does not include industrial process emissions or fuel processing/refining
            "industrial non-elec": (ct, "+", "Industry sector"),
            # does not include non-energy emissions
            "agriculture": in_country & in_agriculture,
        }

        for sector, selector in selectors.items():
            co2.at[ct, sector] = eurostat_co2.loc[selector].sum()

    return co2
def build_transport_data(
    countries: List[str], population: pd.DataFrame, idees: pd.DataFrame
) -> pd.DataFrame:
    """
    Build transport data for a set of countries based on IDEES data.

    Parameters
    ----------
    countries : List[str]
        List of country codes.
    population : pd.DataFrame
        Population per country. It is divided element-wise into the number
        of cars, so it must align with the ``country`` index level (the
        script passes a Series indexed by country).
    idees : pd.DataFrame
        IDEES data indexed by (country, year) with at least the columns
        ``"passenger cars"`` and ``"passenger car efficiency"``.

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by (country, year), restricted to the years
        2000-2021, with the columns ``"number cars"`` and
        ``"average fuel efficiency"``.

    Notes
    -----
    - The function first collects the number of passenger cars.
    - For Switzerland, it reads the data from the file given by the global
      ``snakemake.input.swiss_transport`` (`data/gr-e-11.03.02.01.01-cc.csv`).
    - It fills missing data on the number of cars and fuel efficiency with average data.
    - Non-Swiss countries are ordered alphabetically, so the row order of
      the result is reproducible between runs.

    References
    ----------
    - Swiss transport data: `BFS <https://www.bfs.admin.ch/bfs/en/home/statistics/mobility-transport/transport-infrastructure-vehicles/vehicles/road-vehicles-stock-level-motorisation.html>`_
    """
    years = np.arange(2000, 2022)

    # first collect number of cars
    transport_data = pd.DataFrame(idees["passenger cars"])

    # Sort the de-duplicated countries: iterating a bare set would make the
    # row order depend on string-hash randomisation.
    countries_without_ch = sorted(set(countries) - {"CH"})
    new_index = pd.MultiIndex.from_product(
        [countries_without_ch, transport_data.index.unique(1)],
        names=["country", "year"],
    )

    transport_data = transport_data.reindex(index=new_index)

    if "CH" in countries:
        fn = snakemake.input.swiss_transport
        swiss_cars = pd.read_csv(fn, index_col=0).loc[years, ["passenger cars"]]

        swiss_cars.index = pd.MultiIndex.from_product(
            [["CH"], swiss_cars.index], names=["country", "year"]
        )

        transport_data = pd.concat([transport_data, swiss_cars]).sort_index()

    transport_data = transport_data.rename(columns={"passenger cars": "number cars"})

    # clean up dataframe; copy so that the column assignments below operate
    # on an independent frame rather than on a filtered view
    in_year_range = transport_data.index.get_level_values(1).isin(years)
    transport_data = transport_data[in_year_range].copy()

    missing = transport_data.index[transport_data["number cars"].isna()]
    if not missing.empty:
        logger.info(
            f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data."
        )

        # average cars per capita over all available entries, scaled by
        # each country's population
        cars_pp = transport_data["number cars"] / population

        fill_values = {
            year: cars_pp.mean() * population for year in transport_data.index.unique(1)
        }
        fill_values = pd.DataFrame(fill_values).stack()
        fill_values = pd.DataFrame(fill_values, columns=["number cars"])
        fill_values.index.names = ["country", "year"]
        fill_values = fill_values.reindex(transport_data.index)

        transport_data = transport_data.combine_first(fill_values)

    # collect average fuel efficiency in MWh/100km, taking passenger car efficiency in TWh/100km
    transport_data["average fuel efficiency"] = idees["passenger car efficiency"] * 1e6

    missing = transport_data.index[transport_data["average fuel efficiency"].isna()]
    if not missing.empty:
        logger.info(
            f"Missing data on fuel efficiency from:\n{list(missing)}\nFilling gaps with averaged data."
        )

        fill_values = transport_data["average fuel efficiency"].mean()
        transport_data.loc[missing, "average fuel efficiency"] = fill_values

    return transport_data
def rescale_idees_from_eurostat(
    idees_countries: List[str], energy: pd.DataFrame, eurostat: pd.DataFrame
) -> pd.DataFrame:
    """
    Fill years outside the JRC IDEES coverage by scaling IDEES data with the
    trend of the Eurostat data.

    Eurostat values are expressed as a ratio to the 2021 Eurostat values; the
    IDEES values of a source year are then multiplied with the ratios of the
    target years and written into `energy` in place.

    Missing data: ['passenger car efficiency', 'passenger cars']

    Parameters
    ----------
    idees_countries : List[str]
        List of IDEES country codes.
    energy : pd.DataFrame
        DataFrame with JRC IDEES data. It is modified in place.
    eurostat : pd.DataFrame
        DataFrame with Eurostat data. Its index must contain a level named
        ``year`` and it must provide the columns ``"Total all products"`` and
        ``"Electricity"``.

    Returns
    -------
    pd.DataFrame
        The same `energy` object, with the target years overwritten.

    Notes
    -----
    - The function first reads in the Eurostat data for 2021 and calculates the ratio of all Eurostat data to it.
    - This ratio is mapped to the IDEES data: source year 2015 fills 2016-2021 and source year 2000 fills 1990-1999.
    - Afterwards "total agriculture" and "total road" are recomputed as the sum of their sub-categories.
    - NOTE(review): the ratios are normalised to 2021 while the source years
      are 2015 and 2000 — confirm that this combination is intended.

    References
    ----------
    - JRC IDEES data: `JRC IDEES <https://ec.europa.eu/jrc/en/publication/eur-scientific-and-technical-research-reports/jrc-idees>`_
    - Eurostat data: `Eurostat <https://ec.europa.eu/eurostat/data/database>`_
    """

    main_cols = ["Total all products", "Electricity"]
    # read in the eurostat data for 2021
    eurostat_2021 = eurostat.xs(2021, level="year")[main_cols]
    # calculate the ratio of the two data sets
    ratio = eurostat[main_cols] / eurostat_2021
    # drop the index levels at positions 2 and 5; the code below addresses the
    # remaining levels as (country, year, "lvl2", "lvl3")
    ratio = ratio.droplevel([2, 5])
    cols_rename = {"Total all products": "total", "Electricity": "ele"}
    # inverse of `idees_rename` (defined elsewhere in this file); presumably
    # maps Eurostat country codes back to the codes used here — verify
    index_rename = {v: k for k, v in idees_rename.items()}
    ratio.rename(columns=cols_rename, index=index_rename, inplace=True)

    # Eurostat sector -> `energy` columns that are scaled with the sector's
    # "total" ratio and with its electricity ("ele") ratio respectively
    mappings = {
        "Residential": {
            "total": [
                "total residential space",
                "total residential water",
                "total residential cooking",
                "total residential",
                "distributed heat residential",
                "thermal uses residential",
            ],
            "elec": [
                "electricity residential space",
                "electricity residential water",
                "electricity residential cooking",
                "electricity residential",
            ],
        },
        "Services": {
            "total": [
                "total services space",
                "total services water",
                "total services cooking",
                "total services",
                "distributed heat services",
                "thermal uses services",
            ],
            "elec": [
                "electricity services space",
                "electricity services water",
                "electricity services cooking",
                "electricity services",
            ],
        },
        "Agriculture & forestry": {
            "total": [
                "total agriculture heat",
                "total agriculture machinery",
                "total agriculture",
            ],
            "elec": [
                "total agriculture electricity",
            ],
        },
        "Road": {
            "total": [
                "total road",
                "total passenger cars",
                "total other road passenger",
                "total light duty road freight",
                "total heavy duty road freight",
            ],
            "elec": [
                "electricity road",
                "electricity passenger cars",
                "electricity other road passenger",
                "electricity light duty road freight",
            ],
        },
        "Rail": {
            "total": [
                "total rail",
                "total rail passenger",
                "total rail freight",
            ],
            "elec": [
                "electricity rail",
                "electricity rail passenger",
                "electricity rail freight",
            ],
        },
    }

    # columns scaled with the "International aviation" total ratio
    avia_inter = [
        "total aviation passenger",
        "total aviation freight",
        "total international aviation passenger",
        "total international aviation freight",
        "total international aviation",
    ]
    # columns scaled with the "Domestic aviation" total ratio
    avia_domestic = [
        "total domestic aviation passenger",
        "total domestic aviation freight",
        "total domestic aviation",
    ]
    # columns scaled with the "Domestic Navigation" total ratio
    navigation = [
        "total domestic navigation",
    ]
    # international navigation is already read in from the eurostat data directly

    for country in idees_countries:
        # (source year, target years): the IDEES values of the source year are
        # scaled into every year of the target slice
        filling_years = [(2015, slice(2016, 2021)), (2000, slice(1990, 1999))]

        for source_year, target_years in filling_years:
            slicer_source = idx[country, source_year, :, :]
            slicer_target = idx[country, target_years, :, :]

            for sector, mapping in mappings.items():
                sector_ratio = ratio.loc[
                    (country, slice(None), slice(None), sector)
                ].droplevel("lvl2")

                # `cartesian` is defined elsewhere in this file; presumably it
                # returns the outer product (target years x columns) — verify
                energy.loc[slicer_target, mapping["total"]] = cartesian(
                    sector_ratio.loc[target_years, "total"],
                    energy.loc[slicer_source, mapping["total"]].squeeze(axis=0),
                ).values
                energy.loc[slicer_target, mapping["elec"]] = cartesian(
                    sector_ratio.loc[target_years, "ele"],
                    energy.loc[slicer_source, mapping["elec"]].squeeze(axis=0),
                ).values

            # reduce the aviation/navigation ratios to a year-indexed series
            level_drops = ["country", "lvl2", "lvl3"]

            slicer = idx[country, :, :, "Domestic aviation"]
            avi_d = ratio.loc[slicer, "total"].droplevel(level_drops)

            slicer = idx[country, :, :, "International aviation"]
            avi_i = ratio.loc[slicer, "total"].droplevel(level_drops)

            slicer = idx[country, :, :, "Domestic Navigation"]
            nav = ratio.loc[slicer, "total"].droplevel(level_drops)

            energy.loc[slicer_target, avia_inter] = cartesian(
                avi_i.loc[target_years],
                energy.loc[slicer_source, avia_inter].squeeze(axis=0),
            ).values

            energy.loc[slicer_target, avia_domestic] = cartesian(
                avi_d.loc[target_years],
                energy.loc[slicer_source, avia_domestic].squeeze(axis=0),
            ).values

            energy.loc[slicer_target, navigation] = cartesian(
                nav.loc[target_years],
                energy.loc[slicer_source, navigation].squeeze(axis=0),
            ).values

        # set the total of agriculture/road to the sum of all agriculture/road categories (corresponding to the IDEES data)
        rows = idx[country, :]
        cols = [
            "total agriculture electricity",
            "total agriculture heat",
            "total agriculture machinery",
        ]
        energy.loc[rows, "total agriculture"] = energy.loc[rows, cols].sum(axis=1)

        cols = [
            "total passenger cars",
            "total other road passenger",
            "total light duty road freight",
            "total heavy duty road freight",
        ]
        energy.loc[rows, "total road"] = energy.loc[rows, cols].sum(axis=1)

    return energy
def update_residential_from_eurostat(energy: pd.DataFrame) -> pd.DataFrame:
    """
    Updates energy balances for residential from disaggregated data from
    Eurostat by mutating input data DataFrame.

    Parameters
    ----------
    energy : pd.DataFrame
        DataFrame with energy data, indexed by (country, year). It is
        modified in place.

    Returns
    -------
    pd.DataFrame
        The same `energy` object with updated energy balances. Callers that
        rely only on the in-place mutation can ignore the return value.

    Notes
    -----
    - The function first reads in the Eurostat data for households from the
      file given by the global ``snakemake.input.eurostat_households`` and
      maps the energy types to the corresponding Eurostat codes.
    - For each energy type, it selects the corresponding data, converts units, and drops unnecessary data.
    - Only (country, year) entries that exist in both data sets are
      overwritten; non-positive Eurostat observations are ignored.
    """
    eurostat_households = pd.read_csv(snakemake.input.eurostat_households)

    # Column mapping for energy type
    nrg_type = {
        "total residential": ("FC_OTH_HH_E", "TOTAL"),
        "total residential space": ("FC_OTH_HH_E_SH", "TOTAL"),
        "total residential water": ("FC_OTH_HH_E_WH", "TOTAL"),
        "total residential cooking": ("FC_OTH_HH_E_CK", "TOTAL"),
        "electricity residential": ("FC_OTH_HH_E", "E7000"),
        "electricity residential space": ("FC_OTH_HH_E_SH", "E7000"),
        "electricity residential water": ("FC_OTH_HH_E_WH", "E7000"),
        "electricity residential cooking": ("FC_OTH_HH_E_CK", "E7000"),
    }

    # Loop-invariant helpers: inverse of `idees_rename` for the country codes
    # and the aggregate regions to exclude. `drop_geo`, `code` and `siec`
    # must stay local names because the query string references them via `@`.
    idx_to_rename = {v: k for k, v in idees_rename.items()}
    drop_geo = ["EU27_2020", "EA20"]

    for nrg_name, (code, siec) in nrg_type.items():
        # Select energy balance type, rename columns and countries to match IDEES data,
        # convert TJ to TWh
        col_to_rename = {"geo": "country", "TIME_PERIOD": "year", "OBS_VALUE": nrg_name}

        nrg_data = eurostat_households.query(
            "nrg_bal == @code and siec == @siec and geo not in @drop_geo and OBS_VALUE > 0"
        ).copy()
        nrg_data.rename(columns=col_to_rename, inplace=True)
        nrg_data = nrg_data.set_index(["country", "year"])[nrg_name] / 3.6e3
        nrg_data.rename(index=idx_to_rename, inplace=True)

        # update energy balance from household-specific eurostat data
        common = nrg_data.index.intersection(energy.index)
        energy.loc[common, nrg_name] = nrg_data.loc[common]

    logger.info(
        "Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat"
    )

    return energy
def build_transformation_output_coke(eurostat, fn):
    """
    Write the transformation output of coke ovens from the Eurostat data to
    a CSV file.

    The rows whose fourth index level is "Coke ovens" and whose fifth index
    level is "Other sources" are selected, the index is reduced to its first
    two levels, and the result is saved. The transformation output of coke
    ovens is not included in the final energy consumption of the iron and
    steel sector, so it needs to be processed and added separately.

    Parameters
    ----------
    eurostat : pd.DataFrame
        Eurostat data with a six-level index.
    fn : str
        The file path where the resulting CSV file should be saved.

    Returns
    -------
    None
        The result is only written to `fn`.
    """
    everything = slice(None)
    coke_selector = (
        everything,
        everything,
        everything,
        "Coke ovens",
        "Other sources",
        everything,
    )

    coke_output = eurostat.loc[coke_selector, :]
    # keep only the first two index levels
    coke_output = coke_output.droplevel(level=[2, 3, 4, 5])
    coke_output.to_csv(fn)
def build_heating_efficiencies(
    countries: List[str], idees: pd.DataFrame
) -> pd.DataFrame:
    """
    Build heating efficiencies for a set of countries based on IDEES data.

    Parameters
    ----------
    countries : List[str]
        List of country codes.
    idees : pd.DataFrame
        DataFrame with IDEES data, indexed by (country, year).

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by (country, year) for every combination of
        `countries` and the years present in `idees`, holding all columns
        whose name contains "space efficiency" or "water efficiency".

    Notes
    -----
    - It fills missing data with average data: a missing value is replaced
      by the mean of that column over all countries for the same year.
    - Values stay missing where no country has data for that year.
    """
    # Union of both patterns (a column matching both is selected as well).
    is_heating_efficiency = idees.columns.str.contains(
        "space efficiency"
    ) | idees.columns.str.contains("water efficiency")
    cols = idees.columns[is_heating_efficiency]

    heating_efficiencies = pd.DataFrame(idees[cols])

    new_index = pd.MultiIndex.from_product(
        [countries, heating_efficiencies.index.unique(1)],
        names=["country", "year"],
    )

    heating_efficiencies = heating_efficiencies.reindex(index=new_index)

    for col in cols:
        values = heating_efficiencies[col]
        # mean over all countries per year, broadcast back to every row
        yearly_mean = values.groupby(level="year").transform("mean")
        heating_efficiencies[col] = values.fillna(yearly_mean)

    return heating_efficiencies
# %%
# Script entry point: builds the energy totals, district heat shares, CO2
# totals, transport data and heating efficiencies and writes each of them to
# the paths given by `snakemake.output`.
if __name__ == "__main__":
    # Allow running the script outside of a Snakemake workflow by mocking
    # the `snakemake` object for the "build_energy_totals" rule.
    if "snakemake" not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake("build_energy_totals")

    configure_logging(snakemake)
    set_scenario_config(snakemake)

    params = snakemake.params.energy

    # population per country, aggregated from the NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index("index")
    population = nuts3["pop"].groupby(nuts3.country).sum()

    countries = snakemake.params.countries
    # only the modelled countries that are also in `eu27` (defined elsewhere
    # in this file) are requested from IDEES
    idees_countries = pd.Index(countries).intersection(eu27)

    input_eurostat = snakemake.input.eurostat
    eurostat = build_eurostat(
        input_eurostat,
        countries,
        nprocesses=snakemake.threads,
        disable_progressbar=snakemake.config["run"].get("disable_progressbar", False),
    )

    build_transformation_output_coke(
        eurostat, snakemake.output.transformation_output_coke
    )

    swiss = build_swiss()
    idees = build_idees(idees_countries)

    energy = build_energy_totals(countries, eurostat, swiss, idees)

    # mutates `energy` in place, so the return value is not needed here
    update_residential_from_eurostat(energy)

    energy.to_csv(snakemake.output.energy_name)

    # use rescaled idees data to calculate district heat share
    # NOTE(review): no rescaling call is visible in this block — presumably
    # it happens inside `build_energy_totals`; confirm.
    district_heat_share = build_district_heat_share(
        countries, energy.loc[idees_countries]
    )
    district_heat_share.to_csv(snakemake.output.district_heat_share)

    # historical CO2 emissions of the configured base year and scope
    base_year_emissions = params["base_emissions_year"]
    emissions_scope = snakemake.params.energy["emissions"]
    eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
    eurostat_co2 = build_eurostat_co2(eurostat, base_year_emissions)

    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
    co2.to_csv(snakemake.output.co2_name)

    transport = build_transport_data(countries, population, idees)
    transport.to_csv(snakemake.output.transport_name)

    heating_efficiencies = build_heating_efficiencies(countries, idees)
    heating_efficiencies.to_csv(snakemake.output.heating_efficiencies)