pypsa-eur/scripts/build_heat_totals.py

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors
#
# SPDX-License-Identifier: MIT
"""
Approximate heat demand for all weather years.

:func:`approximate_heat_demand` approximates annual heat demand based on energy totals and heating degree days (HDD) using a regression of heat demand on HDDs.

Inputs
------
- `resources/<run_name>/energy_totals.csv`: Energy consumption by sector (columns), country and year. Output of :func:`scripts.build_energy_totals.py`.
- `data/era5-annual-HDD-per-country.csv`: Number of heating degree days by year (columns) and country (index).

Outputs
-------
- `resources/<run_name>/heat_totals.csv`: Approximated annual heat demand for each country.
"""

from itertools import product

import pandas as pd
from numpy.polynomial import Polynomial

idx = pd.IndexSlice


def approximate_heat_demand(
    energy_totals: pd.DataFrame, hdd: pd.DataFrame
) -> pd.DataFrame:
    """
    Approximate annual space heat demand per country for all years in ``hdd``.

    For every combination of ``total``/``electricity`` and
    ``services``/``residential``, the column ``"<kind> <sector> space"`` of
    ``energy_totals`` is regressed linearly (degree-1 polynomial) on heating
    degree days (HDD) using the years 2007 to 2021. For every year that is
    present in ``hdd`` but not in the fitting data, the heat demand is then
    estimated from the known HDDs based on the regression.

    Parameters
    ----------
    energy_totals : pd.DataFrame
        DataFrame with energy consumption by sector (columns), indexed by
        country (first level) and year (second level). Output of
        :func:`scripts.build_energy_totals.py`.
    hdd : pd.DataFrame
        DataFrame with number of heating degree days, with years as index
        and countries as columns.

    Returns
    -------
    pd.DataFrame
        DataFrame with one column per ``"<kind> <sector> space"`` demand,
        indexed by ``("country", "year")``. Negative values are clipped to
        zero.

    Notes
    -----
    - Missing data is filled forward for GB in 2020 and backward for CH from 2007 to 2009.
    - If only one year of heating data is available for a country, a point (0, 0) is added to make the polynomial fit work.
    - Only countries that have rows in ``energy_totals`` *and* a column in ``hdd`` are fitted.
    """

    # ``index.levels`` keeps labels of countries that were filtered out of
    # the frame earlier on. Drop those unused labels first, otherwise a
    # country without any rows would be selected for the regression and the
    # lookup / fit below would fail.
    known_countries = energy_totals.index.remove_unused_levels().levels[0]
    countries = hdd.columns.intersection(known_countries)

    # reduced number years (2007-2021) for regression because it implicitly
    # assumes a constant building stock
    row = pd.IndexSlice[:, 2007:2021]

    demands = {}

    for kind, sector in product(["total", "electricity"], ["services", "residential"]):
        col = f"{kind} {sector} space"

        # years as index, countries as columns
        demand = energy_totals.loc[row, col].unstack(0)

        # ffill for GB in 2020- and bfill for CH 2007-2009
        # compromise to have more years available for the fit
        demand = demand.ffill(axis=0).bfill(axis=0)

        demand_approx = {}

        for c in countries:
            Y = demand[c].dropna()
            X = hdd.loc[Y.index, c]

            # Sometimes (looking at you, Switzerland) we only have
            # _one_ year of heating data to base the prediction on. In
            # this case we add a point at 0, 0 to make a "polynomial"
            # fit work.
            if len(X) == len(Y) == 1:
                X.loc[-1] = 0
                Y.loc[-1] = 0

            # every year with HDD data that was not part of the fit
            to_predict = hdd.index.difference(Y.index)
            X_pred = hdd.loc[to_predict, c]

            p = Polynomial.fit(X, Y, 1)
            Y_pred = p(X_pred)

            demand_approx[c] = pd.Series(Y_pred, index=to_predict)

        demand_approx = pd.DataFrame(demand_approx)
        demand_approx = pd.concat([demand, demand_approx]).sort_index()
        # Collapse rows sharing a year label into one; entries that are
        # missing for a country contribute zero to the sum.
        demands[col] = demand_approx.groupby(demand_approx.index).sum()

    # (sector, year) x country  ->  (country, year) x sector
    demands = pd.concat(demands).unstack().T.clip(lower=0)
    demands.index.names = ["country", "year"]

    return demands


if __name__ == "__main__":
    # Fall back to a mocked snakemake object when the script is executed
    # directly instead of through the workflow.
    if "snakemake" not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake("build_heat_totals")

    # Energy totals are indexed by the first two CSV columns.
    energy_totals = pd.read_csv(snakemake.input.energy_totals, index_col=[0, 1])

    # Transpose the HDD table after reading and cast the resulting index
    # labels to integers.
    hdd = pd.read_csv(snakemake.input.hdd, index_col=0).transpose()
    hdd.index = hdd.index.astype(int)

    approximate_heat_demand(energy_totals, hdd).to_csv(snakemake.output.heat_totals)
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2023-04-30 08:52:58 +00:00			`# -- coding: utf-8 --`
Add copyright headers 2024-03-01 12:30:01 +00:00			`# SPDX-FileCopyrightText: : 2017-2024 The PyPSA-Eur Authors`
			`#`
			`# SPDX-License-Identifier: MIT`
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`"""`
			`Approximate heat demand for all weather years.`
Document heating rules 2024-06-17 09:38:23 +00:00
			:func:`approximate_heat_demand` approximates annual heat demand based on energy totals and heating degree days (HDD) using a regression of heat demand on HDDs.

			`Inputs`
			`------`
			- `resources/<run_name>/energy_totals.csv`: Energy consumption by sector (columns), country and year. Output of :func:`scripts.build_energy_totals.py`.
			- `data/era5-annual-HDD-per-country.csv`: Number of heating degree days by year (columns) and country (index).

			`Outputs`
			`-------`
			- `resources/<run_name>/heat_totals.csv`: Approximated annual heat demand for each country.
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`"""`

			`from itertools import product`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2023-04-30 08:52:58 +00:00
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`import pandas as pd`
			`from numpy.polynomial import Polynomial`

			`idx = pd.IndexSlice`


Document heating rules 2024-06-17 09:38:23 +00:00			`def approximate_heat_demand(energy_totals: pd.DataFrame, hdd: pd.DataFrame):`
			`"""`
			`Approximate heat demand for a set of countries based on energy totals and`
			`heating degree days (HDD). A polynomial regression of heat demand on HDDs`
			`is performed on the data from 2007 to 2021. Then, for 2022 and 2023, the`
			`heat demand is estimated from known HDDs based on the regression.`

			`Parameters`
			`----------`
			`energy_totals : pd.DataFrame`
			DataFrame with energy consumption by sector (columns), country and year. Output of :func:`scripts.build_energy_totals.py`.
			`hdd : pd.DataFrame`
correct typos in heating rule docs 2024-06-17 09:47:55 +00:00			`DataFrame with number of heating degree days by year (columns) and country (index).`
Document heating rules 2024-06-17 09:38:23 +00:00
			`Returns`
			`-------`
			`pd.DataFrame`
			`DataFrame with approximated heat demand for each country.`

			`Notes`
			`-----`
			`- Missing data is filled forward for GB in 2020 and backward for CH from 2007 to 2009.`
			`- If only one year of heating data is available for a country, a point (0, 0) is added to make the polynomial fit work.`
			`"""`
build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00
build_heat_totals: allow subset of countries 2024-03-14 17:47:09 +00:00			`countries = hdd.columns.intersection(energy_totals.index.levels[0])`
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00
			`demands = {}`

			`for kind, sector in product(["total", "electricity"], ["services", "residential"]):`
build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00			`# reduced number years (2007-2021) for regression because it implicitly`
			`# assumes a constant building stock`
			`row = idx[:, 2007:2021]`
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`col = f"{kind} {sector} space"`
			`demand = energy_totals.loc[row, col].unstack(0)`

build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00			`# ffill for GB in 2020- and bfill for CH 2007-2009`
			`# compromise to have more years available for the fit`
			`demand = demand.ffill(axis=0).bfill(axis=0)`

interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`demand_approx = {}`

			`for c in countries:`
			`Y = demand[c].dropna()`
			`X = hdd.loc[Y.index, c]`

Various bugfixes to make multiyear branch run 2024-02-29 10:38:21 +00:00			`# Sometimes (looking at you, Switzerland) we only have`
			`# _one_ year of heating data to base the prediction on. In`
			`# this case we add a point at 0, 0 to make a "polynomial"`
			`# fit work.`
			`if len(X) == len(Y) == 1:`
			`X.loc[-1] = 0`
			`Y.loc[-1] = 0`

interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`to_predict = hdd.index.difference(Y.index)`
			`X_pred = hdd.loc[to_predict, c]`

			`p = Polynomial.fit(X, Y, 1)`
			`Y_pred = p(X_pred)`

			`demand_approx[c] = pd.Series(Y_pred, index=to_predict)`

			`demand_approx = pd.DataFrame(demand_approx)`
			`demand_approx = pd.concat([demand, demand_approx]).sort_index()`
			`demands[f"{kind} {sector} space"] = demand_approx.groupby(`
			`demand_approx.index`
			`).sum()`

			`demands = pd.concat(demands).unstack().T.clip(lower=0)`
			`demands.index.names = ["country", "year"]`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2023-04-30 08:52:58 +00:00
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00			`return demands`


			`if __name__ == "__main__":`
			`if "snakemake" not in globals():`
build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00			`from _helpers import mock_snakemake`
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci 2023-04-30 08:52:58 +00:00
build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00			`snakemake = mock_snakemake("build_heat_totals")`
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00
			`hdd = pd.read_csv(snakemake.input.hdd, index_col=0).T`
build_heat_totals: use more years for regression 2024-03-14 13:09:39 +00:00			`hdd.index = hdd.index.astype(int)`
interpolate heat demands for years outside 2007-2015 2022-07-25 12:35:54 +00:00
			`energy_totals = pd.read_csv(snakemake.input.energy_totals, index_col=[0, 1])`

			`heat_demand = approximate_heat_demand(energy_totals, hdd)`

			`heat_demand.to_csv(snakemake.output.heat_totals)`