2023-03-06 08:27:45 +00:00
# -*- coding: utf-8 -*-
2024-02-19 15:21:48 +00:00
# SPDX-FileCopyrightText: : 2020-2024 The PyPSA-Eur Authors
2023-03-06 17:49:23 +00:00
#
# SPDX-License-Identifier: MIT
2023-03-09 11:45:43 +00:00
"""
2024-06-17 09:38:23 +00:00
Build total energy demands and carbon emissions per country using JRC IDEES ,
eurostat , and EEA data .
- Country - specific data is read in : func : ` build_eurostat ` , : func : ` build_idees ` and ` build_swiss ` .
- : func : ` build_energy_totals ` then combines energy data from Eurostat , Swiss , and IDEES data and : func : ` rescale_idees_from_eurostat ` rescales IDEES data to match Eurostat data .
- : func : ` build_district_heat_share ` calculates the share of district heating for each country from IDEES data .
- Historical CO2 emissions are calculated in : func : ` build_eea_co2 ` and : func : ` build_eurostat_co2 ` and combined in : func : ` build_co2_totals ` .
Relevant Settings
- - - - - - - - - - - - - - - - -
. . code : : yaml
countries :
energy :
Inputs
- - - - - -
- ` resources / < run_name > / nuts3_shapes . gejson ` : NUTS3 shapes .
- ` data / bundle / eea_UNFCCC_v23 . csv ` : CO2 emissions data from EEA .
- ` data / switzerland - new_format - all_years . csv ` : Swiss energy data .
- ` data / gr - e - 11.03 .02 .01 .01 - cc . csv ` : Swiss transport data
- ` data / bundle / jrc - idees ` : JRC IDEES data .
- ` data / district_heat_share . csv ` : District heating shares .
- ` data / eurostat / Balances - April2023 ` : Eurostat energy balances .
- ` data / eurostat / eurostat - household_energy_balances - february_2024 . csv ` : Eurostat household energy balances .
Outputs
- - - - - - -
- ` resources / < run_name > / energy_totals . csv ` : Energy totals per country , sector and year .
- ` resources / < run_name > / co2_totals . csv ` : CO2 emissions per country , sector and year .
- ` resources / < run_name > / transport_data . csv ` : Transport data per country and year .
- ` resources / < run_name > / district_heat_share . csv ` : District heating share per by country and year .
2023-03-09 11:45:43 +00:00
"""
2023-02-23 09:30:32 +00:00
import logging
2021-07-01 18:09:04 +00:00
import multiprocessing as mp
from functools import partial
2024-06-17 09:38:23 +00:00
from typing import List
2021-07-01 18:09:04 +00:00
2023-03-09 10:04:41 +00:00
import country_converter as coco
2019-04-16 14:03:51 +00:00
import geopandas as gpd
2021-07-01 18:09:04 +00:00
import numpy as np
2019-04-16 14:03:51 +00:00
import pandas as pd
2024-02-12 10:53:20 +00:00
from _helpers import configure_logging , mute_print , set_scenario_config
2021-07-01 18:09:04 +00:00
from tqdm import tqdm
2019-04-16 14:03:51 +00:00
2023-03-09 10:04:41 +00:00
# Shared country-code converter; used below to obtain the EU28/EU27 ISO2 lists.
cc = coco.CountryConverter()

logger = logging.getLogger(__name__)

# Shorthand for building MultiIndex slicers, e.g. ``df.loc[idx[country, :], col]``.
idx = pd.IndexSlice

# Mapping of JRC-IDEES agriculture fuel categories to the codes they aggregate
# (from JRC-2021 methodology p.58).
# NOTE(review): the codes look like Eurostat energy-balance product codes --
# confirm against the methodology document.
agriculture_idees_eurostat_mapping = {
    "Solids": ["C0000X0350-0370", "P1000", "S2000"],
    "LPG": ["O4630"],
    "Diesel oil and liquid biofuels": [
        "O4671XR5220B",
        "R5210P",
        "R5210B",
        "R5220P",
        "R5220B",
        "R5230P",
        "R5230B",
        "R5290",
    ],
    "Fuel oil and other liquids": [
        "O4680",
        "O4100_TOT_4200-4500XBIO",
        "O4652XR5210B",
        "O4651",
        "O4653",
        "O4661XR5230B",
        "O4669",
        "O4640",
        "O4691",
        "O4692",
        "O4695",
        "O4694",
        "O4693",
        "O4699",
    ],
    "Natural gas and biogas": ["G3000", "C0350-0370", "R5300"],
    "Biomass and waste": ["R5110-5150_W6000RI", "R5160", "W6210", "W6100_6220"],
    "Solar and geothermal": ["RA200", "RA410"],
    "Ambient heat": ["RA600"],
    "Distributed heat": ["H8000"],
    "Electricity": ["E7000"],
}


def cartesian(s1: pd.Series, s2: pd.Series) -> pd.DataFrame:
    """
    Compute the Cartesian product of two pandas Series.

    Parameters
    ----------
    s1 : pd.Series
        The first pandas Series; its index becomes the row index.
    s2 : pd.Series
        The second pandas Series; its index becomes the column index.

    Returns
    -------
    pd.DataFrame
        A DataFrame representing the Cartesian product of s1 and s2, i.e. the
        entry in row ``i`` and column ``j`` is ``s1[i] * s2[j]``.

    Examples
    --------
    >>> s1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    >>> s2 = pd.Series([4, 5, 6], index=["d", "e", "f"])
    >>> cartesian(s1, s2)
        d   e   f
    a   4   5   6
    b   8  10  12
    c  12  15  18
    """
    return pd.DataFrame(np.outer(s1, s2), index=s1.index, columns=s2.index)


def reverse(dictionary: dict) -> dict:
    """
    Reverses the keys and values of a dictionary.

    Parameters
    ----------
    dictionary : dict
        The dictionary to be reversed. Its values must be hashable because
        they become the keys of the result.

    Returns
    -------
    dict
        A new dictionary with the keys and values reversed. If several keys
        share the same value, the key that comes last in iteration order wins.

    Examples
    --------
    >>> d = {"a": 1, "b": 2, "c": 3}
    >>> reverse(d)
    {1: 'a', 2: 'b', 3: 'c'}
    """
    return {v: k for k, v in dictionary.items()}


# Country codes that are replaced before building JRC-IDEES and Eurostat file
# names (see ``idees_per_country`` and ``build_eurostat``).
idees_rename = {"GR": "EL", "GB": "UK"}

eu28 = cc.EU28as("ISO2").ISO2.tolist()
eu27 = cc.EU27as("ISO2").ISO2.tolist()

# Same members as ``eu28`` but with "GB" replaced by "UK".
# NOTE(review): presumably the spelling used in the EEA data -- confirm.
eu28_eea = eu28.copy()
eu28_eea.remove("GB")
eu28_eea.append("UK")

# Short sector names mapped to the longer, IPCC-style category labels.
# NOTE(review): presumably the labels used in the EEA emissions file -- confirm.
to_ipcc = {
    "electricity": "1.A.1.a - Public Electricity and Heat Production",
    "residential non-elec": "1.A.4.b - Residential",
    "services non-elec": "1.A.4.a - Commercial/Institutional",
    "rail non-elec": "1.A.3.c - Railways",
    "road non-elec": "1.A.3.b - Road Transportation",
    "domestic navigation": "1.A.3.d - Domestic Navigation",
    "international navigation": "1.D.1.b - International Navigation",
    "domestic aviation": "1.A.3.a - Domestic Aviation",
    "international aviation": "1.D.1.a - International Aviation",
    "total energy": "1 - Energy",
    "industrial processes": "2 - Industrial Processes and Product Use",
    "agriculture": "3 - Agriculture",
    "agriculture, forestry and fishing": "1.A.4.c - Agriculture/Forestry/Fishing",
    "LULUCF": "4 - Land Use, Land-Use Change and Forestry",
    "waste management": "5 - Waste management",
    "other": "6 - Other Sector",
    "indirect": "ind_CO2 - Indirect CO2",
    "total wL": "Total (with LULUCF)",
    "total woL": "Total (without LULUCF)",
}


def eurostat_per_country(input_eurostat: str, country: str) -> pd.DataFrame:
    """
    Read energy balance data for a specific country from Eurostat.

    Parameters
    ----------
    input_eurostat : str
        Path to the directory containing Eurostat data files.
    country : str
        Country code for the specific country.

    Returns
    -------
    pd.DataFrame
        Concatenated energy balance data for the specified country. The
        sheet names form the outermost index level.

    Raises
    ------
    KeyError
        If the workbook has no sheet named "Cover".

    Notes
    -----
    - The function reads `<input_eurostat>/<country>-Energy-balance-sheets-April-2023-edition.xlsb`.
    - It removes the "Cover" sheet from the data and concatenates all the remaining sheets into a single DataFrame.
    """
    filename = (
        f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
    )
    # ``sheet_name=None`` loads every sheet into a dict keyed by sheet name.
    sheets = pd.read_excel(
        filename,
        engine="pyxlsb",
        sheet_name=None,
        skiprows=4,
        index_col=list(range(4)),
    )
    sheets.pop("Cover")
    return pd.concat(sheets)


def build_eurostat(
    input_eurostat: str,
    countries: List[str],
    nprocesses: int = 1,
    disable_progressbar: bool = False,
) -> pd.DataFrame:
    """
    Return multi-index for all countries' energy data in TWh/a.

    Parameters
    ----------
    input_eurostat : str
        Path to the Eurostat database.
    countries : List[str]
        List of countries for which energy data is to be retrieved.
    nprocesses : int, optional
        Number of processes to use for parallel execution, by default 1.
    disable_progressbar : bool, optional
        Whether to disable the progress bar, by default False.

    Returns
    -------
    pd.DataFrame
        Multi-index DataFrame containing energy data for all countries in TWh/a.

    Notes
    -----
    - The function first renames the countries in the input list using the `idees_rename` mapping and removes "CH".
    - It then reads country-wise data using :func:`eurostat_per_country` into a single DataFrame.
    - The data is reordered, converted to TWh/a, and missing values are filled.
    """
    # Sorted list rather than a bare set: the worker inputs and the concat
    # keys below must line up, and a list makes that order explicit.
    countries = sorted(
        {idees_rename.get(country, country) for country in countries} - {"CH"}
    )

    func = partial(eurostat_per_country, input_eurostat)
    tqdm_kwargs = dict(
        ascii=False,
        unit=" country",
        total=len(countries),
        desc="Build from eurostat database",
        disable=disable_progressbar,
    )
    with mute_print(), mp.Pool(processes=nprocesses) as pool:
        dfs = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))

    index_names = ["country", "year", "lvl1", "lvl2", "lvl3", "lvl4"]
    df = pd.concat(dfs, keys=countries, names=index_names)
    # the "year" level comes from the sheet names; cast it to integers
    df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1)

    # drop columns whose header starts with "Unnamed"
    unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
    df.drop(unnamed_cols, axis=1, inplace=True)
    # drop columns labelled with a year between 1990 and 2021, if present
    df.drop(list(range(1990, 2022)), axis=1, inplace=True, errors="ignore")

    # make numeric values where possible
    df.replace("Z", 0, inplace=True)
    df = df.apply(pd.to_numeric, errors="coerce")
    df = df.select_dtypes(include=[np.number])

    # write 'International aviation' to the lower level of the multiindex
    int_avia = df.index.get_level_values(3) == "International aviation"
    temp = df.loc[int_avia]
    temp.index = pd.MultiIndex.from_frame(
        temp.index.to_frame().fillna("International aviation")
    )
    df = pd.concat([temp, df.loc[~int_avia]])

    # Fill in missing data on "Domestic aviation" for each country.
    for country in countries:
        slicer = idx[country, :, :, :, "Domestic aviation"]
        # For the Total and Fossil energy columns, fill in zeros with
        # the closest non-zero value in the year index.
        for col in ["Total", "Fossil energy"]:
            df.loc[slicer, col] = (
                df.loc[slicer, col].replace(0.0, np.nan).ffill().bfill()
            )

    # Renaming some indices
    index_rename = {
        "Households": "Residential",
        "Commercial & public services": "Services",
        "Domestic navigation": "Domestic Navigation",
        "International maritime bunkers": "Bunkers",
        "UK": "GB",
        "EL": "GR",
    }
    columns_rename = {"Total": "Total all products"}
    df.rename(index=index_rename, columns=columns_rename, inplace=True)
    df.sort_index(inplace=True)

    # convert to TWh/a from ktoe/a
    df *= 11.63 / 1e3

    return df


def build_swiss() -> pd.DataFrame:
    """
    Return a pd.DataFrame of Swiss energy data in TWh/a.

    Returns
    -------
    pd.DataFrame
        Swiss energy data in TWh/a, with one column per entry of the "item"
        index level and the year as innermost row index level.

    Notes
    -----
    - Reads Swiss energy data from the path in `snakemake.input.swiss`
      (`data/switzerland-new_format-all_years.csv` according to the module
      docstring).
    - Reshapes and renames data.
    - Converts energy units from PJ/a to TWh/a.
    """
    fn = snakemake.input.swiss

    # the first two CSV columns form the row index; the remaining column
    # headers are years
    df = pd.read_csv(fn, index_col=[0, 1])
    df.columns = df.columns.astype(int)
    df.columns.name = "year"

    # move years into the rows and spread the "item" level over the columns
    df = df.stack().unstack("item")
    df.columns.name = None

    # convert PJ/a to TWh/a
    df /= 3.6

    return df


def idees_per_country(ct: str, base_dir: str) -> pd.DataFrame:
    """
    Calculate energy totals per country using JRC-IDEES data.

    Parameters
    ----------
    ct : str
        The country code.
    base_dir : str
        The base directory where the JRC-IDEES data files are located.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the energy totals per country. Columns are
        energy uses; the rows follow the columns of the IDEES sheets.

    Raises
    ------
    AssertionError
        If a row of a JRC-IDEES sheet is not at the expected position.

    Notes
    -----
    - Retrieves JRC-IDEES data for the specified country from `base_dir` for residential, tertiary, and transport sectors.
    - Calculates energy totals for each sector, stores them in a dictionary and returns them as data frame.
    - Layout checks ensure indices of JRC-IDEES data are as expected.
    """
    ct_idees = idees_rename.get(ct, ct)
    fn_residential = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Residential_{ct_idees}.xlsx"
    fn_tertiary = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Tertiary_{ct_idees}.xlsx"
    fn_transport = f"{base_dir}/{ct_idees}/JRC-IDEES-2021_Transport_{ct_idees}.xlsx"

    # The insertion order determines the column order of the result.
    ct_totals = {}
    ct_totals.update(_idees_residential_totals(fn_residential))
    ct_totals.update(_idees_tertiary_totals(fn_tertiary))
    ct_totals.update(_idees_transport_totals(fn_transport))

    return pd.DataFrame(ct_totals)


def _idees_row(df: pd.DataFrame, position: int, label: str) -> pd.Series:
    """Return row ``position`` of ``df`` after checking that it is labelled ``label``."""
    found = df.index[position]
    if found != label:
        # Raised explicitly instead of using ``assert`` so that the layout
        # check is not stripped when Python runs with ``-O``.
        raise AssertionError(
            f"Unexpected JRC-IDEES layout: row {position} is {found!r}, "
            f"expected {label!r}."
        )
    return df.iloc[position]


def _idees_rows_sum(df: pd.DataFrame, expected: dict) -> pd.Series:
    """Sum the rows at the positions in ``expected`` after checking their labels."""
    for position, label in expected.items():
        _idees_row(df, position, label)
    return df.iloc[list(expected)].sum()


def _idees_residential_totals(fn_residential: str) -> dict:
    """Collect the residential totals from the JRC-IDEES residential workbook."""
    totals = {}

    # Open the workbook once and parse the individual sheets from it.
    with pd.ExcelFile(fn_residential) as workbook:
        df = pd.read_excel(workbook, sheet_name="RES_hh_fec", index_col=0)

        rows = ["Advanced electric heating", "Conventional electric heating"]
        totals["electricity residential space"] = df.loc[rows].sum()
        totals["total residential space"] = df.loc["Space heating"]
        totals["total residential water"] = df.loc["Water heating"]

        # "Electricity" occurs several times in the sheet, hence positional access
        totals["electricity residential water"] = _idees_row(df, 23, "Electricity")

        totals["total residential cooking"] = df.loc["Cooking"]

        totals["electricity residential cooking"] = _idees_row(df, 30, "Electricity")

        df = pd.read_excel(workbook, sheet_name="RES_summary", index_col=0)

        row = "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals["total residential"] = df.loc[row]

        totals["electricity residential"] = _idees_row(df, 40, "Electricity")

        # TODO derived heat changed to distributed heat and numbers changed as well!
        # this needs to be checked
        totals["derived heat residential"] = _idees_row(df, 39, "Distributed heat")

        totals["thermal uses residential"] = _idees_row(df, 43, "Thermal uses")

    return totals


def _idees_tertiary_totals(fn_tertiary: str) -> dict:
    """Collect services and agriculture totals from the JRC-IDEES tertiary workbook."""
    totals = {}

    with pd.ExcelFile(fn_tertiary) as workbook:
        # services

        df = pd.read_excel(workbook, sheet_name="SER_hh_fec", index_col=0)

        totals["total services space"] = df.loc["Space heating"]

        rows = ["Advanced electric heating", "Conventional electric heating"]
        totals["electricity services space"] = df.loc[rows].sum()

        totals["total services water"] = df.loc["Hot water"]

        totals["electricity services water"] = _idees_row(df, 24, "Electricity")

        totals["total services cooking"] = df.loc["Catering"]

        totals["electricity services cooking"] = _idees_row(df, 31, "Electricity")

        df = pd.read_excel(workbook, sheet_name="SER_summary", index_col=0)

        row = "Energy consumption by fuel - Eurostat structure (ktoe)"
        totals["total services"] = df.loc[row]

        totals["electricity services"] = _idees_row(df, 43, "Electricity")

        # TODO check derived heat changed to distributed heat
        totals["derived heat services"] = _idees_row(df, 42, "Distributed heat")

        totals["thermal uses services"] = _idees_row(df, 46, "Thermal uses")

        # agriculture, forestry and fishing
        start = "Detailed split of energy consumption (ktoe)"
        end = "Market shares of energy uses (%)"

        # only keep the rows between the two section headers (both inclusive)
        df = pd.read_excel(workbook, sheet_name="AGR_fec", index_col=0).loc[start:end]

        rows = [
            "Lighting",
            "Ventilation",
            "Specific electricity uses",
            "Pumping devices (electricity)",
        ]
        totals["total agriculture electricity"] = df.loc[rows].sum()

        rows = ["Specific heat uses", "Low enthalpy heat"]
        totals["total agriculture heat"] = df.loc[rows].sum()

        rows = [
            "Motor drives",
            "Farming machine drives (diesel oil and liquid biofuels)",
            "Pumping devices (diesel oil and liquid biofuels)",
        ]
        totals["total agriculture machinery"] = df.loc[rows].sum()

        row = "Agriculture, forestry and fishing"
        totals["total agriculture"] = df.loc[row]

    return totals


def _idees_transport_totals(fn_transport: str) -> dict:
    """Collect road, rail, aviation and navigation totals from the JRC-IDEES transport workbook."""
    totals = {}

    with pd.ExcelFile(fn_transport) as workbook:
        # road

        df = pd.read_excel(workbook, sheet_name="TrRoad_ene", index_col=0)

        totals["total road"] = df.loc["by fuel (EUROSTAT DATA)"]

        totals["electricity road"] = df.loc["Electricity"]

        totals["total two-wheel"] = df.loc["Powered two-wheelers (Gasoline)"]

        totals["total passenger cars"] = _idees_row(df, 19, "Passenger cars")

        totals["electricity passenger cars"] = _idees_row(
            df, 30, "Battery electric vehicles"
        )

        totals["total other road passenger"] = _idees_row(
            df, 31, "Motor coaches, buses and trolley buses"
        )

        totals["electricity other road passenger"] = _idees_row(
            df, 39, "Battery electric vehicles"
        )

        totals["total light duty road freight"] = _idees_row(
            df, 41, "Light commercial vehicles"
        )

        totals["electricity light duty road freight"] = _idees_row(
            df, 49, "Battery electric vehicles"
        )

        row = "Heavy goods vehicles (Diesel oil incl. biofuels)"
        totals["total heavy duty road freight"] = df.loc[row]

        totals["passenger car efficiency"] = _idees_row(df, 61, "Passenger cars")

        # rail

        df = pd.read_excel(workbook, sheet_name="TrRail_ene", index_col=0)

        totals["total rail"] = df.loc["by fuel"]

        totals["electricity rail"] = df.loc["Electricity"]

        totals["total rail passenger"] = _idees_row(df, 9, "Passenger transport")

        totals["electricity rail passenger"] = _idees_rows_sum(
            df,
            {
                10: "Metro and tram, urban light rail",
                13: "Electric",
                14: "High speed passenger trains",
            },
        )

        totals["total rail freight"] = _idees_row(df, 15, "Freight transport")

        totals["electricity rail freight"] = _idees_row(df, 17, "Electric")

        # aviation

        df = pd.read_excel(workbook, sheet_name="TrAvia_ene", index_col=0)

        totals["total aviation passenger"] = _idees_row(df, 4, "Passenger transport")

        totals["total aviation freight"] = _idees_row(df, 8, "Freight transport")

        totals["total domestic aviation passenger"] = _idees_row(df, 2, "Domestic")

        # TODO added Ukraine to intra EU flights
        totals["total international aviation passenger"] = _idees_rows_sum(
            df,
            {
                6: "International - Intra-EEAwUK",
                7: "International - Extra-EEAwUK",
            },
        )

        # TODO freight changed from "Domestic and International - Intra-EU" -> split
        # domestic and international (intra-EU and outside EU)
        totals["total domestic aviation freight"] = _idees_row(df, 9, "Domestic")

        totals["total international aviation freight"] = _idees_rows_sum(
            df,
            {
                10: "International - Intra-EEAwUK",
                11: "International - Extra-EEAwUK",
            },
        )

        totals["total domestic aviation"] = (
            totals["total domestic aviation freight"]
            + totals["total domestic aviation passenger"]
        )

        totals["total international aviation"] = (
            totals["total international aviation freight"]
            + totals["total international aviation passenger"]
        )

        # navigation

        df = pd.read_excel(workbook, sheet_name="TrNavi_ene", index_col=0)

        # coastal and inland
        totals["total domestic navigation"] = df.loc["Energy consumption (ktoe)"]

        # road activity

        df = pd.read_excel(workbook, sheet_name="TrRoad_act", index_col=0)

        totals["passenger cars"] = _idees_row(df, 85, "Passenger cars")

    return totals


def build_idees(countries: List[str]) -> pd.DataFrame:
    """
    Build energy totals from IDEES database for the given list of countries
    using :func:`idees_per_country`.

    Parameters
    ----------
    countries : List[str]
        List of country names for which energy totals need to be built.

    Returns
    -------
    pd.DataFrame
        Energy totals for the given countries, indexed by country and year,
        with one column per energy total.

    Notes
    -----
    - Retrieves energy totals per country and year using :func:`idees_per_country`.
    - Only the years 2000 to 2021 are kept.
    - All columns except "passenger cars" are converted from ktoe to TWh.
    - Reads the number of processes, the progress bar setting and the IDEES
      directory from the global `snakemake` object.
    """
    nprocesses = snakemake.threads
    disable_progress = snakemake.config["run"].get("disable_progressbar", False)

    func = partial(idees_per_country, base_dir=snakemake.input.idees)
    tqdm_kwargs = dict(
        ascii=False,
        unit=" country",
        total=len(countries),
        desc="Build from IDEES database",
        disable=disable_progress,
    )
    with mute_print(), mp.Pool(processes=nprocesses) as pool:
        totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))

    totals = pd.concat(
        totals_list,
        keys=countries,
        names=["country", "year"],
    )

    # clean up dataframe
    years = np.arange(2000, 2022)
    # Copy straight after filtering so that the in-place updates below act on
    # an independent frame instead of on a slice of ``totals``.
    totals = totals[totals.index.get_level_values(1).isin(years)].copy()

    # efficiency kgoe/100km -> ktoe/100km so that after conversion TWh/100km
    totals.loc[:, "passenger car efficiency"] /= 1e6

    # convert ktoe to TWh
    exclude = totals.columns.str.fullmatch("passenger cars")
    totals.loc[:, ~exclude] *= 11.63 / 1e3

    return totals


def build_energy_totals(
    countries: List[str],
    eurostat: pd.DataFrame,
    swiss: pd.DataFrame,
    idees: pd.DataFrame,
) -> pd.DataFrame:
    """
    Combine energy totals for the specified countries from Eurostat, Swiss, and
    IDEES data.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which energy totals are to be calculated.
    eurostat : pd.DataFrame
        Eurostat energy balances dataframe. Its first two index levels are
        used as country and year.
    swiss : pd.DataFrame
        Swiss energy data dataframe; replaces any existing "CH" rows.
    idees : pd.DataFrame
        IDEES energy data dataframe indexed by (country, year).

    Returns
    -------
    pd.DataFrame
        Energy totals dataframe for the given countries, indexed by
        (country, year).

    Notes
    -----
    - Missing values are filled based on Eurostat energy balances and average
      shares observed in the available (IDEES) data.
    - Missing agriculture totals are taken from the Eurostat sector
      "Agriculture & forestry"; years reported as zero are replaced by the
      country mean over the non-zero years, and the total is split into
      electricity / heat / machinery using the average IDEES share of each
      end use.
    - The function also performs specific calculations for Norway and splits
      road, rail, and aviation traffic for non-IDEES data.
    - If "BA" is present, its zero/missing entries are filled proportionally
      to "RS" (which must then be present as well).

    References
    ----------
    - `Norway heating data <http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14>`_
    """
    eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}
    eurostat_countries = eurostat.index.levels[0]
    eurostat_years = eurostat.index.levels[1]

    to_drop = ["passenger cars", "passenger car efficiency"]

    new_index = pd.MultiIndex.from_product(
        [countries, eurostat_years], names=["country", "year"]
    )

    df = idees.reindex(new_index).drop(to_drop, axis=1)

    in_eurostat = df.index.levels[0].intersection(eurostat_countries)

    # add international navigation
    slicer = idx[in_eurostat, :, :, "Bunkers", :]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    df.loc[in_eurostat, "total international navigation"] = fill_values

    # add swiss energy data
    df = pd.concat([df.drop("CH", errors="ignore"), swiss]).sort_index()

    # get values for missing countries based on Eurostat EnergyBalances

    # agriculture
    to_fill = df.index[
        df["total agriculture"].isna()
        & df.index.get_level_values("country").isin(eurostat_countries)
    ]
    c = to_fill.get_level_values("country")
    y = to_fill.get_level_values("year")

    # take total final energy consumption from Eurostat
    eurostat_sector = "Agriculture & forestry"
    slicer = idx[c, y, :, :, eurostat_sector]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()

    # fill missing years (showing up as zeros after the sum) by the mean over
    # the other, non-zero years of the same country; the zeros themselves must
    # not enter the mean. Countries without any non-zero year stay at zero.
    nonzero = fill_values.where(fill_values != 0)
    means = nonzero.groupby(level="country").transform("mean").fillna(0.0)
    fill_values = nonzero.fillna(means)

    # write the Eurostat totals into the frame *before* splitting them,
    # otherwise the split below would only multiply NaN
    df.loc[to_fill, "total agriculture"] = fill_values

    # split into end uses by average data from IDEES, one share per end use
    agriculture_total = idees["total agriculture"].replace(0, np.nan)
    for use in ["electricity", "heat", "machinery"]:
        avg = (idees[f"total agriculture {use}"] / agriculture_total).mean()
        df.loc[to_fill, f"total agriculture {use}"] = (
            df.loc[to_fill, "total agriculture"] * avg
        )

    # divide cooking/space/water according to averages in EU28
    uses = ["space", "cooking", "water"]

    to_fill = df.index[
        df["total residential"].isna()
        & df.index.get_level_values("country").isin(eurostat_countries)
    ]
    c = to_fill.get_level_values("country")
    y = to_fill.get_level_values("year")

    for sector in ["residential", "services", "road", "rail"]:
        eurostat_sector = sector.capitalize()

        # fuel use
        for fuel in ["electricity", "total"]:
            slicer = idx[c, y, :, :, eurostat_sector]
            fill_values = (
                eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum()
            )
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values

    for sector in ["residential", "services"]:
        # electric use
        for use in uses:
            fuel_use = df[f"electricity {sector} {use}"]
            fuel = (
                df[f"electricity {sector}"].replace(0, np.nan).infer_objects(copy=False)
            )
            avg = fuel_use.div(fuel).mean()
            logger.debug(
                f"{sector}: average fraction of electricity for {use} is {avg:.3f}"
            )
            df.loc[to_fill, f"electricity {sector} {use}"] = (
                avg * df.loc[to_fill, f"electricity {sector}"]
            )

        # non-electric use
        for use in uses:
            nonelectric_use = (
                df[f"total {sector} {use}"] - df[f"electricity {sector} {use}"]
            )
            nonelectric = df[f"total {sector}"] - df[f"electricity {sector}"]
            nonelectric = nonelectric.copy().replace(0, np.nan)
            avg = nonelectric_use.div(nonelectric).mean()
            logger.debug(
                f"{sector}: average fraction of non-electric for {use} is {avg:.3f}"
            )
            electric_use = df.loc[to_fill, f"electricity {sector} {use}"]
            nonelectric = (
                df.loc[to_fill, f"total {sector}"]
                - df.loc[to_fill, f"electricity {sector}"]
            )
            df.loc[to_fill, f"total {sector} {use}"] = electric_use + avg * nonelectric

    # Fix Norway space and water heating fractions
    # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    # The main heating source for about 73 per cent of the households is based on electricity
    # => 27% is non-electric
    if "NO" in df.index:
        elec_fraction = 0.73

        no_norway = df.drop("NO")

        for sector in ["residential", "services"]:
            # assume non-electric is heating
            nonelectric = (
                df.loc["NO", f"total {sector}"] - df.loc["NO", f"electricity {sector}"]
            )
            total_heating = nonelectric / (1 - elec_fraction)

            for use in uses:
                nonelectric_use = (
                    no_norway[f"total {sector} {use}"]
                    - no_norway[f"electricity {sector} {use}"]
                )
                nonelectric = (
                    no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
                )
                nonelectric = nonelectric.copy().replace(0, np.nan)

                fraction = nonelectric_use.div(nonelectric).mean()
                df.loc["NO", f"total {sector} {use}"] = (
                    total_heating * fraction
                ).values
                df.loc["NO", f"electricity {sector} {use}"] = (
                    total_heating * fraction * elec_fraction
                ).values

    # Missing aviation (c, y and to_fill still refer to the residential gaps)
    slicer = idx[c, y, :, :, "Domestic aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    df.loc[to_fill, "total domestic aviation"] = fill_values

    slicer = idx[c, y, :, :, "International aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    df.loc[to_fill, "total international aviation"] = fill_values

    # missing domestic navigation
    slicer = idx[c, y, :, :, "Domestic Navigation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
    df.loc[to_fill, "total domestic navigation"] = fill_values

    # split road traffic for non-IDEES
    missing = df.index[df["total passenger cars"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [
            f"{fuel} passenger cars",
            f"{fuel} other road passenger",
            f"{fuel} light duty road freight",
        ]
        if fuel == "total":
            selection.extend([f"{fuel} two-wheel", f"{fuel} heavy duty road freight"])
        road = df[selection].sum()
        road_fraction = road / road.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} road"], road_fraction)
        df.loc[missing, road_fraction.index] = fill_values

    # split rail traffic for non-IDEES
    missing = df.index[df["total rail passenger"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [f"{fuel} rail passenger", f"{fuel} rail freight"]
        rail = df[selection].sum()
        rail_fraction = rail / rail.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} rail"], rail_fraction)
        df.loc[missing, rail_fraction.index] = fill_values

    # split aviation traffic for non-IDEES
    missing = df.index[df["total domestic aviation passenger"].isna()]
    for destination in ["domestic", "international"]:
        selection = [
            f"total {destination} aviation passenger",
            f"total {destination} aviation freight",
        ]
        aviation = df[selection].sum()
        aviation_fraction = aviation / aviation.sum()
        fill_values = cartesian(
            df.loc[missing, f"total {destination} aviation"], aviation_fraction
        )
        df.loc[missing, aviation_fraction.index] = fill_values

    for purpose in ["passenger", "freight"]:
        attrs = [
            f"total domestic aviation {purpose}",
            f"total international aviation {purpose}",
        ]
        df.loc[missing, f"total aviation {purpose}"] = df.loc[missing, attrs].sum(
            axis=1
        )

    if "BA" in df.index:
        # fill missing data for BA (services and road energy data)
        # proportional to RS with ratio of total residential demand
        mean_BA = df.loc["BA"].loc[2014:2021, "total residential"].mean()
        mean_RS = df.loc["RS"].loc[2014:2021, "total residential"].mean()
        ratio = mean_BA / mean_RS
        # zeros are treated as missing so that combine_first can fill them
        df.loc["BA"] = (
            df.loc["BA"].replace(0.0, np.nan).infer_objects(copy=False).values
        )
        df.loc["BA"] = df.loc["BA"].combine_first(ratio * df.loc["RS"]).values

    return df
2024-06-17 09:38:23 +00:00
def build_district_heat_share(
    countries: List[str], idees: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate the share of district heating for each country and year.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which to calculate district heating share.
    idees : pd.DataFrame
        IDEES energy data dataframe indexed by (country, year).

    Returns
    -------
    pd.DataFrame
        District heating share with one row per country and one column per
        year (the result of unstacking the year level). Years without any
        value are dropped and remaining gaps are forward-filled along the
        year axis.

    Notes
    -----
    - The share is the sum of residential and services derived heat divided
      by the sum of residential and services thermal uses; zero thermal uses
      are treated as missing to avoid division by zero.
    - Additional shares are read from ``snakemake.input.district_heat_share``
      (first two columns, divided by 100) and broadcast over all years.
    - The row-wise minimum of both data sets is taken as a conservative
      assumption; since the minimum skips missing values, countries missing
      in one data set take the value of the other.
    """
    # district heating share from IDEES
    district_heat = idees[["derived heat residential", "derived heat services"]].sum(
        axis=1
    )
    total_heat = (
        idees[["thermal uses residential", "thermal uses services"]]
        .sum(axis=1)
        .replace(0, np.nan)
    )
    district_heat_share = district_heat / total_heat
    district_heat_share = district_heat_share.reindex(countries, level="country")

    # Missing district heating share
    dh_share = (
        pd.read_csv(snakemake.input.district_heat_share, index_col=0, usecols=[0, 1])
        .div(100)
        .squeeze()
    )

    # make conservative assumption and take minimum from both data sets;
    # the per-country file values are first repeated for every IDEES year
    new_index = pd.MultiIndex.from_product(
        [dh_share.index, district_heat_share.index.get_level_values(1).unique()]
    )
    district_heat_share = pd.concat(
        [district_heat_share, dh_share.reindex(new_index, level=0)], axis=1
    ).min(axis=1)

    district_heat_share = district_heat_share.reindex(countries, level=0)

    # restrict to available years (the former ``.name`` assignment was dropped:
    # a Series name does not survive ``unstack``)
    return district_heat_share.unstack().dropna(how="all", axis=1).ffill(axis=1)
2019-04-16 14:03:51 +00:00
2024-06-17 09:38:23 +00:00
def build_eea_co2(
    input_co2: str, year: int = 1990, emissions_scope: str = "CO2"
) -> pd.DataFrame:
    """
    Calculate CO2 emissions for a given year based on EEA data in Mt.

    Parameters
    ----------
    input_co2 : str
        Path to the input CSV file with CO2 data.
    year : int, optional
        Year for which to calculate emissions, by default 1990.
    emissions_scope : str, optional
        Scope of the emissions to consider, by default "CO2".

    Returns
    -------
    pd.DataFrame
        Emissions for the given year with one row per country and one column
        per sector.

    Notes
    -----
    - The CSV is indexed by country code, pollutant, year and sector, and
      sliced for ``year``, ``emissions_scope`` and the sectors listed in the
      module-level ``to_ipcc`` mapping.
    - "industrial non-elec" is derived as "total energy" minus the listed
      energy sub-sectors.
    - "agriculture, forestry and fishing" is added onto "agriculture".
    - Helper columns are dropped and the values are divided by 1e3.

    References
    ----------
    - `EEA CO2 data <https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16>`_ (downloaded 201228, modified by EEA last on 201221)
    """
    raw = pd.read_csv(input_co2, encoding="latin-1", low_memory=False)

    # one period label is not a plain year; map it to 1986 so that the whole
    # column can be cast to int
    raw = raw.replace({"Year": "1985-1987"}, 1986)
    raw["Year"] = raw["Year"].astype(int)
    raw = raw.set_index(
        ["Country_code", "Pollutant_name", "Year", "Sector_name"]
    ).sort_index()

    selected_countries = ["CH", "EUA", "NO"] + eu28_eea
    selection = idx[selected_countries, emissions_scope, year, to_ipcc.values()]

    by_sector = raw.loc[selection, "emissions"].unstack("Sector_name")
    # columns are renamed via reverse(to_ipcc); pollutant and year levels are
    # constant after the slice and therefore dropped
    emissions = by_sector.rename(columns=reverse(to_ipcc)).droplevel([1, 2])
    emissions = emissions.rename(index={"EUA": "EU28", "UK": "GB"})

    energy_subsectors = [
        "electricity",
        "services non-elec",
        "residential non-elec",
        "road non-elec",
        "rail non-elec",
        "domestic aviation",
        "international aviation",
        "domestic navigation",
        "international navigation",
        "agriculture, forestry and fishing",
    ]
    # whatever remains of total energy emissions is attributed to industry
    remainder = emissions["total energy"] - emissions[energy_subsectors].sum(axis=1)
    emissions["industrial non-elec"] = remainder

    emissions["agriculture"] += emissions["agriculture, forestry and fishing"]

    helper_columns = [
        "total energy",
        "total wL",
        "total woL",
        "agriculture, forestry and fishing",
    ]
    emissions = emissions.drop(columns=helper_columns)

    # scale by 1e-3 to obtain Mt
    # NOTE(review): the previous comment said "Gt to Mt", which contradicts a
    # division by 1e3; presumably the file is in Gg (kt) - confirm.
    return emissions / 1e3
2019-04-16 14:03:51 +00:00
2024-06-17 09:38:23 +00:00
def build_eurostat_co2(eurostat: pd.DataFrame, year: int = 1990) -> pd.Series:
    """
    Calculate CO2 emissions for a given year based on Eurostat fuel consumption
    data and fuel-specific emissions.

    Parameters
    ----------
    eurostat : pd.DataFrame
        DataFrame with Eurostat data; one column per fuel and an index with a
        level named "year".
    year : int, optional
        Year for which to calculate emissions, by default 1990.

    Returns
    -------
    pd.Series
        CO2 emissions for the given year, indexed by the remaining index
        levels of ``eurostat``.

    Notes
    -----
    - The function hard-sets fuel-specific emissions in tCO2_equiv per MWh_th:
        - solid fuels: 0.36 (approximates coal)
        - oil: 0.285 (average of distillate and residue)
        - natural gas: 0.2
    - All other columns get no emission factor and hence do not contribute
      to the row sums.

    References
    ----------
    - Oil values from `EIA <https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11>`_
    - Distillate oil (No. 2) 0.276
    - Residual oil (No. 6) 0.298
    - `EIA Electricity Annual <https://www.eia.gov/electricity/annual/html/epa_a_03.html>`_
    """
    # emissions in tCO2_equiv per MWh_th
    emission_factors = {
        "Solid fuels": 0.36,  # Approximates coal
        "Oil (total)": 0.285,  # Average of distillate and residue
        "Gas": 0.2,  # For natural gas
    }

    # start from an all-NaN factor per column, then set the known fuels
    factors = pd.Series(index=eurostat.columns, dtype=float)
    for fuel, factor in emission_factors.items():
        factors[fuel] = factor

    balances = eurostat.xs(year, level="year")
    weighted = balances.multiply(factors)
    return weighted.sum(axis=1)
2019-04-16 14:03:51 +00:00
2024-06-17 09:38:23 +00:00
def build_co2_totals(
    countries: List[str], eea_co2: pd.DataFrame, eurostat_co2: pd.DataFrame
) -> pd.DataFrame:
    """
    Combine CO2 emissions data from EEA and Eurostat for a list of countries.

    Parameters
    ----------
    countries : List[str]
        List of country codes for which CO2 totals need to be built.
    eea_co2 : pd.DataFrame
        DataFrame with EEA CO2 emissions data, indexed by country.
    eurostat_co2 : pd.DataFrame
        Eurostat-based CO2 emissions with a MultiIndex whose first level is
        the country and whose fourth level is the sector. The code indexes it
        with ``.loc[key].sum()`` and stores a scalar, i.e. it is used like a
        Series - presumably the output of ``build_eurostat_co2``.

    Returns
    -------
    pd.DataFrame
        EEA emissions reindexed to ``countries``, where the rows of BA, RS,
        AL, ME and MK (if requested) are overwritten with Eurostat values.

    Notes
    -----
    - Each emission category is mapped to an index key (or boolean mask) of
      ``eurostat_co2``; the selected entries are summed.
    """
    co2 = eea_co2.reindex(countries)

    non_eea_countries = pd.Index(countries).intersection(
        ["BA", "RS", "AL", "ME", "MK"]
    )

    for ct in non_eea_countries:
        in_country = eurostat_co2.index.get_level_values(0) == ct
        in_agriculture = eurostat_co2.index.isin(
            ["Agriculture & forestry", "Fishing"], level=3
        )

        selectors = {
            "electricity": (ct, "+", "Electricity & heat generation", np.nan),
            "residential non-elec": (ct, "+", "+", "Residential"),
            "services non-elec": (ct, "+", "+", "Services"),
            "road non-elec": (ct, "+", "+", "Road"),
            "rail non-elec": (ct, "+", "+", "Rail"),
            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
            "international navigation": (ct, "-", "Bunkers"),
            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
            "international aviation": (ct, "-", "International aviation"),
            # does not include industrial process emissions or fuel processing/refining
            "industrial non-elec": (ct, "+", "Industry sector"),
            # does not include non-energy emissions
            "agriculture": in_country & in_agriculture,
        }

        for category, selector in selectors.items():
            co2.at[ct, category] = eurostat_co2.loc[selector].sum()

    return co2
2019-04-16 14:03:51 +00:00
2024-06-17 09:38:23 +00:00
def build_transport_data(
    countries: List[str], population: pd.DataFrame, idees: pd.DataFrame
) -> pd.DataFrame:
    """
    Build transport data for a set of countries based on IDEES data.

    Parameters
    ----------
    countries : List[str]
        List of country codes.
    population : pd.DataFrame
        Population data; it is divided into and multiplied with per-country
        car numbers (presumably indexed by country - confirm against caller).
    idees : pd.DataFrame
        DataFrame with IDEES data indexed by (country, year); must provide the
        columns "passenger cars" and "passenger car efficiency".

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by (country, year) with the columns "number cars"
        and "average fuel efficiency", restricted to the years 2000-2021.

    Notes
    -----
    - The function first collects the number of passenger cars.
    - For Switzerland, it reads the data for 2000-2015 from
      ``snakemake.input.swiss_transport``.
    - It fills missing data on the number of cars with the average number of
      cars per capita times population, and missing fuel efficiency with the
      average efficiency.
    - Rows follow the order of ``countries`` (duplicates removed); if "CH" is
      requested the frame is additionally sorted by index.

    References
    ----------
    - Swiss transport data: `BFS <https://www.bfs.admin.ch/bfs/en/home/statistics/mobility-transport/transport-infrastructure-vehicles/vehicles/road-vehicles-stock-level-motorisation.html>`_
    """
    # first collect number of cars
    transport_data = pd.DataFrame(idees["passenger cars"])

    # Order-preserving de-duplication instead of a set: iterating a set of
    # strings depends on hash randomisation and made the row order differ
    # between runs whenever no sort_index() followed.
    countries_without_ch = list(dict.fromkeys(c for c in countries if c != "CH"))
    new_index = pd.MultiIndex.from_product(
        [countries_without_ch, transport_data.index.levels[1]],
        names=["country", "year"],
    )
    transport_data = transport_data.reindex(index=new_index)

    if "CH" in countries:
        fn = snakemake.input.swiss_transport
        swiss_cars = pd.read_csv(fn, index_col=0).loc[2000:2015, ["passenger cars"]]
        swiss_cars.index = pd.MultiIndex.from_product(
            [["CH"], swiss_cars.index], names=["country", "year"]
        )
        transport_data = pd.concat([transport_data, swiss_cars]).sort_index()

    transport_data = transport_data.rename(columns={"passenger cars": "number cars"})

    # clean up dataframe
    years = np.arange(2000, 2022)
    transport_data = transport_data[
        transport_data.index.get_level_values(1).isin(years)
    ]

    missing = transport_data.index[transport_data["number cars"].isna()]
    if not missing.empty:
        logger.info(
            f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data."
        )

        cars_pp = transport_data["number cars"] / population
        # the average does not depend on the year, so compute it only once
        estimate = cars_pp.mean() * population

        fill_values = {year: estimate for year in transport_data.index.levels[1]}
        fill_values = pd.DataFrame(fill_values).stack()
        fill_values = pd.DataFrame(fill_values, columns=["number cars"])
        fill_values.index.names = ["country", "year"]
        fill_values = fill_values.reindex(transport_data.index)

        transport_data = transport_data.combine_first(fill_values)

    # collect average fuel efficiency in MWh/100km, taking passenger car efficiency in TWh/100km
    transport_data["average fuel efficiency"] = idees["passenger car efficiency"] * 1e6

    missing = transport_data.index[transport_data["average fuel efficiency"].isna()]
    if not missing.empty:
        logger.info(
            f"Missing data on fuel efficiency from:\n{list(missing)}\nFilling gaps with averaged data."
        )

        fill_values = transport_data["average fuel efficiency"].mean()
        transport_data.loc[missing, "average fuel efficiency"] = fill_values

    return transport_data
2024-03-05 17:43:24 +00:00
def rescale_idees_from_eurostat(
    idees_countries: List[str], energy: pd.DataFrame, eurostat: pd.DataFrame
) -> pd.DataFrame:
    """
    Fill years missing in the JRC IDEES data by scaling a source year with
    Eurostat ratios.

    Missing data: ['passenger car efficiency', 'passenger cars']

    Parameters
    ----------
    idees_countries : List[str]
        List of IDEES country codes.
    energy : pd.DataFrame
        DataFrame with JRC IDEES data; it is modified in place and returned.
    eurostat : pd.DataFrame
        DataFrame with Eurostat data.

    Returns
    -------
    pd.DataFrame
        The same ``energy`` object with rescaled IDEES data.

    Notes
    -----
    - Ratios are built as Eurostat values of every year divided by the
      Eurostat values of the reference year 2021, for the columns
      "Total all products" and "Electricity".
    - For each country, the target years 2016-2021 are written from source
      year 2015 and the target years 1990-1999 from source year 2000, by
      combining the ratio with the source-year values via ``cartesian``.
    - NOTE(review): the ratios are relative to 2021 while the source years
      are 2015 and 2000 - confirm that this combination is intended.
    - Finally "total agriculture" and "total road" are recomputed as the sum
      of their sub-categories.

    References
    ----------
    - JRC IDEES data: `JRC IDEES <https://ec.europa.eu/jrc/en/publication/eur-scientific-and-technical-research-reports/jrc-idees>`_
    - Eurostat data: `Eurostat <https://ec.europa.eu/eurostat/data/database>`_
    """
    main_cols = ["Total all products", "Electricity"]

    # Eurostat values of the reference year (2021)
    eurostat_reference = eurostat.xs(2021, level="year")[main_cols]

    # ratio of every year to the reference year
    ratio = (eurostat[main_cols] / eurostat_reference).droplevel([2, 5])
    ratio = ratio.rename(
        columns={"Total all products": "total", "Electricity": "ele"},
        index={new: old for old, new in idees_rename.items()},
    )

    mappings = {
        "Residential": {
            "total": [
                "total residential space",
                "total residential water",
                "total residential cooking",
                "total residential",
                "derived heat residential",
                "thermal uses residential",
            ],
            "elec": [
                "electricity residential space",
                "electricity residential water",
                "electricity residential cooking",
                "electricity residential",
            ],
        },
        "Services": {
            "total": [
                "total services space",
                "total services water",
                "total services cooking",
                "total services",
                "derived heat services",
                "thermal uses services",
            ],
            "elec": [
                "electricity services space",
                "electricity services water",
                "electricity services cooking",
                "electricity services",
            ],
        },
        "Agriculture & forestry": {
            "total": [
                "total agriculture heat",
                "total agriculture machinery",
                "total agriculture",
            ],
            "elec": [
                "total agriculture electricity",
            ],
        },
        "Road": {
            "total": [
                "total road",
                "total passenger cars",
                "total other road passenger",
                "total light duty road freight",
                "total heavy duty road freight",
            ],
            "elec": [
                "electricity road",
                "electricity passenger cars",
                "electricity other road passenger",
                "electricity light duty road freight",
            ],
        },
        "Rail": {
            "total": [
                "total rail",
                "total rail passenger",
                "total rail freight",
            ],
            "elec": [
                "electricity rail",
                "electricity rail passenger",
                "electricity rail freight",
            ],
        },
    }

    avia_inter = [
        "total aviation passenger",
        "total aviation freight",
        "total international aviation passenger",
        "total international aviation freight",
        "total international aviation",
    ]
    avia_domestic = [
        "total domestic aviation passenger",
        "total domestic aviation freight",
        "total domestic aviation",
    ]
    navigation = [
        "total domestic navigation",
    ]
    # international navigation is already read in from the eurostat data directly

    # (source year, target years) pairs and the column groups scaled with the
    # "total" ratio of a single Eurostat transport sector
    filling_years = [(2015, slice(2016, 2021)), (2000, slice(1990, 1999))]
    level_drops = ["country", "lvl2", "lvl3"]
    transport_sectors = ("Domestic aviation", "International aviation", "Domestic Navigation")
    transport_columns = (
        ("International aviation", avia_inter),
        ("Domestic aviation", avia_domestic),
        ("Domestic Navigation", navigation),
    )
    agriculture_parts = [
        "total agriculture electricity",
        "total agriculture heat",
        "total agriculture machinery",
    ]
    road_parts = [
        "total passenger cars",
        "total other road passenger",
        "total light duty road freight",
        "total heavy duty road freight",
    ]

    for country in idees_countries:
        for source_year, target_years in filling_years:
            slicer_source = idx[country, source_year, :, :]
            slicer_target = idx[country, target_years, :, :]

            for sector, mapping in mappings.items():
                sector_ratio = ratio.loc[
                    (country, slice(None), slice(None), sector)
                ].droplevel("lvl2")

                # "total" columns first, then the electricity columns
                for ratio_col, group in (("total", "total"), ("ele", "elec")):
                    columns = mapping[group]
                    energy.loc[slicer_target, columns] = cartesian(
                        sector_ratio.loc[target_years, ratio_col],
                        energy.loc[slicer_source, columns].squeeze(axis=0),
                    ).values

            # look up all transport ratios before writing any of them
            transport_ratio = {
                name: ratio.loc[idx[country, :, :, name], "total"].droplevel(
                    level_drops
                )
                for name in transport_sectors
            }

            for name, columns in transport_columns:
                energy.loc[slicer_target, columns] = cartesian(
                    transport_ratio[name].loc[target_years],
                    energy.loc[slicer_source, columns].squeeze(axis=0),
                ).values

        # set the total of agriculture/road to the sum of all agriculture/road categories (corresponding to the IDEES data)
        rows = idx[country, :]
        energy.loc[rows, "total agriculture"] = energy.loc[
            rows, agriculture_parts
        ].sum(axis=1)
        energy.loc[rows, "total road"] = energy.loc[rows, road_parts].sum(axis=1)

    return energy
2019-04-16 14:03:51 +00:00
2024-02-23 12:56:43 +00:00
2024-06-17 09:38:23 +00:00
def update_residential_from_eurostat(energy: pd.DataFrame) -> pd.DataFrame:
    """
    Update residential energy balances from disaggregated Eurostat household
    data, mutating the input DataFrame in place.

    Parameters
    ----------
    energy : pd.DataFrame
        DataFrame with energy data. Only entries whose index labels also
        occur in the Eurostat household data are overwritten; everything
        else is left untouched.

    Returns
    -------
    pd.DataFrame
        The same ``energy`` object that was passed in, after the update.

    Notes
    -----
    - The Eurostat household data is read from
      ``snakemake.input.eurostat_households``.
    - Every target column of ``energy`` is mapped to a pair of Eurostat
      codes (energy balance code, SIEC code).
    - For each column, the matching observations are selected (aggregate
      regions and non-positive values are excluded), divided by 3.6e3
      (TJ to TWh), and re-labelled with the inverse of ``idees_rename``
      before being written into ``energy``.
    """
    eurostat_households = pd.read_csv(snakemake.input.eurostat_households)

    # Target column in `energy` -> (Eurostat energy balance code, SIEC code)
    nrg_type = {
        " total residential ": (" FC_OTH_HH_E ", " TOTAL "),
        " total residential space ": (" FC_OTH_HH_E_SH ", " TOTAL "),
        " total residential water ": (" FC_OTH_HH_E_WH ", " TOTAL "),
        " total residential cooking ": (" FC_OTH_HH_E_CK ", " TOTAL "),
        " electricity residential ": (" FC_OTH_HH_E ", " E7000 "),
        " electricity residential space ": (" FC_OTH_HH_E_SH ", " E7000 "),
        " electricity residential water ": (" FC_OTH_HH_E_WH ", " E7000 "),
        " electricity residential cooking ": (" FC_OTH_HH_E_CK ", " E7000 "),
    }

    # Loop invariants, built once: the inverse of `idees_rename` (to match
    # the country labels of the IDEES data) and the geo entries to discard
    # (XK is dropped because it is already included in the RS data).
    idx_to_rename = {v: k for k, v in idees_rename.items()}
    drop_geo = [" EU27_2020 ", " EA20 ", " XK "]

    # NOTE: `code`, `siec` and `drop_geo` are referenced from the query string
    # through the `@name` syntax, so these local names must not be renamed.
    for nrg_name, (code, siec) in nrg_type.items():
        col_to_rename = {
            " geo ": " country ",
            " TIME_PERIOD ": " year ",
            " OBS_VALUE ": nrg_name,
        }

        # Select the energy balance type, rename columns, and convert TJ to TWh
        selected = eurostat_households.query(
            " nrg_bal == @code and siec == @siec and geo not in @drop_geo and OBS_VALUE > 0 "
        ).rename(columns=col_to_rename)
        nrg_data = selected.set_index([" country ", " year "])[nrg_name] / 3.6e3
        nrg_data = nrg_data.rename(index=idx_to_rename)

        # Update the energy balance only where both data sets share an index
        # entry; `common` (rather than `idx`) avoids shadowing the slicer
        # name used elsewhere in this file.
        common = nrg_data.index.intersection(energy.index)
        energy.loc[common, nrg_name] = nrg_data[common]

    logger.info(
        " Updated energy balances for residential using disaggregate final energy consumption data in Households from Eurostat "
    )

    # Honour the annotated/documented return type (previously returned None).
    return energy
2024-04-14 11:28:50 +00:00
2024-07-18 13:43:38 +00:00
# %%
2019-04-16 14:03:51 +00:00
if __name__ == " __main__ ":
    # When no `snakemake` object is present in the module globals, create one
    # via the mock helper.
    if " snakemake " not in globals():
        from _helpers import mock_snakemake

        snakemake = mock_snakemake(" build_energy_totals ")

    configure_logging(snakemake)
    set_scenario_config(snakemake)

    # --- configuration -----------------------------------------------------
    params = snakemake.params.energy
    countries = snakemake.params.countries
    idees_countries = pd.Index(countries).intersection(eu27)
    base_year_emissions = params[" base_emissions_year "]
    emissions_scope = params[" emissions "]
    hide_progress = snakemake.config[" run "].get(" disable_progressbar ", False)

    # --- population per country, summed over the NUTS3 shapes ---------------
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index(" index ")
    population = nuts3[" pop "].groupby(nuts3.country).sum()

    # --- raw data sets -------------------------------------------------------
    input_eurostat = snakemake.input.eurostat
    eurostat = build_eurostat(
        input_eurostat,
        countries,
        nprocesses=snakemake.threads,
        disable_progressbar=hide_progress,
    )
    swiss = build_swiss()
    idees = build_idees(idees_countries)

    # --- energy totals -------------------------------------------------------
    energy = build_energy_totals(countries, eurostat, swiss, idees)
    # The call mutates `energy` in place, so it must run before the export.
    update_residential_from_eurostat(energy)
    energy.to_csv(snakemake.output.energy_name)

    # --- district heat share (uses the rescaled IDEES data in `energy`) ------
    idees_energy = energy.loc[idees_countries]
    district_heat_share = build_district_heat_share(countries, idees_energy)
    district_heat_share.to_csv(snakemake.output.district_heat_share)

    # --- CO2 totals ----------------------------------------------------------
    eea_co2 = build_eea_co2(
        snakemake.input.co2,
        base_year_emissions,
        emissions_scope,
    )
    eurostat_co2 = build_eurostat_co2(eurostat, base_year_emissions)
    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
    co2.to_csv(snakemake.output.co2_name)

    # --- transport data ------------------------------------------------------
    transport = build_transport_data(countries, population, idees)
    transport.to_csv(snakemake.output.transport_name)