2021-07-01 18:09:04 +00:00
from functools import partial
from tqdm import tqdm
import multiprocessing as mp
2019-04-16 14:03:51 +00:00
import pandas as pd
import geopandas as gpd
2021-07-01 18:09:04 +00:00
import numpy as np
2019-04-16 14:03:51 +00:00
# Shorthand for pandas' IndexSlice; used below to build MultiIndex ``.loc`` slicers.
idx = pd.IndexSlice
2021-07-01 18:09:04 +00:00
def cartesian(s1, s2):
    """Return the outer product of two ``pd.Series`` as a ``pd.DataFrame``.

    Rows are labelled with ``s1.index`` and columns with ``s2.index``; the
    cell at ``(i, j)`` holds ``s1[i] * s2[j]``.
    """
    products = np.outer(s1, s2)
    return pd.DataFrame(products, index=s1.index, columns=s2.index)
def reverse(dictionary):
    """Return a new dict that maps each value of ``dictionary`` to its key.

    If a value occurs more than once, the key seen last wins.
    """
    return dict(zip(dictionary.values(), dictionary.keys()))
# Country codes that are not part of the ``eu28`` list below.
non_EU = ["NO", "CH", "ME", "MK", "RS", "BA", "AL"]

# Codes that differ in the IDEES / Eurostat file names (see the
# ``*_per_country`` readers, which look countries up here first).
idees_rename = {"GR": "EL", "GB": "UK"}

# The 28 member codes; "CY" and "MT" are kept at the end of the list.
eu28 = [
    "FR", "DE", "GB", "IT", "ES", "PL", "SE", "NL", "BE", "FI",
    "CZ", "DK", "PT", "RO", "AT", "BG", "EE", "GR", "LV", "HU",
    "IE", "SK", "LT", "HR", "LU", "SI",
    "CY", "MT",
]

# Same members as ``eu28`` but with "UK" (appended last) in place of "GB";
# this is the list used to select countries from the EEA emissions table.
eu28_eea = [code for code in eu28 if code != "GB"] + ["UK"]
2021-07-01 18:09:04 +00:00
# Maps the short sector names used in this script to the ``Sector_name``
# labels selected from the EEA emissions table in ``build_eea_co2``.
to_ipcc = {
    # fuel combustion sectors (1.A.*)
    "electricity": "1.A.1.a - Public Electricity and Heat Production",
    "residential non-elec": "1.A.4.b - Residential",
    "services non-elec": "1.A.4.a - Commercial/Institutional",
    "rail non-elec": "1.A.3.c - Railways",
    "road non-elec": "1.A.3.b - Road Transportation",
    "domestic navigation": "1.A.3.d - Domestic Navigation",
    # international bunkers (1.D.1.*)
    "international navigation": "1.D.1.b - International Navigation",
    "domestic aviation": "1.A.3.a - Domestic Aviation",
    "international aviation": "1.D.1.a - International Aviation",
    # top-level categories
    "total energy": "1 - Energy",
    "industrial processes": "2 - Industrial Processes and Product Use",
    "agriculture": "3 - Agriculture",
    "agriculture, forestry and fishing": "1.A.4.c - Agriculture/Forestry/Fishing",
    "LULUCF": "4 - Land Use, Land-Use Change and Forestry",
    "waste management": "5 - Waste management",
    "other": "6 - Other Sector",
    "indirect": "ind_CO2 - Indirect CO2",
    # grand totals
    "total wL": "Total (with LULUCF)",
    "total woL": "Total (without LULUCF)",
}
2022-07-23 08:37:32 +00:00
def eurostat_per_country(country):
    """Read all sheets of one country's Eurostat energy balance workbook.

    The workbook is looked up below ``snakemake.input.eurostat``; the country
    code is first translated through ``idees_rename``. Every sheet except
    "Cover" is kept and the sheets are concatenated, so the sheet name
    becomes the outermost index level of the returned DataFrame.
    """
    file_code = idees_rename.get(country, country)
    workbook = (
        snakemake.input.eurostat
        + f"/{file_code}-Energy-balance-sheets-June-2021-edition.xlsb"
    )

    # sheet_name=None makes read_excel return a mapping {sheet name: frame}
    sheets = pd.read_excel(
        workbook,
        sheet_name=None,
        skiprows=4,
        index_col=[0, 1, 2],
        na_values=["+", "-", "=", "Z", ":"],
    )

    # the cover sheet carries no data; a missing "Cover" sheet is an error
    del sheets["Cover"]

    return pd.concat(sheets)
def build_eurostat(countries, year=None):
    """Return multi-index for all countries' energy data in TWh/a.

    The per-country workbooks are read in parallel with
    ``snakemake.threads`` worker processes. If ``year`` is truthy, only the
    cross-section for ``str(year)`` on the "year" level is returned.
    """
    progress_options = {
        "ascii": False,
        "unit": "country",
        "total": len(countries),
        "desc": "Build from eurostat database",
    }
    with mp.Pool(processes=snakemake.threads) as pool:
        per_country = list(
            tqdm(pool.imap(eurostat_per_country, countries), **progress_options)
        )

    level_names = ["country", "year", "lvl1", "lvl2", "lvl3"]
    df = pd.concat(per_country, keys=countries, names=level_names)

    # discard rows and columns that hold no data at all
    df = df.dropna(how="all", axis=0)
    df = df.dropna(how="all", axis=1)

    not_unit_row = df.index.get_level_values("lvl1") != "ktoe"
    df = df[not_unit_row]

    # relabel two special rows, then forward-fill the remaining gaps
    # in the index labels
    labels = df.index.to_frame(index=False)
    labels.loc[labels["lvl2"] == "Primary production", ["lvl1", "lvl3"]] = "Main"
    labels.loc[labels["lvl2"] == "Gross electricity production", "lvl1"] = "Gross production"
    labels = labels.ffill()
    df.index = pd.MultiIndex.from_frame(labels)

    df = df.drop(list(range(1990, 2020)), axis=1)
    df = df.drop("Unnamed: 7", axis=1)
    df = df.fillna(0.)

    # convert ktoe/a to TWh/a
    df = df * (11.63 / 1e3)

    if not year:
        return df
    return df.xs(str(year), level="year")
2019-04-16 14:03:51 +00:00
2022-07-23 08:37:32 +00:00
def build_swiss(year=None):
    """Return Swiss energy data in TWh/a.

    Reads ``snakemake.input.swiss`` and keeps the "CH" rows. If ``year`` is
    truthy, only the column named ``str(year)`` is returned (a pd.Series);
    otherwise the whole "CH" table is returned.
    """
    swiss = pd.read_csv(snakemake.input.swiss, index_col=[0, 1]).loc["CH"]

    selected = swiss[str(year)] if year else swiss

    # convert PJ/a to TWh/a
    return selected / 3.6
2019-04-16 14:03:51 +00:00
2022-07-23 09:19:37 +00:00
def _labelled_row(table, position, label):
    """Return row ``position`` of ``table`` after asserting its index label.

    Labels such as "Electricity" occur several times within one IDEES
    sheet, so rows are addressed by position and the label only serves as
    a check that the sheet layout is the expected one.
    """
    assert table.index[position] == label
    return table.iloc[position]


def idees_per_country(country):
    """Collect the JRC-IDEES 2015 rows used by this script for one country.

    Reads the Residential, Tertiary and Transport workbooks found below
    ``snakemake.input.idees`` (the country code is first translated through
    ``idees_rename``) and returns a DataFrame with one column per extracted
    quantity. Values are returned as read from the sheets; unit conversion
    is left to ``build_idees``.
    """
    base_dir = snakemake.input.idees
    ct_idees = idees_rename.get(country, country)

    fn_residential = f"{base_dir}/JRC-IDEES-2015_Residential_{ct_idees}.xlsx"
    fn_tertiary = f"{base_dir}/JRC-IDEES-2015_Tertiary_{ct_idees}.xlsx"
    fn_transport = f"{base_dir}/JRC-IDEES-2015_Transport_{ct_idees}.xlsx"

    eurostat_structure = "Energy consumption by fuel - Eurostat structure (ktoe)"
    electric_heating = ["Advanced electric heating", "Conventional electric heating"]

    # NOTE: the insertion order of ``totals`` defines the column order of
    # the returned DataFrame.
    totals = {}

    # residential

    fec = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)

    totals["total residential space"] = fec.loc["Space heating"]
    totals["electricity residential space"] = fec.loc[electric_heating].sum()
    totals["total residential water"] = fec.loc["Water heating"]
    totals["electricity residential water"] = _labelled_row(fec, 23, "Electricity")
    totals["total residential cooking"] = fec.loc["Cooking"]
    totals["electricity residential cooking"] = _labelled_row(fec, 30, "Electricity")

    summary = pd.read_excel(fn_residential, "RES_summary", index_col=0)

    totals["total residential"] = summary.loc[eurostat_structure]
    totals["electricity residential"] = _labelled_row(summary, 47, "Electricity")
    totals["derived heat residential"] = _labelled_row(summary, 46, "Derived heat")
    totals["thermal uses residential"] = _labelled_row(summary, 50, "Thermal uses")

    # services

    fec = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)

    totals["total services space"] = fec.loc["Space heating"]
    totals["electricity services space"] = fec.loc[electric_heating].sum()
    totals["total services water"] = fec.loc["Hot water"]
    totals["electricity services water"] = _labelled_row(fec, 24, "Electricity")
    totals["total services cooking"] = fec.loc["Catering"]
    totals["electricity services cooking"] = _labelled_row(fec, 31, "Electricity")

    summary = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)

    totals["total services"] = summary.loc[eurostat_structure]
    totals["electricity services"] = _labelled_row(summary, 50, "Electricity")
    totals["derived heat services"] = _labelled_row(summary, 49, "Derived heat")
    totals["thermal uses services"] = _labelled_row(summary, 53, "Thermal uses")

    # agriculture, forestry and fishing

    start = "Detailed split of energy consumption (ktoe)"
    end = "Market shares of energy uses (%)"
    agriculture = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end]

    electricity_rows = [
        "Lighting",
        "Ventilation",
        "Specific electricity uses",
        "Pumping devices (electric)",
    ]
    totals["total agriculture electricity"] = agriculture.loc[electricity_rows].sum()

    heat_rows = ["Specific heat uses", "Low enthalpy heat"]
    totals["total agriculture heat"] = agriculture.loc[heat_rows].sum()

    machinery_rows = [
        "Motor drives",
        "Farming machine drives (diesel oil incl. biofuels)",
        "Pumping devices (diesel oil incl. biofuels)",
    ]
    totals["total agriculture machinery"] = agriculture.loc[machinery_rows].sum()

    totals["total agriculture"] = agriculture.loc["Agriculture, forestry and fishing"]

    # transport

    road = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)

    totals["total road"] = road.loc["by fuel (EUROSTAT DATA)"]
    totals["electricity road"] = road.loc["Electricity"]
    totals["total two-wheel"] = road.loc["Powered 2-wheelers (Gasoline)"]
    totals["total passenger cars"] = _labelled_row(road, 19, "Passenger cars")
    totals["electricity passenger cars"] = _labelled_row(
        road, 30, "Battery electric vehicles"
    )
    totals["total other road passenger"] = _labelled_row(
        road, 31, "Motor coaches, buses and trolley buses"
    )
    totals["electricity other road passenger"] = _labelled_row(
        road, 39, "Battery electric vehicles"
    )
    totals["total light duty road freight"] = _labelled_row(
        road, 41, "Light duty vehicles"
    )
    totals["electricity light duty road freight"] = _labelled_row(
        road, 49, "Battery electric vehicles"
    )
    totals["total heavy duty road freight"] = road.loc[
        "Heavy duty vehicles (Diesel oil incl. biofuels)"
    ]
    totals["passenger car efficiency"] = _labelled_row(road, 61, "Passenger cars")

    rail = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)

    totals["total rail"] = rail.loc["by fuel (EUROSTAT DATA)"]
    totals["electricity rail"] = rail.loc["Electricity"]
    totals["total rail passenger"] = _labelled_row(rail, 15, "Passenger transport")

    electric_passenger_rows = {
        16: "Metro and tram, urban light rail",
        19: "Electric",
        20: "High speed passenger trains",
    }
    for position, label in electric_passenger_rows.items():
        assert rail.index[position] == label
    totals["electricity rail passenger"] = rail.iloc[list(electric_passenger_rows)].sum()

    totals["total rail freight"] = _labelled_row(rail, 21, "Freight transport")
    totals["electricity rail freight"] = _labelled_row(rail, 23, "Electric")

    aviation = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)

    totals["total aviation passenger"] = _labelled_row(aviation, 6, "Passenger transport")
    totals["total aviation freight"] = _labelled_row(aviation, 10, "Freight transport")
    totals["total domestic aviation passenger"] = _labelled_row(aviation, 7, "Domestic")

    assert aviation.index[8] == "International - Intra-EU"
    assert aviation.index[9] == "International - Extra-EU"
    totals["total international aviation passenger"] = aviation.iloc[[8, 9]].sum()

    totals["total domestic aviation freight"] = _labelled_row(
        aviation, 11, "Domestic and International - Intra-EU"
    )
    totals["total international aviation freight"] = _labelled_row(
        aviation, 12, "International - Extra-EU"
    )

    for destination in ("domestic", "international"):
        freight = totals[f"total {destination} aviation freight"]
        passenger = totals[f"total {destination} aviation passenger"]
        totals[f"total {destination} aviation"] = freight + passenger

    navigation = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)

    # coastal and inland
    totals["total domestic navigation"] = navigation.loc["by fuel (EUROSTAT DATA)"]

    activity = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)

    totals["passenger cars"] = _labelled_row(activity, 85, "Passenger cars")

    return pd.DataFrame(totals)
2019-04-16 14:03:51 +00:00
2022-07-23 09:19:37 +00:00
def build_idees(countries, year=None):
    """Build the IDEES table for ``countries``, indexed by country and year.

    The per-country workbooks are read in parallel with
    ``snakemake.threads`` worker processes, converted from ktoe to TWh and
    extended by a "district heat share" column. If ``year`` is truthy, only
    the cross-section for ``int(year)`` is returned.
    """
    progress_options = {
        "ascii": False,
        "unit": "country",
        "total": len(countries),
        "desc": "Build from IDEES database",
    }
    with mp.Pool(processes=snakemake.threads) as pool:
        per_country = list(
            tqdm(pool.imap(idees_per_country, countries), **progress_options)
        )

    df = pd.concat(per_country, keys=countries, names=["country", "year"])

    # convert ktoe to TWh (every column except the "passenger cars" column)
    ktoe_to_twh = 11.63 / 1e3
    is_car_count = df.columns.str.fullmatch("passenger cars")
    df.loc[:, ~is_car_count] *= ktoe_to_twh

    # convert TWh/100km to kWh/km
    df["passenger car efficiency"] *= 10

    # district heating share
    derived_heat = df[["derived heat residential", "derived heat services"]].sum(axis=1)
    thermal_uses = df[["thermal uses residential", "thermal uses services"]].sum(axis=1)
    df["district heat share"] = derived_heat.div(thermal_uses)

    if not year:
        return df
    return df.xs(int(year), level="year")
2019-04-16 14:03:51 +00:00
2019-07-18 13:38:37 +00:00
2021-07-01 18:09:04 +00:00
def build_energy_totals(countries, eurostat, swiss, idees):
    """Assemble per-country energy totals from IDEES, Eurostat and Swiss data.

    Starts from ``idees`` reindexed to ``countries`` and fills the gaps of
    countries without IDEES data from ``eurostat``. Sector totals of those
    countries are split into end uses and transport modes using the average
    shares of the countries that do have detailed data.

    Parameters
    ----------
    countries : pd.Index
        Country codes that form the row index of the result.
    eurostat : pd.DataFrame
        Eurostat balances for a single year; indexed by country followed by
        three category levels, with fuels such as "Total" and "Electricity"
        as columns.
    swiss : pd.Series
        Swiss values, assigned as the "CH" row.
    idees : pd.DataFrame
        IDEES values for a single year, indexed by country.

    Returns
    -------
    pd.DataFrame
        One row per country, one column per energy total, plus the
        "district heat share" column.

    Notes
    -----
    Also reads ``snakemake.input.district_heat_share``. The statement order
    matters: the averages are taken over ``df`` as filled so far.
    """
    eurostat_fuels = dict(
        electricity="Electricity",
        total="Total"
    )

    eurostat_sectors = dict(
        residential="Households",
        services="Commercial & public services",
        road="Road",
        rail="Rail"
    )

    to_drop = ["passenger cars", "passenger car efficiency"]
    df = idees.reindex(countries).drop(to_drop, axis=1)

    eurostat_countries = eurostat.index.levels[0]
    in_eurostat = df.index.intersection(eurostat_countries)

    # add international navigation

    slicer = idx[in_eurostat, :, "International maritime bunkers", :]
    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[in_eurostat, "total international navigation"] = fill_values

    # add swiss energy data

    df.loc["CH"] = swiss

    # get values for missing countries based on Eurostat EnergyBalances
    # divide cooking/space/water according to averages in EU28

    missing = df.index[df["total residential"].isna()]
    to_fill = missing.intersection(eurostat_countries)
    uses = ["space", "cooking", "water"]

    for sector in ["residential", "services", "road", "rail"]:

        # fuel use

        for fuel in ["electricity", "total"]:
            slicer = idx[to_fill, :, :, eurostat_sectors[sector]]
            fill_values = eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values

    for sector in ["residential", "services"]:

        # electric use

        for use in uses:
            fuel_use = df[f"electricity {sector} {use}"]
            fuel = df[f"electricity {sector}"]
            avg = fuel_use.div(fuel).mean()
            print(f"{sector}: average fraction of electricity for {use} is {avg:.3f}")
            df.loc[to_fill, f"electricity {sector} {use}"] = avg * df.loc[to_fill, f"electricity {sector}"]

        # non-electric use

        for use in uses:
            nonelectric_use = df[f"total {sector} {use}"] - df[f"electricity {sector} {use}"]
            nonelectric = df[f"total {sector}"] - df[f"electricity {sector}"]
            avg = nonelectric_use.div(nonelectric).mean()
            print(f"{sector}: average fraction of non-electric for {use} is {avg:.3f}")
            electric_use = df.loc[to_fill, f"electricity {sector} {use}"]
            nonelectric = df.loc[to_fill, f"total {sector}"] - df.loc[to_fill, f"electricity {sector}"]
            df.loc[to_fill, f"total {sector} {use}"] = electric_use + avg * nonelectric

    # Fix Norway space and water heating fractions
    # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    # The main heating source for about 73 per cent of the households is based on electricity
    # => 27% is non-electric
    elec_fraction = 0.73

    # Only apply the correction when Norway is modelled; without this guard
    # ``df.drop("NO")`` raises a KeyError for country sets that exclude it.
    if "NO" in df.index:

        no_norway = df.drop("NO")

        for sector in ["residential", "services"]:

            # assume non-electric is heating
            nonelectric = df.loc["NO", f"total {sector}"] - df.loc["NO", f"electricity {sector}"]
            total_heating = nonelectric / (1 - elec_fraction)

            for use in uses:
                nonelectric_use = no_norway[f"total {sector} {use}"] - no_norway[f"electricity {sector} {use}"]
                nonelectric = no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
                fraction = nonelectric_use.div(nonelectric).mean()
                df.loc["NO", f"total {sector} {use}"] = total_heating * fraction
                df.loc["NO", f"electricity {sector} {use}"] = total_heating * fraction * elec_fraction

    # Missing aviation

    slicer = idx[to_fill, :, :, "Domestic aviation"]
    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic aviation"] = fill_values

    slicer = idx[to_fill, :, "International aviation", :]
    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total international aviation"] = fill_values

    # missing domestic navigation

    slicer = idx[to_fill, :, :, "Domestic navigation"]
    fill_values = eurostat.loc[slicer, "Total"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic navigation"] = fill_values

    # split road traffic for non-IDEES
    missing = df.index[df["total passenger cars"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [
            f"{fuel} passenger cars",
            f"{fuel} other road passenger",
            f"{fuel} light duty road freight",
        ]
        if fuel == "total":
            # these two modes have no electricity column
            selection.extend([
                f"{fuel} two-wheel",
                f"{fuel} heavy duty road freight"
            ])
        road = df[selection].sum()
        road_fraction = road / road.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} road"], road_fraction)
        df.loc[missing, road_fraction.index] = fill_values

    # split rail traffic for non-IDEES
    missing = df.index[df["total rail passenger"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [f"{fuel} rail passenger", f"{fuel} rail freight"]
        rail = df[selection].sum()
        rail_fraction = rail / rail.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} rail"], rail_fraction)
        df.loc[missing, rail_fraction.index] = fill_values

    # split aviation traffic for non-IDEES
    missing = df.index[df["total domestic aviation passenger"].isna()]
    for destination in ["domestic", "international"]:
        selection = [
            f"total {destination} aviation passenger",
            f"total {destination} aviation freight",
        ]
        aviation = df[selection].sum()
        aviation_fraction = aviation / aviation.sum()
        fill_values = cartesian(df.loc[missing, f"total {destination} aviation"], aviation_fraction)
        df.loc[missing, aviation_fraction.index] = fill_values

    for purpose in ["passenger", "freight"]:
        attrs = [f"total domestic aviation {purpose}", f"total international aviation {purpose}"]
        df.loc[missing, f"total aviation {purpose}"] = df.loc[missing, attrs].sum(axis=1)

    if "BA" in df.index:
        # fill missing data for BA (services and road energy data)
        # proportional to RS with ratio of total residential demand
        missing = df.loc["BA"] == 0.0
        ratio = df.at["BA", "total residential"] / df.at["RS", "total residential"]
        df.loc["BA", missing] = ratio * df.loc["RS", missing]

    # Missing district heating share
    dh_share = pd.read_csv(snakemake.input.district_heat_share,
                           index_col=0, usecols=[0, 1])
    # make conservative assumption and take minimum from both data sets
    # (the file values are divided by 100 before the comparison)
    df["district heat share"] = (pd.concat([df["district heat share"],
                                            dh_share.reindex(index=df.index) / 100],
                                           axis=1).min(axis=1))

    return df
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_eea_co2(year=1990):
    """Return EEA emissions of ``year`` per country and sector, in Mt.

    Reads ``snakemake.input.co2`` and keeps the pollutant scope configured
    under ``snakemake.config["energy"]["emissions"]``. Sector columns use
    the short names from ``to_ipcc``; an "industrial non-elec" column is
    derived as the energy total minus the individually listed sectors.
    """
    # https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16
    # downloaded 201228 (modified by EEA last on 201221)
    raw = pd.read_csv(snakemake.input.co2, encoding="latin-1")

    # the "1985-1987" entries are mapped to 1986 so that Year can be cast to int
    raw = raw.replace({"Year": "1985-1987"}, 1986)
    raw["Year"] = raw["Year"].astype(int)

    key_columns = ["Country_code", "Pollutant_name", "Year", "Sector_name"]
    indexed = raw.set_index(key_columns).sort_index()

    emissions_scope = snakemake.config["energy"]["emissions"]

    cts = ["CH", "EUA", "NO"] + eu28_eea

    slicer = idx[cts, emissions_scope, year, to_ipcc.values()]
    selected = indexed.loc[slicer, "emissions"]
    by_sector = selected.unstack("Sector_name")
    by_sector = by_sector.rename(columns=reverse(to_ipcc))
    # pollutant and year are fixed by the slicer, so only the country remains
    emissions = by_sector.droplevel([1, 2])

    emissions = emissions.rename(index={"EUA": "EU28", "UK": "GB"})

    to_subtract = [
        "electricity",
        "services non-elec",
        "residential non-elec",
        "road non-elec",
        "rail non-elec",
        "domestic aviation",
        "international aviation",
        "domestic navigation",
        "international navigation",
        "agriculture, forestry and fishing",
    ]
    listed_sectors = emissions[to_subtract].sum(axis=1)
    emissions["industrial non-elec"] = emissions["total energy"] - listed_sectors

    emissions["agriculture"] = (
        emissions["agriculture"] + emissions["agriculture, forestry and fishing"]
    )

    to_drop = ["total energy", "total wL", "total woL", "agriculture, forestry and fishing"]
    emissions = emissions.drop(columns=to_drop)

    # convert from Gg to Mt
    return emissions / 1e3
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_eurostat_co2(countries, year=1990):
    """Estimate emissions from the Eurostat energy balances of ``year``.

    Each fuel column of ``build_eurostat(countries, year)`` is multiplied
    by a specific emission factor and the products are summed per row.
    Columns without a factor contribute nothing to the sum.
    """
    eurostat = build_eurostat(countries, year)

    specific_emissions = pd.Series(index=eurostat.columns, dtype=float)

    # emissions in tCO2_equiv per MWh_th
    #
    # oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    # Distillate oil (No. 2)  0.276
    # Residual oil (No. 6)  0.298
    # https://www.eia.gov/electricity/annual/html/epa_a_03.html
    factors = {
        "Solid fuels": 0.36,  # Approximates coal
        "Oil (total)": 0.285,  # Average of distillate and residue
        "Gas": 0.2,  # For natural gas
    }
    for fuel, factor in factors.items():
        specific_emissions[fuel] = factor

    weighted = eurostat.multiply(specific_emissions)
    return weighted.sum(axis=1)
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_co2_totals(countries, eea_co2, eurostat_co2):
    """Combine EEA emissions with Eurostat-based estimates.

    ``eea_co2`` is reindexed to ``countries``. For BA, RS, AL, ME and MK
    (where present in ``countries``) the sector values are then overwritten
    with sums taken from ``eurostat_co2``, a Series whose index has the
    country on level 0 and the category names on the following levels.
    """
    co2 = eea_co2.reindex(countries)

    eurostat_based = countries.intersection(["BA", "RS", "AL", "ME", "MK"])

    for ct in eurostat_based:

        in_country = eurostat_co2.index.get_level_values(0) == ct
        in_agriculture = eurostat_co2.index.isin(
            ["Agriculture / Forestry", "Fishing"], level=3
        )

        # target column -> ``.loc`` selector on ``eurostat_co2``
        selectors = {
            "electricity": (ct, "+", "Conventional Thermal Power Stations", "of which From Coal"),
            "residential non-elec": (ct, "+", "+", "Residential"),
            "services non-elec": (ct, "+", "+", "Services"),
            "road non-elec": (ct, "+", "+", "Road"),
            "rail non-elec": (ct, "+", "+", "Rail"),
            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
            "international navigation": (ct, "-", "Bunkers"),
            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
            "international aviation": (ct, "+", "+", "International aviation"),
            # does not include industrial process emissions or fuel processing/refining
            "industrial non-elec": (ct, "+", "Industry"),
            # does not include non-energy emissions
            "agriculture": in_country & in_agriculture,
        }

        for column, selector in selectors.items():
            co2.at[ct, column] = eurostat_co2.loc[selector].sum()

    return co2
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_transport_data(countries, population, idees):
    """Return car counts and average car efficiency for every country.

    Parameters
    ----------
    countries : pd.Index
        Country codes forming the row index of the result.
    population : pd.Series
        Population per country, indexed like ``countries``.
    idees : pd.DataFrame
        Table indexed by country that provides the "passenger cars" and
        "passenger car efficiency" columns.

    Returns
    -------
    pd.DataFrame
        Columns "number cars" and "average fuel efficiency". Countries
        without data receive the average cars-per-person times their
        population, and the plain average efficiency, respectively. The
        countries that were filled are printed to stdout.
    """
    transport_data = pd.DataFrame(index=countries)

    # collect number of cars

    transport_data["number cars"] = idees["passenger cars"]

    # CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.at["CH", "number cars"] = 4.136e6

    missing = transport_data.index[transport_data["number cars"].isna()]
    print(f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data.")

    cars_pp = transport_data["number cars"] / population
    # the right-hand side is aligned on the index, so each missing country
    # gets the average rate scaled by its own population
    transport_data.loc[missing, "number cars"] = cars_pp.mean() * population

    # collect average fuel efficiency in kWh/km

    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    missing = transport_data.index[transport_data["average fuel efficiency"].isna()]
    print(f"Missing data on fuel efficiency from:\n{list(missing)}\nFilling gaps with averaged data.")

    fill_values = transport_data["average fuel efficiency"].mean()
    transport_data.loc[missing, "average fuel efficiency"] = fill_values

    return transport_data
# Script entry point: builds the energy, CO2 and transport tables and writes
# each of them to the CSV path given in ``snakemake.output``.
if __name__ == "__main__":
    # When run outside of a Snakemake job, create a mock ``snakemake`` object.
    # It must be a module-level global because the build_* functions read it.
    if 'snakemake' not in globals():
        from helper import mock_snakemake
        snakemake = mock_snakemake('build_energy_totals')

    config = snakemake.config["energy"]

    # population per country, summed over the NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index("index")
    population = nuts3["pop"].groupby(nuts3.country).sum()

    countries = population.index
    # IDEES data is only requested for countries listed in ``eu28``
    idees_countries = countries.intersection(eu28)

    data_year = config["energy_totals_year"]
    # "CH" is excluded here; its data comes from ``build_swiss`` instead
    eurostat = build_eurostat(countries.difference(['CH']), data_year)
    swiss = build_swiss(data_year)
    idees = build_idees(idees_countries, data_year)

    energy = build_energy_totals(countries, eurostat, swiss, idees)
    energy.to_csv(snakemake.output.energy_name)

    base_year_emissions = config["base_emissions_year"]
    eea_co2 = build_eea_co2(base_year_emissions)
    # NOTE(review): unlike the ``build_eurostat`` call above, "CH" is not
    # excluded from ``countries`` here -- confirm that this is intended.
    eurostat_co2 = build_eurostat_co2(countries, base_year_emissions)

    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
    co2.to_csv(snakemake.output.co2_name)

    transport = build_transport_data(countries, population, idees)
    transport.to_csv(snakemake.output.transport_name)