2023-03-06 08:27:45 +00:00
# -*- coding: utf-8 -*-
2023-02-23 09:30:32 +00:00
import logging
2023-03-06 08:27:45 +00:00
2023-02-23 09:30:32 +00:00
logger = logging . getLogger ( __name__ )
2021-07-01 18:09:04 +00:00
import multiprocessing as mp
from functools import partial
2019-04-16 14:03:51 +00:00
import geopandas as gpd
2021-07-01 18:09:04 +00:00
import numpy as np
2019-04-16 14:03:51 +00:00
import pandas as pd
2023-02-21 21:36:49 +00:00
from helper import mute_print
2021-07-01 18:09:04 +00:00
from tqdm import tqdm
2019-04-16 14:03:51 +00:00
idx = pd . IndexSlice
2023-02-18 12:47:34 +00:00
2021-07-01 18:09:04 +00:00
def cartesian(s1, s2):
    """
    Build the outer (Cartesian) product of two pd.Series.

    Rows are labelled by the index of ``s1`` and columns by the index of
    ``s2``; cell ``(i, j)`` holds ``s1[i] * s2[j]``.
    """
    products = np.outer(s1, s2)
    row_labels = s1.index
    column_labels = s2.index
    return pd.DataFrame(products, index=row_labels, columns=column_labels)
def reverse(dictionary):
    """
    Swap the keys and values of a dictionary.

    If several keys share a value, the key that comes last in iteration
    order wins.
    """
    return dict(zip(dictionary.values(), dictionary.keys()))
# translations for Eurostat
eurostat_country_to_alpha2 = {
" EU28 " : " EU " ,
" EA19 " : " EA " ,
" Belgium " : " BE " ,
" Bulgaria " : " BG " ,
" Czech Republic " : " CZ " ,
" Denmark " : " DK " ,
" Germany " : " DE " ,
" Estonia " : " EE " ,
" Ireland " : " IE " ,
" Greece " : " GR " ,
" Spain " : " ES " ,
" France " : " FR " ,
" Croatia " : " HR " ,
" Italy " : " IT " ,
" Cyprus " : " CY " ,
" Latvia " : " LV " ,
" Lithuania " : " LT " ,
" Luxembourg " : " LU " ,
" Hungary " : " HU " ,
" Malta " : " MA " ,
" Netherlands " : " NL " ,
" Austria " : " AT " ,
" Poland " : " PL " ,
" Portugal " : " PT " ,
" Romania " : " RO " ,
" Slovenia " : " SI " ,
" Slovakia " : " SK " ,
" Finland " : " FI " ,
" Sweden " : " SE " ,
" United Kingdom " : " GB " ,
" Iceland " : " IS " ,
" Norway " : " NO " ,
" Montenegro " : " ME " ,
" FYR of Macedonia " : " MK " ,
" Albania " : " AL " ,
" Serbia " : " RS " ,
" Turkey " : " TU " ,
" Bosnia and Herzegovina " : " BA " ,
" Kosovo \n (UNSCR 1244/99) " : " KO " , # 2017 version
# 2016 version
" Kosovo \n (under United Nations Security Council Resolution 1244/99) " : " KO " ,
" Moldova " : " MO " ,
" Ukraine " : " UK " ,
" Switzerland " : " CH " ,
2019-04-16 14:03:51 +00:00
}
2021-07-01 18:09:04 +00:00
# countries handled in this file that are not part of the EU28
non_EU = ["NO", "CH", "ME", "MK", "RS", "BA", "AL"]

# country codes that differ in the JRC-IDEES file names
idees_rename = {"GR": "EL", "GB": "UK"}

# EU28 member states (two-letter codes as used in this file)
eu28 = [
    "FR", "DE", "GB", "IT", "ES", "PL", "SE",
    "NL", "BE", "FI", "CZ", "DK", "PT", "RO",
    "AT", "BG", "EE", "GR", "LV", "HU", "IE",
    "SK", "LT", "HR", "LU", "SI", "CY", "MT",
]  # fmt: skip

# same members, but with "UK" in place of "GB" (moved to the end of the list);
# used to select rows of the EEA emissions table
eu28_eea = [code for code in eu28 if code != "GB"] + ["UK"]
2021-07-01 18:09:04 +00:00
# Mapping from the sector names used in this file to the "Sector_name" labels
# looked up in the EEA emissions table (see `build_eea_co2`).
to_ipcc = {
    # energy: power, buildings and transport
    "electricity": "1.A.1.a - Public Electricity and Heat Production",
    "residential non-elec": "1.A.4.b - Residential",
    "services non-elec": "1.A.4.a - Commercial/Institutional",
    "rail non-elec": "1.A.3.c - Railways",
    "road non-elec": "1.A.3.b - Road Transportation",
    "domestic navigation": "1.A.3.d - Domestic Navigation",
    "international navigation": "1.D.1.b - International Navigation",
    "domestic aviation": "1.A.3.a - Domestic Aviation",
    "international aviation": "1.D.1.a - International Aviation",
    "total energy": "1 - Energy",
    # non-energy sectors
    "industrial processes": "2 - Industrial Processes and Product Use",
    "agriculture": "3 - Agriculture",
    "agriculture, forestry and fishing": "1.A.4.c - Agriculture/Forestry/Fishing",
    "LULUCF": "4 - Land Use, Land-Use Change and Forestry",
    "waste management": "5 - Waste management",
    "other": "6 - Other Sector",
    "indirect": "ind_CO2 - Indirect CO2",
    # totals
    "total wL": "Total (with LULUCF)",
    "total woL": "Total (without LULUCF)",
}
2023-03-06 08:27:45 +00:00
2022-08-01 13:21:11 +00:00
def build_eurostat(input_eurostat, countries, report_year, year):
    """
    Return multi-index for all countries' energy data in TWh/a.

    Reads every sheet of the Eurostat energy balance workbook for ``year``
    (2016 or 2017 edition, chosen by ``report_year``), keeps the sheets whose
    country is in ``countries`` and stacks them under their two-letter code.
    """
    filenames = {
        2016: f"/{year}-Energy-Balances-June2016edition.xlsx",
        2017: f"/{year}-ENERGY-BALANCES-June2017edition.xlsx",
    }
    workbook_path = input_eurostat + filenames[report_year]

    with mute_print():
        sheets = pd.read_excel(
            workbook_path,
            sheet_name=None,
            skiprows=1,
            index_col=[0, 1, 2, 3],
        )

    # the first column header of each sheet is the country name
    lookup = eurostat_country_to_alpha2
    labelled_dfs = {}
    for sheet in sheets.values():
        code = lookup[sheet.columns[0]]
        if code in countries:
            labelled_dfs[code] = sheet

    # sorted index necessary for slicing
    df = pd.concat(labelled_dfs, sort=True).sort_index()

    # drop non-numeric and country columns
    non_numeric_cols = df.columns[df.dtypes != float]
    country_cols = df.columns.intersection(lookup.keys())
    df = df.drop(columns=non_numeric_cols.union(country_cols))

    # convert ktoe/a to TWh/a
    return df * (11.63 / 1e3)
2019-04-16 14:03:51 +00:00
def build_swiss(year):
    """
    Return a pd.Series of Swiss energy data in TWh/a.

    The data is read from the CSV file given by ``snakemake.input.swiss``,
    selecting the "CH" rows and the column named after ``year``.
    """
    table = pd.read_csv(snakemake.input.swiss, index_col=[0, 1])
    swiss_pj = table.loc["CH", str(year)]

    # convert PJ/a to TWh/a
    return swiss_pj / 3.6
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def _row_at(series, position, label):
    """
    Return the value at integer ``position`` of ``series``.

    Labels such as "Electricity" occur at several positions of the same
    sheet, so these rows are addressed by position. The assertion checks that
    the expected label sits at that position, i.e. that the sheet layout has
    not shifted.
    """
    found = series.index[position]
    assert found == label, f"expected {label!r} at row {position}, found {found!r}"
    # explicit positional access; plain `series[position]` on a label index
    # relies on a deprecated fallback
    return series.iloc[position]


def _rows_at(series, labels_by_position):
    """
    Return the sub-series at the given positions after checking each label.

    ``labels_by_position`` maps integer row position -> expected row label.
    """
    for position, label in labels_by_position.items():
        _row_at(series, position, label)
    return series.iloc[list(labels_by_position)]


def idees_per_country(ct, year):
    """
    Collect energy totals of one country from its JRC-IDEES-2015 workbooks.

    Parameters
    ----------
    ct : str
        Two-letter country code; translated through ``idees_rename`` to the
        code used in the workbook file names.
    year : column label
        Column to select from every sheet.

    Returns
    -------
    pd.Series
        Values keyed by names such as "total residential space", named ``ct``.
        Values are returned as read from the sheets (no unit conversion here).

    Raises
    ------
    AssertionError
        If a row addressed by position does not carry the expected label.
    """
    base_dir = snakemake.input.idees

    ct_totals = {}

    ct_idees = idees_rename.get(ct, ct)
    fn_residential = f"{base_dir}/JRC-IDEES-2015_Residential_{ct_idees}.xlsx"
    fn_tertiary = f"{base_dir}/JRC-IDEES-2015_Tertiary_{ct_idees}.xlsx"
    fn_transport = f"{base_dir}/JRC-IDEES-2015_Transport_{ct_idees}.xlsx"

    # residential

    df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)[year]

    ct_totals["total residential space"] = df["Space heating"]

    rows = ["Advanced electric heating", "Conventional electric heating"]
    ct_totals["electricity residential space"] = df[rows].sum()

    ct_totals["total residential water"] = df.at["Water heating"]
    ct_totals["electricity residential water"] = _row_at(df, 23, "Electricity")

    ct_totals["total residential cooking"] = df["Cooking"]
    ct_totals["electricity residential cooking"] = _row_at(df, 30, "Electricity")

    df = pd.read_excel(fn_residential, "RES_summary", index_col=0)[year]

    row = "Energy consumption by fuel - Eurostat structure (ktoe)"
    ct_totals["total residential"] = df[row]

    ct_totals["electricity residential"] = _row_at(df, 47, "Electricity")
    ct_totals["derived heat residential"] = _row_at(df, 46, "Derived heat")
    ct_totals["thermal uses residential"] = _row_at(df, 50, "Thermal uses")

    # services

    df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)[year]

    ct_totals["total services space"] = df["Space heating"]

    rows = ["Advanced electric heating", "Conventional electric heating"]
    ct_totals["electricity services space"] = df[rows].sum()

    ct_totals["total services water"] = df["Hot water"]
    ct_totals["electricity services water"] = _row_at(df, 24, "Electricity")

    ct_totals["total services cooking"] = df["Catering"]
    ct_totals["electricity services cooking"] = _row_at(df, 31, "Electricity")

    df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)[year]

    row = "Energy consumption by fuel - Eurostat structure (ktoe)"
    ct_totals["total services"] = df[row]

    ct_totals["electricity services"] = _row_at(df, 50, "Electricity")
    ct_totals["derived heat services"] = _row_at(df, 49, "Derived heat")
    ct_totals["thermal uses services"] = _row_at(df, 53, "Thermal uses")

    # agriculture, forestry and fishing

    start = "Detailed split of energy consumption (ktoe)"
    end = "Market shares of energy uses (%)"

    df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end, year]

    rows = [
        "Lighting",
        "Ventilation",
        "Specific electricity uses",
        "Pumping devices (electric)",
    ]
    ct_totals["total agriculture electricity"] = df[rows].sum()

    rows = ["Specific heat uses", "Low enthalpy heat"]
    ct_totals["total agriculture heat"] = df[rows].sum()

    rows = [
        "Motor drives",
        "Farming machine drives (diesel oil incl. biofuels)",
        "Pumping devices (diesel oil incl. biofuels)",
    ]
    ct_totals["total agriculture machinery"] = df[rows].sum()

    row = "Agriculture, forestry and fishing"
    ct_totals["total agriculture"] = df[row]

    # transport

    df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)[year]

    ct_totals["total road"] = df["by fuel (EUROSTAT DATA)"]
    ct_totals["electricity road"] = df["Electricity"]
    ct_totals["total two-wheel"] = df["Powered 2-wheelers (Gasoline)"]

    ct_totals["total passenger cars"] = _row_at(df, 19, "Passenger cars")
    ct_totals["electricity passenger cars"] = _row_at(
        df, 30, "Battery electric vehicles"
    )

    ct_totals["total other road passenger"] = _row_at(
        df, 31, "Motor coaches, buses and trolley buses"
    )
    ct_totals["electricity other road passenger"] = _row_at(
        df, 39, "Battery electric vehicles"
    )

    ct_totals["total light duty road freight"] = _row_at(
        df, 41, "Light duty vehicles"
    )
    ct_totals["electricity light duty road freight"] = _row_at(
        df, 49, "Battery electric vehicles"
    )

    row = "Heavy duty vehicles (Diesel oil incl. biofuels)"
    ct_totals["total heavy duty road freight"] = df[row]

    ct_totals["passenger car efficiency"] = _row_at(df, 61, "Passenger cars")

    df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)[year]

    ct_totals["total rail"] = df["by fuel (EUROSTAT DATA)"]
    ct_totals["electricity rail"] = df["Electricity"]

    ct_totals["total rail passenger"] = _row_at(df, 15, "Passenger transport")
    ct_totals["electricity rail passenger"] = _rows_at(
        df,
        {
            16: "Metro and tram, urban light rail",
            19: "Electric",
            20: "High speed passenger trains",
        },
    ).sum()

    ct_totals["total rail freight"] = _row_at(df, 21, "Freight transport")
    ct_totals["electricity rail freight"] = _row_at(df, 23, "Electric")

    df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)[year]

    ct_totals["total aviation passenger"] = _row_at(df, 6, "Passenger transport")
    ct_totals["total aviation freight"] = _row_at(df, 10, "Freight transport")

    ct_totals["total domestic aviation passenger"] = _row_at(df, 7, "Domestic")
    ct_totals["total international aviation passenger"] = _rows_at(
        df,
        {8: "International - Intra-EU", 9: "International - Extra-EU"},
    ).sum()

    ct_totals["total domestic aviation freight"] = _row_at(
        df, 11, "Domestic and International - Intra-EU"
    )
    ct_totals["total international aviation freight"] = _row_at(
        df, 12, "International - Extra-EU"
    )

    ct_totals["total domestic aviation"] = (
        ct_totals["total domestic aviation freight"]
        + ct_totals["total domestic aviation passenger"]
    )

    ct_totals["total international aviation"] = (
        ct_totals["total international aviation freight"]
        + ct_totals["total international aviation passenger"]
    )

    df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)[year]

    # coastal and inland
    ct_totals["total domestic navigation"] = df["by fuel (EUROSTAT DATA)"]

    df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)[year]

    ct_totals["passenger cars"] = _row_at(df, 85, "Passenger cars")

    return pd.Series(ct_totals, name=ct)
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_idees(countries, year):
    """
    Gather the IDEES totals of all ``countries`` into one table.

    ``idees_per_country`` is run for every country in a process pool of
    ``snakemake.threads`` workers. The result has one row per country and one
    column per quantity, with energy quantities in TWh, the passenger car
    efficiency in kWh/km and an added "district heat share" column.
    """
    per_country = partial(idees_per_country, year=year)
    progress_options = {
        "ascii": False,
        "unit": " country",
        "total": len(countries),
        "desc": "Build from IDEES database",
    }

    with mute_print(), mp.Pool(processes=snakemake.threads) as pool:
        results = pool.imap(per_country, countries)
        totals_list = list(tqdm(results, **progress_options))

    # one column per country, one row per quantity
    totals = pd.concat(totals_list, axis=1)

    # convert ktoe to TWh (the number of passenger cars is not an energy value)
    is_energy_row = ~totals.index.str.fullmatch("passenger cars")
    totals.loc[is_energy_row] *= 11.63 / 1e3

    # convert TWh/100km to kWh/km
    totals.loc["passenger car efficiency"] *= 10

    # district heating share
    derived_rows = ["derived heat residential", "derived heat services"]
    thermal_rows = ["thermal uses residential", "thermal uses services"]
    district_heat = totals.loc[derived_rows].sum()
    total_heat = totals.loc[thermal_rows].sum()
    totals.loc["district heat share"] = district_heat.div(total_heat)

    return totals.T
2019-04-16 14:03:51 +00:00
2019-07-18 13:38:37 +00:00
2021-07-01 18:09:04 +00:00
def build_energy_totals(countries, eurostat, swiss, idees):
    """
    Combine IDEES, Eurostat and Swiss data into one table of energy totals.

    Parameters
    ----------
    countries : index-like of two-letter country codes
        Rows of the returned table.
    eurostat : pd.DataFrame
        Output of ``build_eurostat``; its first index level holds the country
        codes and it is sliced with four index levels.
    swiss : pd.Series
        Output of ``build_swiss``; written into the "CH" row.
    idees : pd.DataFrame
        Output of ``build_idees`` (countries x quantities).

    Returns
    -------
    pd.DataFrame
        One row per country. Countries without IDEES data are filled from
        Eurostat and split into end uses with averages over the other rows.

    Notes
    -----
    Also reads ``snakemake.input.district_heat_share``.
    """
    eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}

    to_drop = ["passenger cars", "passenger car efficiency"]
    df = idees.reindex(countries).drop(to_drop, axis=1)

    eurostat_countries = eurostat.index.levels[0]
    in_eurostat = df.index.intersection(eurostat_countries)

    # add international navigation

    slicer = idx[in_eurostat, :, "Bunkers", :]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
    df.loc[in_eurostat, "total international navigation"] = fill_values

    # add swiss energy data

    df.loc["CH"] = swiss

    # get values for missing countries based on Eurostat EnergyBalances
    # divide cooking/space/water according to averages in EU28

    missing = df.index[df["total residential"].isna()]
    to_fill = missing.intersection(eurostat_countries)
    uses = ["space", "cooking", "water"]

    for sector in ["residential", "services", "road", "rail"]:
        eurostat_sector = sector.capitalize()

        # fuel use

        for fuel in ["electricity", "total"]:
            slicer = idx[to_fill, :, :, eurostat_sector]
            fill_values = (
                eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum()
            )
            df.loc[to_fill, f"{fuel} {sector}"] = fill_values

    for sector in ["residential", "services"]:
        # electric use

        for use in uses:
            fuel_use = df[f"electricity {sector} {use}"]
            fuel = df[f"electricity {sector}"]
            avg = fuel_use.div(fuel).mean()
            logger.debug(
                f"{sector}: average fraction of electricity for {use} is {avg:.3f}"
            )
            df.loc[to_fill, f"electricity {sector} {use}"] = (
                avg * df.loc[to_fill, f"electricity {sector}"]
            )

        # non-electric use

        for use in uses:
            nonelectric_use = (
                df[f"total {sector} {use}"] - df[f"electricity {sector} {use}"]
            )
            nonelectric = df[f"total {sector}"] - df[f"electricity {sector}"]
            avg = nonelectric_use.div(nonelectric).mean()
            logger.debug(
                f"{sector}: average fraction of non-electric for {use} is {avg:.3f}"
            )
            electric_use = df.loc[to_fill, f"electricity {sector} {use}"]
            nonelectric = (
                df.loc[to_fill, f"total {sector}"]
                - df.loc[to_fill, f"electricity {sector}"]
            )
            df.loc[to_fill, f"total {sector} {use}"] = electric_use + avg * nonelectric

    # Fix Norway space and water heating fractions
    # http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    # The main heating source for about 73 per cent of the households is based on electricity
    # => 27% is non-electric
    # Only applicable when Norway is part of the table; without this guard the
    # row lookups below raise KeyError.
    if "NO" in df.index:
        elec_fraction = 0.73

        no_norway = df.drop("NO")

        for sector in ["residential", "services"]:
            # assume non-electric is heating
            nonelectric = (
                df.loc["NO", f"total {sector}"]
                - df.loc["NO", f"electricity {sector}"]
            )
            total_heating = nonelectric / (1 - elec_fraction)

            for use in uses:
                nonelectric_use = (
                    no_norway[f"total {sector} {use}"]
                    - no_norway[f"electricity {sector} {use}"]
                )
                nonelectric = (
                    no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
                )
                fraction = nonelectric_use.div(nonelectric).mean()
                df.loc["NO", f"total {sector} {use}"] = total_heating * fraction
                df.loc["NO", f"electricity {sector} {use}"] = (
                    total_heating * fraction * elec_fraction
                )

    # Missing aviation

    slicer = idx[to_fill, :, :, "Domestic aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic aviation"] = fill_values

    slicer = idx[to_fill, :, :, "International aviation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
    df.loc[to_fill, "total international aviation"] = fill_values

    # missing domestic navigation

    slicer = idx[to_fill, :, :, "Domestic Navigation"]
    fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum()
    df.loc[to_fill, "total domestic navigation"] = fill_values

    # split road traffic for non-IDEES
    missing = df.index[df["total passenger cars"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [
            f"{fuel} passenger cars",
            f"{fuel} other road passenger",
            f"{fuel} light duty road freight",
        ]
        if fuel == "total":
            selection.extend([f"{fuel} two-wheel", f"{fuel} heavy duty road freight"])
        # share of each road category in the sum over all countries with data
        road = df[selection].sum()
        road_fraction = road / road.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} road"], road_fraction)
        df.loc[missing, road_fraction.index] = fill_values

    # split rail traffic for non-IDEES
    missing = df.index[df["total rail passenger"].isna()]
    for fuel in ["total", "electricity"]:
        selection = [f"{fuel} rail passenger", f"{fuel} rail freight"]
        rail = df[selection].sum()
        rail_fraction = rail / rail.sum()
        fill_values = cartesian(df.loc[missing, f"{fuel} rail"], rail_fraction)
        df.loc[missing, rail_fraction.index] = fill_values

    # split aviation traffic for non-IDEES
    missing = df.index[df["total domestic aviation passenger"].isna()]
    for destination in ["domestic", "international"]:
        selection = [
            f"total {destination} aviation passenger",
            f"total {destination} aviation freight",
        ]
        aviation = df[selection].sum()
        aviation_fraction = aviation / aviation.sum()
        fill_values = cartesian(
            df.loc[missing, f"total {destination} aviation"], aviation_fraction
        )
        df.loc[missing, aviation_fraction.index] = fill_values

    for purpose in ["passenger", "freight"]:
        attrs = [
            f"total domestic aviation {purpose}",
            f"total international aviation {purpose}",
        ]
        df.loc[missing, f"total aviation {purpose}"] = df.loc[missing, attrs].sum(
            axis=1
        )

    if "BA" in df.index:
        # fill missing data for BA (services and road energy data)
        # proportional to RS with ratio of total residential demand
        missing = df.loc["BA"] == 0.0
        ratio = df.at["BA", "total residential"] / df.at["RS", "total residential"]
        df.loc["BA", missing] = ratio * df.loc["RS", missing]

    # Missing district heating share
    dh_share = pd.read_csv(
        snakemake.input.district_heat_share, index_col=0, usecols=[0, 1]
    )
    # make conservative assumption and take minimum from both data sets
    # (the CSV values are divided by 100, i.e. presumably given in percent)
    df["district heat share"] = pd.concat(
        [df["district heat share"], dh_share.reindex(index=df.index) / 100], axis=1
    ).min(axis=1)

    return df
2019-04-16 14:03:51 +00:00
2022-08-01 13:21:11 +00:00
def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
    """
    Return sectoral emissions per country from the EEA inventory, in Mt.

    Parameters
    ----------
    input_co2 : path
        CSV file of the EEA emissions inventory (read with latin-1 encoding).
    year : int, default 1990
        Inventory year to select.
    emissions_scope : str, default "CO2"
        Value of the "Pollutant_name" column to select.

    Returns
    -------
    pd.DataFrame
        Indexed by country code ("EUA" renamed to "EU28", "UK" to "GB"), one
        column per sector key of ``to_ipcc`` except the dropped totals, plus
        the derived column "industrial non-elec".
    """
    # https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16
    # downloaded 201228 (modified by EEA last on 201221)
    df = pd.read_csv(input_co2, encoding="latin-1", low_memory=False)

    # the period label "1985-1987" is mapped to 1986 so the column can be cast to int
    df.replace(dict(Year="1985-1987"), 1986, inplace=True)
    df.Year = df.Year.astype(int)
    index_col = ["Country_code", "Pollutant_name", "Year", "Sector_name"]
    df = df.set_index(index_col).sort_index()

    cts = ["CH", "EUA", "NO"] + eu28_eea

    slicer = idx[cts, emissions_scope, year, to_ipcc.values()]
    emissions = (
        df.loc[slicer, "emissions"]
        .unstack("Sector_name")
        .rename(columns=reverse(to_ipcc))
        .droplevel([1, 2])
    )

    emissions.rename(index={"EUA": "EU28", "UK": "GB"}, inplace=True)

    # what remains of "total energy" after removing these sectors is
    # attributed to industry
    to_subtract = [
        "electricity",
        "services non-elec",
        "residential non-elec",
        "road non-elec",
        "rail non-elec",
        "domestic aviation",
        "international aviation",
        "domestic navigation",
        "international navigation",
        "agriculture, forestry and fishing",
    ]
    emissions["industrial non-elec"] = emissions["total energy"] - emissions[
        to_subtract
    ].sum(axis=1)

    # fold the energy-related agriculture emissions into "agriculture"
    emissions["agriculture"] += emissions["agriculture, forestry and fishing"]

    to_drop = [
        "total energy",
        "total wL",
        "total woL",
        "agriculture, forestry and fishing",
    ]
    emissions.drop(columns=to_drop, inplace=True)

    # convert from Gg to Mt
    return emissions / 1e3
2019-04-16 14:03:51 +00:00
2022-08-01 13:21:11 +00:00
def build_eurostat_co2(input_eurostat, countries, report_year, year=1990):
    """
    Estimate emissions from the Eurostat energy balances.

    Every row of the ``build_eurostat`` table is weighted with a specific
    emission factor per fuel column and summed; columns without a factor do
    not contribute.
    """
    eurostat = build_eurostat(input_eurostat, countries, report_year, year)

    # emissions in tCO2_equiv per MWh_th
    #
    # oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    # Distillate oil (No. 2)  0.276
    # Residual oil (No. 6)  0.298
    # https://www.eia.gov/electricity/annual/html/epa_a_03.html
    factors = {
        "Solid fuels": 0.36,  # Approximates coal
        "Oil (total)": 0.285,  # Average of distillate and residue
        "Gas": 0.2,  # For natural gas
    }

    specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
    for fuel, factor in factors.items():
        specific_emissions[fuel] = factor

    weighted = eurostat.multiply(specific_emissions)
    return weighted.sum(axis=1)
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_co2_totals(countries, eea_co2, eurostat_co2):
    """
    Return emissions per country and sector, based on the EEA table.

    The EEA table is reindexed to ``countries``. For BA, RS, AL, ME and MK
    (where present in ``countries``) each sector is overwritten with the sum
    of the matching entries of ``eurostat_co2``.
    """
    co2 = eea_co2.reindex(countries)

    eurostat_only = ["BA", "RS", "AL", "ME", "MK"]

    for ct in countries.intersection(eurostat_only):
        # boolean mask picking the agriculture-related rows of this country
        is_country = eurostat_co2.index.get_level_values(0) == ct
        is_agriculture = eurostat_co2.index.isin(
            ["Agriculture / Forestry", "Fishing"], level=3
        )

        # sector name -> selector into `eurostat_co2`
        mappings = {
            "electricity": (
                ct,
                "+",
                "Conventional Thermal Power Stations",
                "of which From Coal",
            ),
            "residential non-elec": (ct, "+", "+", "Residential"),
            "services non-elec": (ct, "+", "+", "Services"),
            "road non-elec": (ct, "+", "+", "Road"),
            "rail non-elec": (ct, "+", "+", "Rail"),
            "domestic navigation": (ct, "+", "+", "Domestic Navigation"),
            "international navigation": (ct, "-", "Bunkers"),
            "domestic aviation": (ct, "+", "+", "Domestic aviation"),
            "international aviation": (ct, "+", "+", "International aviation"),
            # does not include industrial process emissions or fuel processing/refining
            "industrial non-elec": (ct, "+", "Industry"),
            # does not include non-energy emissions
            "agriculture": is_country & is_agriculture,
        }

        for sector, selector in mappings.items():
            selected = eurostat_co2.loc[selector]
            co2.at[ct, sector] = selected.sum()

    return co2
2019-04-16 14:03:51 +00:00
2021-07-01 18:09:04 +00:00
def build_transport_data(countries, population, idees):
    """
    Return the number of cars and the average fuel efficiency per country.

    Parameters
    ----------
    countries : index-like of two-letter country codes
        Rows of the returned table.
    population : pd.Series
        Population indexed by country code.
    idees : pd.DataFrame
        Output of ``build_idees``; the columns "passenger cars" and
        "passenger car efficiency" are used.

    Returns
    -------
    pd.DataFrame
        Columns "number cars" and "average fuel efficiency" (kWh/km).
        Countries without data get the average cars per person times their
        population, and the mean efficiency of the other countries.
    """
    transport_data = pd.DataFrame(index=countries)

    # collect number of cars

    transport_data["number cars"] = idees["passenger cars"]

    # CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.at["CH", "number cars"] = 4.136e6

    missing = transport_data.index[transport_data["number cars"].isna()]
    logger.info(
        f"Missing data on cars from:\n{list(missing)}\nFilling gaps with averaged data."
    )

    cars_pp = transport_data["number cars"] / population
    transport_data.loc[missing, "number cars"] = cars_pp.mean() * population

    # collect average fuel efficiency in kWh/km

    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    missing = transport_data.index[transport_data["average fuel efficiency"].isna()]
    # message previously read "Filling gapswith" (missing space)
    logger.info(
        f"Missing data on fuel efficiency from:\n{list(missing)}\nFilling gaps with averaged data."
    )

    fill_values = transport_data["average fuel efficiency"].mean()
    transport_data.loc[missing, "average fuel efficiency"] = fill_values

    return transport_data
if __name__ == "__main__":
    # allow running the script outside of a snakemake workflow
    if "snakemake" not in globals():
        from helper import mock_snakemake

        snakemake = mock_snakemake("build_energy_totals")

    logging.basicConfig(level=snakemake.config["logging"]["level"])

    config = snakemake.config["energy"]
    data_year = config["energy_totals_year"]
    report_year = config["eurostat_report_year"]
    base_year_emissions = config["base_emissions_year"]
    emissions_scope = config["emissions"]
    input_eurostat = snakemake.input.eurostat

    # population per country, summed over the NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index("index")
    population = nuts3["pop"].groupby(nuts3.country).sum()

    countries = population.index
    idees_countries = countries.intersection(eu28)

    # energy totals
    eurostat = build_eurostat(input_eurostat, countries, report_year, data_year)
    swiss = build_swiss(data_year)
    idees = build_idees(idees_countries, data_year)

    energy = build_energy_totals(countries, eurostat, swiss, idees)
    energy.to_csv(snakemake.output.energy_name)

    # emission totals for the base year
    eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
    eurostat_co2 = build_eurostat_co2(
        input_eurostat, countries, report_year, base_year_emissions
    )

    co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
    co2.to_csv(snakemake.output.co2_name)

    # transport data
    transport = build_transport_data(countries, population, idees)
    transport.to_csv(snakemake.output.transport_name)