pypsa-eur/scripts/build_energy_totals.py
martavp 5db196f92a build_eea_co2() now reads version23 of UNFCCC inventory.
**This requires updating to UNFCCC_v23.csv in the data bundle**

This allows the same function to be used to read the 2018 emissions, which are assumed to remain constant in 2019 and 2020 and are subtracted from the carbon budget (estimated from 2018 onwards).

I checked and 1990 emissions calculated with UNFCCC_v23 are very similar to those calculated with UNFCCC_v21 (<1% differences in all values at EU level).
Some countries show higher deviations, mainly in domestic aviation and navigation. I guess because those values started to be reported later and v23 should include more accurate values.
2020-12-29 11:31:00 +01:00

567 lines
22 KiB
Python

import pandas as pd
import geopandas as gpd
# shorthand for slicing MultiIndex objects with .loc
idx = pd.IndexSlice

# Translation of the country names found in the Eurostat energy balances
# to two-letter country codes. ISO 3166-1 alpha-2 codes are used, with
# Greece as 'GR' and the United Kingdom as 'GB' to match `eu28` below.
country_to_code = {
    'EU28': 'EU',
    'EA19': 'EA',
    'Belgium': 'BE',
    'Bulgaria': 'BG',
    'Czech Republic': 'CZ',
    'Denmark': 'DK',
    'Germany': 'DE',
    'Estonia': 'EE',
    'Ireland': 'IE',
    'Greece': 'GR',
    'Spain': 'ES',
    'France': 'FR',
    'Croatia': 'HR',
    'Italy': 'IT',
    'Cyprus': 'CY',
    'Latvia': 'LV',
    'Lithuania': 'LT',
    'Luxembourg': 'LU',
    'Hungary': 'HU',
    'Malta': 'MT',  # was 'MA' (Morocco), inconsistent with `eu28`
    'Netherlands': 'NL',
    'Austria': 'AT',
    'Poland': 'PL',
    'Portugal': 'PT',
    'Romania': 'RO',
    'Slovenia': 'SI',
    'Slovakia': 'SK',
    'Finland': 'FI',
    'Sweden': 'SE',
    'United Kingdom': 'GB',
    'Iceland': 'IS',
    'Norway': 'NO',
    'Montenegro': 'ME',
    'FYR of Macedonia': 'MK',
    'Albania': 'AL',
    'Serbia': 'RS',
    'Turkey': 'TR',  # was 'TU'
    'Bosnia and Herzegovina': 'BA',
    # Kosovo has no official ISO code; both report spellings share 'KO'
    'Kosovo\n(UNSCR 1244/99)': 'KO',  # 2017 version
    'Kosovo\n(under United Nations Security Council Resolution 1244/99)': 'KO',  # 2016 version
    'Moldova': 'MD',  # was 'MO'
    'Ukraine': 'UA',  # was 'UK', which clashed with the EEA code for the United Kingdom
    'Switzerland': 'CH',
}

# countries for which no JRC-IDEES workbook is read (see build_idees)
non_EU = ['NO', 'CH', 'ME', 'MK', 'RS', 'BA', 'AL']

# codes that differ in the JRC-IDEES file names
rename = {"GR": "EL",
          "GB": "UK"}

eu28 = ['FR', 'DE', 'GB', 'IT', 'ES', 'PL', 'SE', 'NL', 'BE', 'FI', 'CZ',
        'DK', 'PT', 'RO', 'AT', 'BG', 'EE', 'GR', 'LV',
        'HU', 'IE', 'SK', 'LT', 'HR', 'LU', 'SI'] + ['CY', 'MT']

# same list for the EEA inventory, which uses "UK" instead of "GB"
eu28_eea = [ct for ct in eu28 if ct != "GB"] + ["UK"]
def build_eurostat(year, stats_from_year=2016):
    """Return multi-index for all countries' energy data in TWh/a.

    Parameters
    ----------
    year
        Year of the energy balances; inserted into the workbook file name.
    stats_from_year
        Edition of the Eurostat report to read (2016 or 2017). Defaults to
        2016, the edition that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        All sheets of the workbook concatenated, with the country code as
        outermost index level, restricted to float columns and scaled
        from ktoe/a to TWh/a.

    Raises
    ------
    ValueError
        If no workbook is known for ``stats_from_year``.
    """
    fns = {2016: "data/eurostat-energy_balances-june_2016_edition/{year}-Energy-Balances-June2016edition.xlsx",
           2017: "data/eurostat-energy_balances-june_2017_edition/{year}-ENERGY-BALANCES-June2017edition.xlsx"}
    if stats_from_year not in fns:
        raise ValueError("No Eurostat energy balances known for edition {}; choose one of {}"
                         .format(stats_from_year, sorted(fns)))

    # 2016 includes BA, 2017 doesn't

    # with sheet_name=None, a dictionary of all sheets is returned
    sheets = pd.read_excel(fns[stats_from_year].format(year=year),
                           sheet_name=None,
                           skiprows=1,
                           index_col=list(range(4)))

    # the label of each sheet's first column is translated to a country code
    per_country = {country_to_code[sheet.columns[0]]: sheet
                   for sheet in sheets.values()}

    # sorted index necessary for slicing
    df = pd.concat(per_country, sort=True).sort_index()

    # drop non-numeric columns; convert ktoe/a to TWh/a
    return df.drop(df.columns[df.dtypes != float], axis=1)*11.63/1e3
def build_swiss(year):
    """Return the "CH" data for `year` from the Swiss CSV, scaled PJ/a -> TWh/a."""
    fn = "data/switzerland-sfoe/switzerland-new_format.csv"

    # the first two CSV columns form the row index
    swiss_pj = pd.read_csv(fn, index_col=[0, 1])

    # convert PJ/a to TWh/a
    swiss_twh = swiss_pj/3.6

    return swiss_twh.loc["CH", str(year)]
# Layout of the JRC-IDEES workbooks read by build_idees, in reading order:
# (file name template, sheet name, [(output column, [(row, expected label), ...]), ...]).
# An output column backed by several rows is the sum of those rows.
_IDEES_SHEETS = [
    # RESIDENTIAL
    ("{}/JRC-IDEES-2015_Residential_{}.xlsx", "RES_hh_fec", [
        ("total residential space", [(2, "Space heating")]),
        ("electricity residential space", [(10, "Advanced electric heating"),
                                           (11, "Conventional electric heating")]),
        ("total residential water", [(15, "Water heating")]),
        ("electricity residential water", [(23, "Electricity")]),
        ("total residential cooking", [(25, "Cooking")]),
        ("electricity residential cooking", [(30, "Electricity")]),
    ]),
    ("{}/JRC-IDEES-2015_Residential_{}.xlsx", "RES_summary", [
        ("total residential", [(34, "Energy consumption by fuel - Eurostat structure (ktoe)")]),
        ("electricity residential", [(47, "Electricity")]),
    ]),
    # SERVICES
    ("{}/JRC-IDEES-2015_Tertiary_{}.xlsx", "SER_hh_fec", [
        ("total services space", [(2, "Space heating")]),
        ("electricity services space", [(11, "Advanced electric heating"),
                                        (12, "Conventional electric heating")]),
        ("total services water", [(17, "Hot water")]),
        ("electricity services water", [(24, "Electricity")]),
        ("total services cooking", [(27, "Catering")]),
        ("electricity services cooking", [(31, "Electricity")]),
    ]),
    ("{}/JRC-IDEES-2015_Tertiary_{}.xlsx", "SER_summary", [
        ("total services", [(37, "Energy consumption by fuel - Eurostat structure (ktoe)")]),
        ("electricity services", [(50, "Electricity")]),
    ]),
    # TRANSPORT
    ("{}/JRC-IDEES-2015_Transport_{}.xlsx", "TrRoad_ene", [
        ("total road", [(2, "by fuel (EUROSTAT DATA)")]),
        ("electricity road", [(13, "Electricity")]),
        ("total two-wheel", [(17, "Powered 2-wheelers (Gasoline)")]),
        ("total passenger cars", [(19, "Passenger cars")]),
        ("electricity passenger cars", [(30, "Battery electric vehicles")]),
        ("total other road passenger", [(31, "Motor coaches, buses and trolley buses")]),
        ("electricity other road passenger", [(39, "Battery electric vehicles")]),
        ("total light duty road freight", [(41, "Light duty vehicles")]),
        ("electricity light duty road freight", [(49, "Battery electric vehicles")]),
        ("total heavy duty road freight", [(50, "Heavy duty vehicles (Diesel oil incl. biofuels)")]),
        ("passenger car efficiency", [(61, "Passenger cars")]),
    ]),
    ("{}/JRC-IDEES-2015_Transport_{}.xlsx", "TrRail_ene", [
        ("total rail", [(2, "by fuel (EUROSTAT DATA)")]),
        ("electricity rail", [(12, "Electricity")]),
        ("total rail passenger", [(15, "Passenger transport")]),
        ("electricity rail passenger", [(16, "Metro and tram, urban light rail"),
                                        (19, "Electric"),
                                        (20, "High speed passenger trains")]),
        ("total rail freight", [(21, "Freight transport")]),
        ("electricity rail freight", [(23, "Electric")]),
    ]),
    ("{}/JRC-IDEES-2015_Transport_{}.xlsx", "TrAvia_ene", [
        ("total aviation passenger", [(6, "Passenger transport")]),
        ("total aviation freight", [(10, "Freight transport")]),
        ("total domestic aviation passenger", [(7, "Domestic")]),
        ("total international aviation passenger", [(8, "International - Intra-EU"),
                                                    (9, "International - Extra-EU")]),
        ("total domestic aviation freight", [(11, "Domestic and International - Intra-EU")]),
        ("total international aviation freight", [(12, "International - Extra-EU")]),
    ]),
    # coastal and inland
    ("{}/JRC-IDEES-2015_Transport_{}.xlsx", "TrNavi_ene", [
        ("total domestic navigation", [(2, "by fuel (EUROSTAT DATA)")]),
    ]),
    ("{}/JRC-IDEES-2015_Transport_{}.xlsx", "TrRoad_act", [
        ("passenger cars", [(85, "Passenger cars")]),
    ]),
]

# Aviation totals derived from already collected columns, filled in right
# after the "TrAvia_ene" sheet so that the column order is kept.
_IDEES_AVIATION_SUMS = [
    ("total domestic aviation", ["total domestic aviation freight",
                                 "total domestic aviation passenger"]),
    ("total international aviation", ["total international aviation freight",
                                      "total international aviation passenger"]),
]


def build_idees(year):
    """Collect the JRC-IDEES data of `year` for every country in `population`.

    Reads the module-level `population` (its index gives the countries),
    `non_EU` (countries that are skipped with a printed note) and `rename`
    (codes used in the workbook file names). Before a value is taken, the
    label in the first column of its row is asserted to be the expected one.

    Returns a DataFrame indexed by country with one column per quantity.
    All columns are scaled by 11.63/1e3 (ktoe/a -> TWh/a) except
    "passenger cars", which is left unscaled, and "passenger car
    efficiency", which is additionally multiplied by 10.
    """
    base_dir = "data/jrc-idees-2015"

    # convert ktoe/a to TWh/a
    factor = 11.63/1e3

    totals = pd.DataFrame()

    for ct in population.index:

        if ct in non_EU:
            print("When reading IDEES, skipping non-EU28 country",ct)
            continue

        idees_code = rename.get(ct, ct)

        for template, sheet, entries in _IDEES_SHEETS:
            df = pd.read_excel(template.format(base_dir, idees_code), sheet)

            for column, cells in entries:
                # guard against a changed sheet layout
                for row, label in cells:
                    assert df.iloc[row,0] == label

                rows = [r for r, _ in cells]
                if len(rows) == 1:
                    value = df.loc[rows[0], year]
                else:
                    value = df.loc[rows, year].sum()
                totals.loc[ct, column] = value

            if sheet == "TrAvia_ene":
                for column, parts in _IDEES_AVIATION_SUMS:
                    totals.loc[ct, column] = totals.loc[ct, parts].sum()

    totals = totals*factor

    # "passenger cars" is used as the number of cars in build_transport_data,
    # so the energy scaling is undone for this column
    totals["passenger cars"] = totals["passenger cars"]/factor

    # convert ktoe/100km to kWh per km
    # NOTE(review): the net factor 0.1163 matches kgoe/100km -> kWh/km; confirm the source unit
    totals["passenger car efficiency"] = 10*totals["passenger car efficiency"]

    return totals
def build_energy_totals():
    """Assemble the per-country energy totals and write them to CSV.

    Works on the module-level `idees`, `population`, `eurostat`, `swiss`
    and `snakemake` objects. Starting from the IDEES data reindexed to
    `population.index`, the function

    * adds international navigation from the Eurostat "Bunkers" rows,
    * overwrites the "CH" row with `swiss`,
    * fills countries without IDEES data from Eurostat and splits their
      totals by use and by transport mode with the fractions observed in
      the countries that do have data,
    * rescales the Norwegian space/cooking/water split (only if "NO" is
      part of the index),
    * replaces zeros in the "BA" row by scaled "RS" values.

    The result is written to `snakemake.output.energy_name` and returned.
    """
    clean_df = idees.reindex(population.index).drop(["passenger cars","passenger car efficiency"],axis=1)

    print("International navigation")
    in_eurostat = clean_df.index.intersection(eurostat.index.levels[0])
    clean_df.loc[in_eurostat,"total international navigation"] = eurostat.loc[idx[in_eurostat,:,"Bunkers",:],"Total all products"].groupby(level=0).sum()

    clean_df.loc["CH"] = swiss

    #get values for missing countries based on Eurostat EnergyBalances
    #divide cooking/space/water according to averages in EU28

    missing = clean_df.index[clean_df["total residential"].isnull()]
    missing_in_eurostat = missing.intersection(eurostat.index.levels[0])
    uses = ["space","cooking","water"]

    for sector,eurostat_sector in [("residential","Residential"),("services","Services"),
                                   ("road","Road"),("rail","Rail")]:
        for fuel,eurostat_fuel in [("electricity","Electricity"),("total","Total all products")]:
            clean_df.loc[missing_in_eurostat,"{} {}".format(fuel,sector)] = eurostat.loc[idx[missing_in_eurostat,:,:,eurostat_sector],eurostat_fuel].groupby(level=0).sum()

        # road and rail are split by vehicle type further below
        if sector in ["road","rail"]:
            continue

        # electricity: apply the mean share of each use in sector electricity
        fuel = "electricity"
        for use in uses:
            avg = (clean_df["{} {} {}".format(fuel,sector,use)]/clean_df["{} {}".format(fuel,sector)]).mean()
            print("{}: average fraction of {} for {} is {}".format(sector,fuel,use,avg))
            clean_df.loc[missing_in_eurostat,"{} {} {}".format(fuel,sector,use)] = avg*clean_df.loc[missing_in_eurostat,"{} {}".format(fuel,sector)]

        # total: electric part plus the mean non-electric share of each use
        fuel = "total"
        for use in uses:
            avg = ((clean_df["{} {} {}".format("total",sector,use)]-clean_df["{} {} {}".format("electricity",sector,use)])/
                   (clean_df["{} {}".format("total",sector)]-clean_df["{} {}".format("electricity",sector)])).mean()
            print("{}: average fraction of non-electric for {} is {}".format(sector,use,avg))
            clean_df.loc[missing_in_eurostat,"{} {} {}".format(fuel,sector,use)] = \
                clean_df.loc[missing_in_eurostat,"{} {} {}".format("electricity",sector,use)] \
                + avg*(clean_df.loc[missing_in_eurostat,"{} {}".format("total",sector)] - clean_df.loc[missing_in_eurostat,"{} {}".format("electricity",sector)])

    #Fix Norway space and water heating fractions
    #http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    #The main heating source for about 73 per cent of the households is based on electricity
    #=> 27% is non-electric
    # Only applied when Norway is part of the data: clean_df.drop("NO")
    # raises a KeyError otherwise.
    if "NO" in clean_df.index:
        elec_fraction = 0.73

        without_norway = clean_df.drop("NO")

        for sector in ["residential","services"]:

            #assume non-electric is heating
            total_heating = (clean_df.loc["NO","{} {}".format("total",sector)]-clean_df.loc["NO","{} {}".format("electricity",sector)])/(1-elec_fraction)

            for use in uses:
                fraction = ((without_norway["{} {} {}".format("total",sector,use)]-without_norway["{} {} {}".format("electricity",sector,use)])/
                            (without_norway["{} {}".format("total",sector)]-without_norway["{} {}".format("electricity",sector)])).mean()
                clean_df.loc["NO","{} {} {}".format("total",sector,use)] = total_heating*fraction
                clean_df.loc["NO","{} {} {}".format("electricity",sector,use)] = total_heating*fraction*elec_fraction

    #Missing aviation
    print("Aviation")
    clean_df.loc[missing_in_eurostat,"total domestic aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic aviation"],"Total all products"].groupby(level=0).sum()
    clean_df.loc[missing_in_eurostat,"total international aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"International aviation"],"Total all products"].groupby(level=0).sum()

    print("Domestic navigation")
    clean_df.loc[missing_in_eurostat,"total domestic navigation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic Navigation"],"Total all products"].groupby(level=0).sum()

    #split road traffic for non-IDEES
    missing = clean_df.index[clean_df["total passenger cars"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" passenger cars",fuel+" other road passenger",fuel+" light duty road freight"]
        # two-wheelers and heavy duty vehicles only have a "total" column
        if fuel == "total":
            selection = [fuel+" two-wheel"] + selection + [fuel+" heavy duty road freight"]
        # share of each vehicle type in the summed road demand of all countries with data
        road = clean_df[selection].sum()
        road_fraction = road/road.sum()
        for i in road_fraction.index:
            clean_df.loc[missing,i] = road_fraction[i]*clean_df.loc[missing,fuel+" road"]

    #split rail traffic for non-IDEES
    missing = clean_df.index[clean_df["total rail passenger"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" rail passenger",fuel+" rail freight"]
        rail = clean_df[selection].sum()
        rail_fraction = rail/rail.sum()
        for i in rail_fraction.index:
            clean_df.loc[missing,i] = rail_fraction[i]*clean_df.loc[missing,fuel+" rail"].values

    #split aviation traffic for non-IDEES
    missing = clean_df.index[clean_df["total domestic aviation passenger"].isnull()]
    for destination in ["domestic","international"]:
        selection = ["total " + destination+" aviation passenger","total " + destination+" aviation freight"]
        aviation = clean_df[selection].sum()
        aviation_fraction = aviation/aviation.sum()
        for i in aviation_fraction.index:
            clean_df.loc[missing,i] = aviation_fraction[i]*clean_df.loc[missing,"total "+ destination + " aviation"].values
    clean_df.loc[missing,"total aviation passenger"] = clean_df.loc[missing,["total domestic aviation passenger","total international aviation passenger"]].sum(axis=1)
    clean_df.loc[missing,"total aviation freight"] = clean_df.loc[missing,["total domestic aviation freight","total international aviation freight"]].sum(axis=1)

    if "BA" in clean_df.index:
        #fix missing data for BA (services and road energy data)
        missing = (clean_df.loc["BA"] == 0.)

        #add back in proportional to RS with ratio of total residential demand
        clean_df.loc["BA",missing] = clean_df.loc["BA","total residential"]/clean_df.loc["RS","total residential"]*clean_df.loc["RS",missing]

    clean_df.to_csv(snakemake.output.energy_name)

    return clean_df
def build_eea_co2(year=1990):
    """Return the CO2 emissions per country and sector from the EEA inventory.

    Reads "data/eea/UNFCCC_v23.csv" and selects the CO2 entries of `year`
    for CH, NO, the EU aggregate ("EUA", returned as "EU28") and the
    countries in the module-level `eu28_eea` ("UK" is returned as "GB").

    Returns a DataFrame indexed by country code with one column per sector.
    "industrial non-elec" is the remainder of "total energy" after
    subtracting the listed energy sub-sectors; the columns "total energy",
    "total wL" and "total woL" are dropped. All values are divided by 1e3.
    NOTE(review): presumably a conversion to MtCO2; confirm the unit of the
    "emissions" column.
    """
    # see ../notebooks/compute_1990_Europe_emissions_for_targets.ipynb

    #https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-16
    #downloaded 201228 (modified by EEA last on 201221)
    fn = "data/eea/UNFCCC_v23.csv"
    df = pd.read_csv(fn, encoding="latin-1")

    # the range "1985-1987" is mapped to a single year so the column can be cast to int
    df.loc[df["Year"] == "1985-1987","Year"] = 1986
    df["Year"] = df["Year"].astype(int)
    df = df.set_index(['Country_code', 'Pollutant_name', 'Year', 'Sector_name']).sort_index()

    # output column -> sector name in the inventory
    # (a plain dict avoids the deprecated dtype-less pd.Series() constructor)
    sector_names = {
        "electricity": '1.A.1.a - Public Electricity and Heat Production',
        'residential non-elec': '1.A.4.b - Residential',
        'services non-elec': '1.A.4.a - Commercial/Institutional',
        'rail non-elec': "1.A.3.c - Railways",
        "road non-elec": '1.A.3.b - Road Transportation',
        "domestic navigation": "1.A.3.d - Domestic Navigation",
        'international navigation': '1.D.1.b - International Navigation',
        "domestic aviation": '1.A.3.a - Domestic Aviation',
        "international aviation": '1.D.1.a - International Aviation',
        'total energy': '1 - Energy',
        'industrial processes': '2 - Industrial Processes and Product Use',
        'agriculture': '3 - Agriculture',
        'LULUCF': '4 - Land Use, Land-Use Change and Forestry',
        'waste management': '5 - Waste management',
        'other': '6 - Other Sector',
        'indirect': 'ind_CO2 - Indirect CO2',
        "total wL": "Total (with LULUCF)",
        "total woL": "Total (without LULUCF)",
    }
    column_of_sector = {sector: column for column, sector in sector_names.items()}

    pol = "CO2" #["All greenhouse gases - (CO2 equivalent)","CO2"]

    cts = ["CH","EUA","NO"] + eu28_eea

    emissions = (df.loc[idx[cts,pol,year,list(sector_names.values())],"emissions"]
                 .unstack("Sector_name")
                 .rename(columns=column_of_sector)
                 .rename(index={"All greenhouse gases - (CO2 equivalent)" : "GHG"},level=1))

    #only take level 0, since level 1 (pol) and level 2 (year) are trivial
    emissions = emissions.groupby(level=0).sum()

    emissions.rename(index={"EUA" : "EU28", "UK" : "GB"},inplace=True)

    emissions['industrial non-elec'] = emissions['total energy'] - emissions[['electricity', 'services non-elec','residential non-elec', 'road non-elec',
                                                                              'rail non-elec', 'domestic aviation', 'international aviation', 'domestic navigation',
                                                                              'international navigation']].sum(axis=1)

    emissions.drop(columns=["total energy", "total wL", "total woL"],inplace=True)

    return emissions/1e3
def build_eurostat_co2(year=1990):
    """Return per-row CO2 emissions computed from the Eurostat balances of `year`.

    Every column of the build_eurostat(year) table is weighted with a
    specific emission factor and the row sums are returned. Only the three
    fuel columns listed below carry a factor; all other columns are NaN in
    the factor series and therefore do not contribute to the sum.
    """
    balances = build_eurostat(year)

    #emissions in tCO2_equiv per MWh_th
    #oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    #Distillate oil (No. 2) 0.276
    #Residual oil (No. 6) 0.298
    #https://www.eia.gov/electricity/annual/html/epa_a_03.html
    specific_emissions = {
        "Solid fuels": 0.36,   #Approximates coal
        "Oil (total)": 0.285,  #Average of distillate and residue
        "Gas": 0.2,            #For natural gas
    }

    se = pd.Series(index=balances.columns,dtype=float)
    for fuel, intensity in specific_emissions.items():
        se[fuel] = intensity

    return balances.multiply(se).sum(axis=1)
def build_co2_totals(eea_co2, eurostat_co2, year=1990):
    """Combine the EEA emissions with Eurostat-based values for the Balkans.

    `eea_co2` is reindexed to EU28, NO, CH, the five countries
    BA, RS, AL, ME, MK and the members of the module-level `eu28`. For
    those five countries the sector columns listed below are then filled
    from `eurostat_co2`. `year` is accepted but not used.
    """
    balkans = ["BA","RS","AL","ME","MK"]

    co2 = eea_co2.reindex(["EU28","NO","CH"] + balkans + eu28)

    # output column -> index key in eurostat_co2 below the country level
    eurostat_keys = [
        ("electricity", ("+","Conventional Thermal Power Stations","of which From Coal")),
        ("residential non-elec", ("+","+","Residential")),
        ("services non-elec", ("+","+","Services")),
        ("road non-elec", ("+","+","Road")),
        ("rail non-elec", ("+","+","Rail")),
        ("domestic navigation", ("+","+","Domestic Navigation")),
        ('international navigation', ("-","Bunkers")),
        ("domestic aviation", ("+","+","Domestic aviation")),
        ("international aviation", ("+","+","International aviation")),
        #doesn't include industrial process emissions or fuel processing/refining
        ('industrial non-elec', ("+","Industry")),
        #doesn't include non-energy emissions
        ('agriculture', ("+","+","Agriculture / Forestry")),
    ]

    for ct in balkans:
        for column, key in eurostat_keys:
            co2.loc[ct,column] = eurostat_co2[(ct,) + key].sum()

    return co2
def build_transport_data():
    """Build the car count and car efficiency per country and write them to CSV.

    Uses the module-level `population`, `idees` and `snakemake`. Countries
    without a car count get the mean number of cars per capita times their
    population; countries without an efficiency get the mean efficiency.
    The table is written to `snakemake.output.transport_name` and returned.
    """
    transport_data = pd.DataFrame(index=population.index,
                                  columns=["number cars","average fuel efficiency"])

    ## collect number of cars
    transport_data["number cars"] = idees["passenger cars"]

    #CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.loc["CH","number cars"] = 4.136e6

    no_car_count = transport_data.index[transport_data["number cars"].isnull()]
    print("Missing data on cars from:")
    print(no_car_count)

    cars_per_capita = transport_data["number cars"]/population
    # the right-hand side covers all countries; .loc aligns it to the missing ones
    transport_data.loc[no_car_count,"number cars"] = cars_per_capita.mean()*population

    ## collect average fuel efficiency in kWh/km
    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    no_efficiency = transport_data.index[transport_data["average fuel efficiency"].isnull()]
    print("Missing data on fuel efficiency from:")
    print(no_efficiency)

    mean_efficiency = transport_data["average fuel efficiency"].mean()
    transport_data.loc[no_efficiency,"average fuel efficiency"] = mean_efficiency

    transport_data.to_csv(snakemake.output.transport_name)

    return transport_data
if __name__ == "__main__":

    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict

        snakemake = Dict()

        snakemake.output = Dict()
        for _name, _path in (('energy_name', "data/energy_totals.csv"),
                             ('co2_name', "data/co2_totals.csv"),
                             ('transport_name', "data/transport_data.csv")):
            snakemake.output[_name] = _path

        snakemake.input = Dict()
        snakemake.input['nuts3_shapes'] = '../pypsa-eur/resources/nuts3_shapes.geojson'

    # population per country, summed over the NUTS3 regions
    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')
    population = nuts3['pop'].groupby(nuts3.country).sum()

    # year of the energy data
    year = 2011

    # `population`, `eurostat`, `swiss` and `idees` are read as module-level
    # names by the build_* functions, so they must be bound before the calls
    eurostat = build_eurostat(year)
    swiss = build_swiss(year)
    idees = build_idees(year)

    build_energy_totals()

    # both emission inventories are read with their default year (1990)
    eea_co2 = build_eea_co2()
    eurostat_co2 = build_eurostat_co2()

    co2 = build_co2_totals(eea_co2, eurostat_co2, year)
    co2.to_csv(snakemake.output.co2_name)

    build_transport_data()