"""Build national energy totals, CO2 totals and transport data.

Run as a script (normally by snakemake), this module reads Eurostat energy
balances, Swiss SFOE data, JRC-IDEES 2015 workbooks and EEA UNFCCC emission
data, and writes the three CSV files named in ``snakemake.output``.

The ``build_*`` functions below rely on module-level globals (``population``,
``eurostat``, ``swiss``, ``idees``, ``eea_co2``, ``eurostat_co2`` and
``snakemake``) that are assigned in the ``__main__`` section at the bottom.
"""

import pandas as pd
import geopandas as gpd

idx = pd.IndexSlice

#translations for Eurostat
#NOTE(review): several codes below (TU, MO, KO, and UK for Ukraine) are not
#ISO 3166 alpha-2 codes; they are left untouched because none of them is
#looked up in this file - confirm before relying on them elsewhere.
country_to_code = {
    'EU28' : 'EU',
    'EA19' : 'EA',
    'Belgium' : 'BE',
    'Bulgaria' : 'BG',
    'Czech Republic' : 'CZ',
    'Denmark' : 'DK',
    'Germany' : 'DE',
    'Estonia' : 'EE',
    'Ireland' : 'IE',
    'Greece' : 'GR',
    'Spain' : 'ES',
    'France' : 'FR',
    'Croatia' : 'HR',
    'Italy' : 'IT',
    'Cyprus' : 'CY',
    'Latvia' : 'LV',
    'Lithuania' : 'LT',
    'Luxembourg' : 'LU',
    'Hungary' : 'HU',
    #was 'MA', which never matched the 'MT' used in eu28 below
    'Malta' : 'MT',
    'Netherlands' : 'NL',
    'Austria' : 'AT',
    'Poland' : 'PL',
    'Portugal' : 'PT',
    'Romania' : 'RO',
    'Slovenia' : 'SI',
    'Slovakia' : 'SK',
    'Finland' : 'FI',
    'Sweden' : 'SE',
    'United Kingdom' : 'GB',
    'Iceland' : 'IS',
    'Norway' : 'NO',
    'Montenegro' : 'ME',
    'FYR of Macedonia' : 'MK',
    'Albania' : 'AL',
    'Serbia' : 'RS',
    'Turkey' : 'TU',
    'Bosnia and Herzegovina' : 'BA',
    'Kosovo\n(UNSCR 1244/99)' : 'KO',  #2017 version
    'Kosovo\n(under United Nations Security Council Resolution 1244/99)' : 'KO',  #2016 version
    'Moldova' : 'MO',
    'Ukraine' : 'UK',
    'Switzerland' : 'CH',
}

#countries for which no IDEES workbook is read
non_EU = ['NO', 'CH', 'ME', 'MK', 'RS', 'BA', 'AL']

#country codes used in the IDEES file names where they differ from ours
rename = {"GR" : "EL", "GB" : "UK"}

eu28 = ['FR', 'DE', 'GB', 'IT', 'ES', 'PL', 'SE', 'NL', 'BE', 'FI', 'CZ',
        'DK', 'PT', 'RO', 'AT', 'BG', 'EE', 'GR', 'LV',
        'HU', 'IE', 'SK', 'LT', 'HR', 'LU', 'SI'] + ['CY','MT']

#same list with the code used for the United Kingdom in the EEA data
eu28_eea = eu28[:]
eu28_eea.remove("GB")
eu28_eea.append("UK")


def build_eurostat(year):
    """Return multi-index for all countries' energy data in TWh/a.

    Reads every sheet of the Eurostat energy balance workbook for ``year``
    (one sheet per country), keys each sheet by the country code looked up
    from the name in its first column header, and concatenates them.

    Raises ``KeyError`` if a sheet's first column header is not a key of
    ``country_to_code``.
    """

    stats_from_year = 2016

    #2016 includes BA, 2017 doesn't
    fns = {2016: "data/eurostat-energy_balances-june_2016_edition/{year}-Energy-Balances-June2016edition.xlsx",
           2017: "data/eurostat-energy_balances-june_2017_edition/{year}-ENERGY-BALANCES-June2017edition.xlsx"}

    #with sheet_name as None, a dictionary of all sheets is returned
    dfs = pd.read_excel(fns[stats_from_year].format(year=year),
                        sheet_name=None,
                        skiprows=1,
                        index_col=list(range(4)))

    #sorted_index necessary for slicing
    df = pd.concat({country_to_code[sheet.columns[0]] : sheet
                    for sheet in dfs.values()},
                   sort=True).sort_index()

    #drop non-numeric columns; convert ktoe/a to TWh/a
    return df.drop(df.columns[df.dtypes != float],axis=1)*11.63/1e3


def build_swiss(year):
    """Return the Swiss ("CH") data for ``year``, converted to TWh/a."""

    fn = "data/switzerland-sfoe/switzerland-new_format.csv"

    #convert PJ/a to TWh/a
    swiss_df = pd.read_csv(fn,index_col=list(range(2)))/3.6

    return swiss_df.loc["CH",str(year)]


#Rows extracted from each IDEES sheet, in the order the columns are created.
#Each entry is (row, expected label in first column, output column); where
#several rows are summed, row and label are tuples of equal length.
_IDEES_ROWS = {
    "RES_hh_fec": [
        (2, "Space heating", "total residential space"),
        ((10, 11),
         ("Advanced electric heating", "Conventional electric heating"),
         "electricity residential space"),
        (15, "Water heating", "total residential water"),
        (23, "Electricity", "electricity residential water"),
        (25, "Cooking", "total residential cooking"),
        (30, "Electricity", "electricity residential cooking"),
    ],
    "RES_summary": [
        (34, "Energy consumption by fuel - Eurostat structure (ktoe)", "total residential"),
        (47, "Electricity", "electricity residential"),
    ],
    "SER_hh_fec": [
        (2, "Space heating", "total services space"),
        ((11, 12),
         ("Advanced electric heating", "Conventional electric heating"),
         "electricity services space"),
        (17, "Hot water", "total services water"),
        (24, "Electricity", "electricity services water"),
        (27, "Catering", "total services cooking"),
        (31, "Electricity", "electricity services cooking"),
    ],
    "SER_summary": [
        (37, "Energy consumption by fuel - Eurostat structure (ktoe)", "total services"),
        (50, "Electricity", "electricity services"),
    ],
    "TrRoad_ene": [
        (2, "by fuel (EUROSTAT DATA)", "total road"),
        (13, "Electricity", "electricity road"),
        (17, "Powered 2-wheelers (Gasoline)", "total two-wheel"),
        (19, "Passenger cars", "total passenger cars"),
        (30, "Battery electric vehicles", "electricity passenger cars"),
        (31, "Motor coaches, buses and trolley buses", "total other road passenger"),
        (39, "Battery electric vehicles", "electricity other road passenger"),
        (41, "Light duty vehicles", "total light duty road freight"),
        (49, "Battery electric vehicles", "electricity light duty road freight"),
        (50, "Heavy duty vehicles (Diesel oil incl. biofuels)", "total heavy duty road freight"),
        (61, "Passenger cars", "passenger car efficiency"),
    ],
    "TrRail_ene": [
        (2, "by fuel (EUROSTAT DATA)", "total rail"),
        (12, "Electricity", "electricity rail"),
        (15, "Passenger transport", "total rail passenger"),
        ((16, 19, 20),
         ("Metro and tram, urban light rail", "Electric", "High speed passenger trains"),
         "electricity rail passenger"),
        (21, "Freight transport", "total rail freight"),
        (23, "Electric", "electricity rail freight"),
    ],
    "TrAvia_ene": [
        (6, "Passenger transport", "total aviation passenger"),
        (10, "Freight transport", "total aviation freight"),
        (7, "Domestic", "total domestic aviation passenger"),
        ((8, 9),
         ("International - Intra-EU", "International - Extra-EU"),
         "total international aviation passenger"),
        (11, "Domestic and International - Intra-EU", "total domestic aviation freight"),
        (12, "International - Extra-EU", "total international aviation freight"),
    ],
    #coastal and inland
    "TrNavi_ene": [
        (2, "by fuel (EUROSTAT DATA)", "total domestic navigation"),
    ],
    "TrRoad_act": [
        (85, "Passenger cars", "passenger cars"),
    ],
}


def _check_idees_label(df, row, expected):
    """Assert that the first column of ``df`` holds ``expected`` at ``row``."""
    found = df.iloc[row,0]
    assert found == expected, \
        "unexpected IDEES label in row {}: found {!r}, expected {!r}".format(row,found,expected)


def _extract_idees_rows(totals, ct, df, year, entries):
    """Copy the rows listed in ``entries`` from sheet ``df`` into ``totals.loc[ct]``."""
    for rows, labels, column in entries:
        if isinstance(rows, tuple):
            #several rows are validated first and then summed
            for row, label in zip(rows, labels):
                _check_idees_label(df, row, label)
            totals.loc[ct,column] = df.loc[list(rows),year].sum()
        else:
            _check_idees_label(df, rows, labels)
            totals.loc[ct,column] = df.loc[rows,year]


def build_idees(year):
    """Return a DataFrame of IDEES values per country for ``year``.

    Loops over the global ``population.index``, skipping countries listed in
    ``non_EU``, and reads fixed rows from the Residential, Tertiary and
    Transport IDEES workbooks. The label in the first column of every row is
    checked before the value is used, so a changed workbook layout raises
    ``AssertionError`` instead of silently yielding wrong numbers.

    All columns are converted from ktoe/a to TWh/a, except "passenger cars",
    which is left unscaled, and "passenger car efficiency", which is
    converted to kWh per km.
    """

    base_dir = "data/jrc-idees-2015"

    totals = pd.DataFrame()

    #convert ktoe/a to TWh/a
    factor = 11.63/1e3

    for ct in population.index:

        if ct in non_EU:
            print("When reading IDEES, skipping non-EU28 country",ct)
            continue

        file_code = rename.get(ct,ct)

        #RESIDENTIAL
        filename = "{}/JRC-IDEES-2015_Residential_{}.xlsx".format(base_dir,file_code)
        for sheet in ("RES_hh_fec", "RES_summary"):
            df = pd.read_excel(filename,sheet_name=sheet)
            _extract_idees_rows(totals, ct, df, year, _IDEES_ROWS[sheet])

        #SERVICES
        filename = "{}/JRC-IDEES-2015_Tertiary_{}.xlsx".format(base_dir,file_code)
        for sheet in ("SER_hh_fec", "SER_summary"):
            df = pd.read_excel(filename,sheet_name=sheet)
            _extract_idees_rows(totals, ct, df, year, _IDEES_ROWS[sheet])

        #TRANSPORT
        filename = "{}/JRC-IDEES-2015_Transport_{}.xlsx".format(base_dir,file_code)
        for sheet in ("TrRoad_ene", "TrRail_ene", "TrAvia_ene"):
            df = pd.read_excel(filename,sheet_name=sheet)
            _extract_idees_rows(totals, ct, df, year, _IDEES_ROWS[sheet])

        #aggregate aviation over passenger and freight; done here so that
        #the column order of the result stays the same
        totals.loc[ct,"total domestic aviation"] = totals.loc[ct,["total domestic aviation freight","total domestic aviation passenger"]].sum()
        totals.loc[ct,"total international aviation"] = totals.loc[ct,["total international aviation freight","total international aviation passenger"]].sum()

        for sheet in ("TrNavi_ene", "TrRoad_act"):
            df = pd.read_excel(filename,sheet_name=sheet)
            _extract_idees_rows(totals, ct, df, year, _IDEES_ROWS[sheet])

    totals = totals*factor

    #the number of cars is not an energy quantity, so undo the scaling
    totals["passenger cars"] = totals["passenger cars"]/factor

    #convert ktoe/100km to kWh per km
    totals["passenger car efficiency"] = 10*totals["passenger car efficiency"]

    return totals


def build_energy_totals():
    """Assemble energy totals for all countries and write them to CSV.

    Starts from the global ``idees`` data reindexed to ``population.index``,
    overwrites the "CH" row with ``swiss``, fills countries missing from
    IDEES using the global ``eurostat`` balances, and splits sector totals
    into sub-categories using average fractions of the countries that have
    data. The result is written to ``snakemake.output.energy_name`` and
    returned.
    """

    clean_df = idees.reindex(population.index).drop(["passenger cars","passenger car efficiency"],axis=1)

    print("International navigation")
    in_eurostat = clean_df.index.intersection(eurostat.index.levels[0])
    clean_df.loc[in_eurostat,"total international navigation"] = eurostat.loc[idx[in_eurostat,:,"Bunkers",:],"Total all products"].groupby(level=0).sum()

    clean_df.loc["CH"] = swiss

    #get values for missing countries based on Eurostat EnergyBalances
    #divide cooking/space/water according to averages in EU28

    missing = clean_df.index[clean_df["total residential"].isnull()]
    missing_in_eurostat = missing.intersection(eurostat.index.levels[0])
    uses = ["space","cooking","water"]

    for sector,eurostat_sector in [("residential","Residential"),("services","Services"),
                                   ("road","Road"),("rail","Rail")]:

        for fuel,eurostat_fuel in [("electricity","Electricity"),("total","Total all products")]:
            clean_df.loc[missing_in_eurostat,"{} {}".format(fuel,sector)] = eurostat.loc[idx[missing_in_eurostat,:,:,eurostat_sector],eurostat_fuel].groupby(level=0).sum()

        #road and rail are split further down
        if sector in ["road","rail"]:
            continue

        elec_col = "{} {}".format("electricity",sector)
        total_col = "{} {}".format("total",sector)

        fuel = "electricity"
        for use in uses:
            elec_use_col = "{} {} {}".format(fuel,sector,use)
            avg = (clean_df[elec_use_col]/clean_df[elec_col]).mean()
            print("{}: average fraction of {} for {} is {}".format(sector,fuel,use,avg))
            clean_df.loc[missing_in_eurostat,elec_use_col] = avg*clean_df.loc[missing_in_eurostat,elec_col]

        fuel = "total"
        for use in uses:
            elec_use_col = "{} {} {}".format("electricity",sector,use)
            total_use_col = "{} {} {}".format(fuel,sector,use)
            avg = ((clean_df[total_use_col]-clean_df[elec_use_col])/
                   (clean_df[total_col]-clean_df[elec_col])).mean()
            print("{}: average fraction of non-electric for {} is {}".format(sector,use,avg))
            clean_df.loc[missing_in_eurostat,total_use_col] = \
                clean_df.loc[missing_in_eurostat,elec_use_col] \
                + avg*(clean_df.loc[missing_in_eurostat,total_col] - clean_df.loc[missing_in_eurostat,elec_col])

    #Fix Norway space and water heating fractions
    #http://www.ssb.no/en/energi-og-industri/statistikker/husenergi/hvert-3-aar/2014-07-14
    #The main heating source for about 73 per cent of the households is based on electricity
    #=> 27% is non-electric
    elec_fraction = 0.73

    #only applicable when Norway is part of the country selection
    if "NO" in clean_df.index:

        without_norway = clean_df.drop("NO")

        for sector in ["residential","services"]:

            elec_col = "{} {}".format("electricity",sector)
            total_col = "{} {}".format("total",sector)

            #assume non-electric is heating
            total_heating = (clean_df.loc["NO",total_col]-clean_df.loc["NO",elec_col])/(1-elec_fraction)

            for use in uses:
                elec_use_col = "{} {} {}".format("electricity",sector,use)
                total_use_col = "{} {} {}".format("total",sector,use)
                fraction = ((without_norway[total_use_col]-without_norway[elec_use_col])/
                            (without_norway[total_col]-without_norway[elec_col])).mean()
                clean_df.loc["NO",total_use_col] = total_heating*fraction
                clean_df.loc["NO",elec_use_col] = total_heating*fraction*elec_fraction

    #Missing aviation
    print("Aviation")
    clean_df.loc[missing_in_eurostat,"total domestic aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic aviation"],"Total all products"].groupby(level=0).sum()
    clean_df.loc[missing_in_eurostat,"total international aviation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"International aviation"],"Total all products"].groupby(level=0).sum()

    print("Domestic navigation")
    clean_df.loc[missing_in_eurostat,"total domestic navigation"] = eurostat.loc[idx[missing_in_eurostat,:,:,"Domestic Navigation"],"Total all products"].groupby(level=0).sum()

    #split road traffic for non-IDEES
    missing = clean_df.index[clean_df["total passenger cars"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" passenger cars",fuel+" other road passenger",fuel+" light duty road freight"]
        if fuel == "total":
            selection = [fuel+" two-wheel"] + selection + [fuel+" heavy duty road freight"]
        road = clean_df[selection].sum()
        road_fraction = road/road.sum()
        for i in road_fraction.index:
            clean_df.loc[missing,i] = road_fraction[i]*clean_df.loc[missing,fuel+" road"]

    #split rail traffic for non-IDEES
    missing = clean_df.index[clean_df["total rail passenger"].isnull()]
    for fuel in ["total","electricity"]:
        selection = [fuel+" rail passenger",fuel+" rail freight"]
        rail = clean_df[selection].sum()
        rail_fraction = rail/rail.sum()
        for i in rail_fraction.index:
            clean_df.loc[missing,i] = rail_fraction[i]*clean_df.loc[missing,fuel+" rail"].values

    #split aviation traffic for non-IDEES
    missing = clean_df.index[clean_df["total domestic aviation passenger"].isnull()]
    for destination in ["domestic","international"]:
        selection = ["total " + destination+" aviation passenger","total " + destination+" aviation freight"]
        aviation = clean_df[selection].sum()
        aviation_fraction = aviation/aviation.sum()
        for i in aviation_fraction.index:
            clean_df.loc[missing,i] = aviation_fraction[i]*clean_df.loc[missing,"total "+ destination + " aviation"].values
    clean_df.loc[missing,"total aviation passenger"] = clean_df.loc[missing,["total domestic aviation passenger","total international aviation passenger"]].sum(axis=1)
    clean_df.loc[missing,"total aviation freight"] = clean_df.loc[missing,["total domestic aviation freight","total international aviation freight"]].sum(axis=1)

    #fix missing data for BA (services and road energy data);
    #only applicable when BA is part of the country selection
    if "BA" in clean_df.index:
        missing = (clean_df.loc["BA"] == 0.)

        #add back in proportional to RS with ratio of total residential demand
        clean_df.loc["BA",missing] = clean_df.loc["BA","total residential"]/clean_df.loc["RS","total residential"]*clean_df.loc["RS",missing]

    clean_df.to_csv(snakemake.output.energy_name)

    return clean_df


def build_eea_co2():
    """Return 1990 CO2 emissions per country and sector from the EEA data.

    The values read from the "emissions" column are divided by 1e3 before
    being returned. The EEA codes "EUA" and "UK" are renamed to "EU28" and
    "GB", and a column "industrial non-elec" is derived as the energy total
    minus the explicitly listed energy sectors.
    """

    # see ../notebooks/compute_1990_Europe_emissions_for_targets.ipynb

    #https://www.eea.europa.eu/data-and-maps/data/national-emissions-reported-to-the-unfccc-and-to-the-eu-greenhouse-gas-monitoring-mechanism-14
    #downloaded 190222 (modified by EEA last on 181130)
    fn = "data/eea/UNFCCC_v21.csv"
    df = pd.read_csv(fn, encoding="latin-1")
    df.loc[df["Year"] == "1985-1987","Year"] = 1986
    df["Year"] = df["Year"].astype(int)
    df = df.set_index(['Country_code', 'Pollutant_name', 'Year', 'Sector_name']).sort_index()

    #short sector name -> sector name used in the EEA file
    e = pd.Series({
        "electricity" : '1.A.1.a - Public Electricity and Heat Production',
        'residential non-elec' : '1.A.4.b - Residential',
        'services non-elec' : '1.A.4.a - Commercial/Institutional',
        'rail non-elec' : "1.A.3.c - Railways",
        "road non-elec" : '1.A.3.b - Road Transportation',
        "domestic navigation" : "1.A.3.d - Domestic Navigation",
        'international navigation' : '1.D.1.b - International Navigation',
        "domestic aviation" : '1.A.3.a - Domestic Aviation',
        "international aviation" : '1.D.1.a - International Aviation',
        'total energy' : '1 - Energy',
        'industrial processes' : '2 - Industrial Processes and Product Use',
        'agriculture' : '3 - Agriculture',
        'LULUCF' : '4 - Land Use, Land-Use Change and Forestry',
        'waste management' : '5 - Waste management',
        'other' : '6 - Other Sector',
        'indirect' : 'ind_CO2 - Indirect CO2',
        "total wL" : "Total (with LULUCF, with indirect CO2)",
        "total woL" : "Total (without LULUCF, with indirect CO2)",
    })

    pol = "CO2" #["All greenhouse gases - (CO2 equivalent)","CO2"]

    cts = ["CH","EUA","NO"] + eu28_eea

    year = 1990

    selected = df.loc[idx[cts,pol,year,e.values],"emissions"]
    emissions = selected.unstack("Sector_name")
    emissions = emissions.rename(columns=pd.Series(e.index,e.values))
    emissions = emissions.rename(index={"All greenhouse gases - (CO2 equivalent)" : "GHG"},level=1)

    #only take level 0, since level 1 (pol) and level 2 (year) are trivial
    emissions = emissions.groupby(level=0).sum()

    emissions.rename(index={"EUA" : "EU28", "UK" : "GB"},inplace=True)

    emissions['industrial non-elec'] = emissions['total energy'] - emissions[['electricity', 'services non-elec','residential non-elec', 'road non-elec',
                                                                              'rail non-elec', 'domestic aviation', 'international aviation', 'domestic navigation',
                                                                              'international navigation']].sum(axis=1)

    emissions.drop(columns=["total energy", "total wL", "total woL"],inplace=True)

    return emissions/1e3


def build_eurostat_co2(year=1990):
    """Return emissions per Eurostat balance row, estimated from fuel use.

    Multiplies the "Solid fuels", "Oil (total)" and "Gas" columns of the
    Eurostat balance for ``year`` by fixed emission factors and sums them
    per row; columns without a factor do not contribute.
    """

    eurostat_for_co2 = build_eurostat(year)

    se = pd.Series(index=eurostat_for_co2.columns,dtype=float)

    #emissions in tCO2_equiv per MWh_th
    se["Solid fuels"] = 0.36   #Approximates coal
    se["Oil (total)"] = 0.285  #Average of distillate and residue
    se["Gas"] = 0.2            #For natural gas

    #oil values from https://www.eia.gov/tools/faqs/faq.cfm?id=74&t=11
    #Distillate oil (No. 2)  0.276
    #Residual oil (No. 6)  0.298
    #https://www.eia.gov/electricity/annual/html/epa_a_03.html

    eurostat_co2 = eurostat_for_co2.multiply(se).sum(axis=1)

    return eurostat_co2


def build_co2_totals(year=1990):
    """Combine EEA and Eurostat-based emissions and write them to CSV.

    Uses the global ``eea_co2`` for EU28, NO and CH and fills BA, RS, AL, ME
    and MK from the global ``eurostat_co2``. The result is written to
    ``snakemake.output.co2_name`` and returned.

    ``year`` is not used inside this function; it is kept so that existing
    callers passing it keep working.
    """

    co2 = eea_co2.reindex(["EU28","NO","CH","BA","RS","AL","ME","MK"] + eu28)

    for ct in ["BA","RS","AL","ME","MK"]:
        co2.loc[ct,"electricity"] = eurostat_co2[ct,"+","Conventional Thermal Power Stations","of which From Coal"].sum()
        co2.loc[ct,"residential non-elec"] = eurostat_co2[ct,"+","+","Residential"].sum()
        co2.loc[ct,"services non-elec"] = eurostat_co2[ct,"+","+","Services"].sum()
        co2.loc[ct,"road non-elec"] = eurostat_co2[ct,"+","+","Road"].sum()
        co2.loc[ct,"rail non-elec"] = eurostat_co2[ct,"+","+","Rail"].sum()
        co2.loc[ct,"domestic navigation"] = eurostat_co2[ct,"+","+","Domestic Navigation"].sum()
        co2.loc[ct,'international navigation'] = eurostat_co2[ct,"-","Bunkers"].sum()
        co2.loc[ct,"domestic aviation"] = eurostat_co2[ct,"+","+","Domestic aviation"].sum()
        co2.loc[ct,"international aviation"] = eurostat_co2[ct,"+","+","International aviation"].sum()
        #doesn't include industrial process emissions or fuel processing/refining
        co2.loc[ct,'industrial non-elec'] = eurostat_co2[ct,"+","Industry"].sum()
        #doesn't include non-energy emissions
        co2.loc[ct,'agriculture'] = eurostat_co2[ct,"+","+","Agriculture / Forestry"].sum()

    co2.to_csv(snakemake.output.co2_name)

    return co2


def build_transport_data():
    """Collect car numbers and car efficiency per country and write to CSV.

    Values come from the global ``idees`` data. Missing car numbers are
    filled with the mean cars-per-person of the other countries times the
    country's population; missing efficiencies are filled with the mean of
    the available ones. The result is written to
    ``snakemake.output.transport_name`` and returned.
    """

    transport_data = pd.DataFrame(columns=["number cars","average fuel efficiency"],
                                  index=population.index)

    ## collect number of cars

    transport_data["number cars"] = idees["passenger cars"]

    #CH from http://ec.europa.eu/eurostat/statistics-explained/index.php/Passenger_cars_in_the_EU#Luxembourg_has_the_highest_number_of_passenger_cars_per_inhabitant
    transport_data.loc["CH","number cars"] = 4.136e6

    missing = transport_data.index[transport_data["number cars"].isnull()]

    print("Missing data on cars from:")
    print(missing)

    cars_pp = transport_data["number cars"]/population

    #the assignment aligns on the index, so only the missing rows are filled
    transport_data.loc[missing,"number cars"] = cars_pp.mean()*population

    ## collect average fuel efficiency in kWh/km

    transport_data["average fuel efficiency"] = idees["passenger car efficiency"]

    missing = transport_data.index[transport_data["average fuel efficiency"].isnull()]

    print("Missing data on fuel efficiency from:")
    print(missing)

    transport_data.loc[missing,"average fuel efficiency"] = transport_data["average fuel efficiency"].mean()

    transport_data.to_csv(snakemake.output.transport_name)

    return transport_data


if __name__ == "__main__":

    # Detect running outside of snakemake and mock snakemake for testing
    if 'snakemake' not in globals():
        from vresutils import Dict
        snakemake = Dict()
        snakemake.output = Dict()
        snakemake.output['energy_name'] = "data/energy_totals.csv"
        snakemake.output['co2_name'] = "data/co2_totals.csv"
        snakemake.output['transport_name'] = "data/transport_data.csv"

        snakemake.input = Dict()
        snakemake.input['nuts3_shapes'] = 'resources/nuts3_shapes.geojson'

    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index('index')
    population = nuts3['pop'].groupby(nuts3.country).sum()

    year = 2011

    eurostat = build_eurostat(year)
    swiss = build_swiss(year)
    idees = build_idees(year)

    build_energy_totals()

    eea_co2 = build_eea_co2()
    eurostat_co2 = build_eurostat_co2()

    build_co2_totals()
    build_transport_data()