build_energy_totals: pre-process data for all years first [incomplete]

This commit is contained in:
Fabian Neumann 2024-03-06 20:42:45 +01:00
parent 7abbb47efd
commit a8682db4fc

View File

@ -69,38 +69,58 @@ to_ipcc = {
} }
def build_eurostat(input_eurostat, countries, year): def eurostat_per_country(input_eurostat, country):
filename = (
f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
)
sheet = pd.read_excel(
filename,
engine="pyxlsb",
sheet_name=None,
skiprows=4,
index_col=list(range(4)),
)
sheet.pop("Cover")
return pd.concat(sheet)
def build_eurostat(input_eurostat, countries):
""" """
Return multi-index for all countries' energy data in TWh/a. Return multi-index for all countries' energy data in TWh/a.
""" """
df = {}
countries = {idees_rename.get(country, country) for country in countries} - {"CH"} countries = {idees_rename.get(country, country) for country in countries} - {"CH"}
for country in countries:
filename = ( nprocesses = snakemake.threads
f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb" disable_progress = snakemake.config["run"].get("disable_progressbar", False)
)
sheet = pd.read_excel( func = partial(eurostat_per_country, input_eurostat)
filename, tqdm_kwargs = dict(
engine="pyxlsb", ascii=False,
sheet_name=str(year), unit=" country",
skiprows=4, total=len(countries),
index_col=list(range(4)), desc="Build from eurostat database",
) disable=disable_progress,
df[country] = sheet )
df = pd.concat(df, axis=0) with mute_print():
with mp.Pool(processes=nprocesses) as pool:
dfs = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))
index_names = ["country", "year", "lvl1", "lvl2", "lvl3", "lvl4"]
df = pd.concat(dfs, keys=countries, names=index_names)
df.index = df.index.set_levels(df.index.levels[1].astype(int), level=1)
# drop columns with all NaNs # drop columns with all NaNs
unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")] unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
df.drop(unnamed_cols, axis=1, inplace=True) df.drop(unnamed_cols, axis=1, inplace=True)
df.drop(year, axis=1, inplace=True) df.drop(list(range(1990, 2022)), axis=1, inplace=True, errors="ignore")
# make numeric values where possible # make numeric values where possible
df.replace("Z", 0, inplace=True) df.replace("Z", 0, inplace=True)
df = df.apply(pd.to_numeric, errors="coerce") df = df.apply(pd.to_numeric, errors="coerce")
df = df.select_dtypes(include=[np.number]) df = df.select_dtypes(include=[np.number])
# write 'International aviation' to the 2nd level of the multiindex # write 'International aviation' to the lower level of the multiindex
int_avia = df.index.get_level_values(2) == "International aviation" int_avia = df.index.get_level_values(3) == "International aviation"
temp = df.loc[int_avia] temp = df.loc[int_avia]
temp.index = pd.MultiIndex.from_frame( temp.index = pd.MultiIndex.from_frame(
temp.index.to_frame().fillna("International aviation") temp.index.to_frame().fillna("International aviation")
@ -113,11 +133,11 @@ def build_eurostat(input_eurostat, countries, year):
"Commercial & public services": "Services", "Commercial & public services": "Services",
"Domestic navigation": "Domestic Navigation", "Domestic navigation": "Domestic Navigation",
"International maritime bunkers": "Bunkers", "International maritime bunkers": "Bunkers",
"UK": "GB",
} }
columns_rename = {"Total": "Total all products", "UK": "GB"} columns_rename = {"Total": "Total all products"}
df.rename(index=index_rename, columns=columns_rename, inplace=True) df.rename(index=index_rename, columns=columns_rename, inplace=True)
df.sort_index(inplace=True) df.sort_index(inplace=True)
df.index.names = [None] * len(df.index.names)
# convert to TWh/a from ktoe/a # convert to TWh/a from ktoe/a
df *= 11.63 / 1e3 df *= 11.63 / 1e3
@ -125,13 +145,21 @@ def build_eurostat(input_eurostat, countries, year):
return df return df
def build_swiss(year): def build_swiss():
""" """
Return a pd.Series of Swiss energy data in TWh/a. Return a pd.DataFrame of Swiss energy data in TWh/a.
""" """
fn = snakemake.input.swiss fn = snakemake.input.swiss
df = pd.read_csv(fn, index_col=[0, 1]).loc["CH", str(year)] df = pd.read_csv(fn, index_col=[0, 1])
df.columns = df.columns.astype(int)
df.columns.name = "year"
df = df.stack().unstack("item")
df.columns.name = None
# convert PJ/a to TWh/a # convert PJ/a to TWh/a
df /= 3.6 df /= 3.6
@ -139,35 +167,35 @@ def build_swiss(year):
return df return df
def idees_per_country(ct, year, base_dir): def idees_per_country(ct, base_dir):
ct_idees = idees_rename.get(ct, ct) ct_idees = idees_rename.get(ct, ct)
fn_residential = f"{base_dir}/JRC-IDEES-2015_Residential_{ct_idees}.xlsx" fn_residential = f"{base_dir}/JRC-IDEES-2015_Residential_{ct_idees}.xlsx"
fn_tertiary = f"{base_dir}/JRC-IDEES-2015_Tertiary_{ct_idees}.xlsx" fn_tertiary = f"{base_dir}/JRC-IDEES-2015_Tertiary_{ct_idees}.xlsx"
fn_transport = f"{base_dir}/JRC-IDEES-2015_Transport_{ct_idees}.xlsx" fn_transport = f"{base_dir}/JRC-IDEES-2015_Transport_{ct_idees}.xlsx"
ct_totals = {}
# residential # residential
df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)[year] df = pd.read_excel(fn_residential, "RES_hh_fec", index_col=0)
rows = ["Advanced electric heating", "Conventional electric heating"] rows = ["Advanced electric heating", "Conventional electric heating"]
ct_totals = { ct_totals["electricity residential space"] = df.loc[rows].sum()
"total residential space": df["Space heating"], ct_totals["total residential space"] = df.loc["Space heating"]
"electricity residential space": df[rows].sum(), ct_totals["total residential water"] = df.loc["Water heating"]
}
ct_totals["total residential water"] = df.at["Water heating"]
assert df.index[23] == "Electricity" assert df.index[23] == "Electricity"
ct_totals["electricity residential water"] = df.iloc[23] ct_totals["electricity residential water"] = df.iloc[23]
ct_totals["total residential cooking"] = df["Cooking"] ct_totals["total residential cooking"] = df.loc["Cooking"]
assert df.index[30] == "Electricity" assert df.index[30] == "Electricity"
ct_totals["electricity residential cooking"] = df.iloc[30] ct_totals["electricity residential cooking"] = df.iloc[30]
df = pd.read_excel(fn_residential, "RES_summary", index_col=0)[year] df = pd.read_excel(fn_residential, "RES_summary", index_col=0)
row = "Energy consumption by fuel - Eurostat structure (ktoe)" row = "Energy consumption by fuel - Eurostat structure (ktoe)"
ct_totals["total residential"] = df[row] ct_totals["total residential"] = df.loc[row]
assert df.index[47] == "Electricity" assert df.index[47] == "Electricity"
ct_totals["electricity residential"] = df.iloc[47] ct_totals["electricity residential"] = df.iloc[47]
@ -180,27 +208,27 @@ def idees_per_country(ct, year, base_dir):
# services # services
df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)[year] df = pd.read_excel(fn_tertiary, "SER_hh_fec", index_col=0)
ct_totals["total services space"] = df["Space heating"] ct_totals["total services space"] = df.loc["Space heating"]
rows = ["Advanced electric heating", "Conventional electric heating"] rows = ["Advanced electric heating", "Conventional electric heating"]
ct_totals["electricity services space"] = df[rows].sum() ct_totals["electricity services space"] = df.loc[rows].sum()
ct_totals["total services water"] = df["Hot water"] ct_totals["total services water"] = df.loc["Hot water"]
assert df.index[24] == "Electricity" assert df.index[24] == "Electricity"
ct_totals["electricity services water"] = df.iloc[24] ct_totals["electricity services water"] = df.iloc[24]
ct_totals["total services cooking"] = df["Catering"] ct_totals["total services cooking"] = df.loc["Catering"]
assert df.index[31] == "Electricity" assert df.index[31] == "Electricity"
ct_totals["electricity services cooking"] = df.iloc[31] ct_totals["electricity services cooking"] = df.iloc[31]
df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)[year] df = pd.read_excel(fn_tertiary, "SER_summary", index_col=0)
row = "Energy consumption by fuel - Eurostat structure (ktoe)" row = "Energy consumption by fuel - Eurostat structure (ktoe)"
ct_totals["total services"] = df[row] ct_totals["total services"] = df.loc[row]
assert df.index[50] == "Electricity" assert df.index[50] == "Electricity"
ct_totals["electricity services"] = df.iloc[50] ct_totals["electricity services"] = df.iloc[50]
@ -216,7 +244,7 @@ def idees_per_country(ct, year, base_dir):
start = "Detailed split of energy consumption (ktoe)" start = "Detailed split of energy consumption (ktoe)"
end = "Market shares of energy uses (%)" end = "Market shares of energy uses (%)"
df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end, year] df = pd.read_excel(fn_tertiary, "AGR_fec", index_col=0).loc[start:end]
rows = [ rows = [
"Lighting", "Lighting",
@ -224,30 +252,30 @@ def idees_per_country(ct, year, base_dir):
"Specific electricity uses", "Specific electricity uses",
"Pumping devices (electric)", "Pumping devices (electric)",
] ]
ct_totals["total agriculture electricity"] = df[rows].sum() ct_totals["total agriculture electricity"] = df.loc[rows].sum()
rows = ["Specific heat uses", "Low enthalpy heat"] rows = ["Specific heat uses", "Low enthalpy heat"]
ct_totals["total agriculture heat"] = df[rows].sum() ct_totals["total agriculture heat"] = df.loc[rows].sum()
rows = [ rows = [
"Motor drives", "Motor drives",
"Farming machine drives (diesel oil incl. biofuels)", "Farming machine drives (diesel oil incl. biofuels)",
"Pumping devices (diesel oil incl. biofuels)", "Pumping devices (diesel oil incl. biofuels)",
] ]
ct_totals["total agriculture machinery"] = df[rows].sum() ct_totals["total agriculture machinery"] = df.loc[rows].sum()
row = "Agriculture, forestry and fishing" row = "Agriculture, forestry and fishing"
ct_totals["total agriculture"] = df[row] ct_totals["total agriculture"] = df.loc[row]
# transport # transport
df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)[year] df = pd.read_excel(fn_transport, "TrRoad_ene", index_col=0)
ct_totals["total road"] = df["by fuel (EUROSTAT DATA)"] ct_totals["total road"] = df.loc["by fuel (EUROSTAT DATA)"]
ct_totals["electricity road"] = df["Electricity"] ct_totals["electricity road"] = df.loc["Electricity"]
ct_totals["total two-wheel"] = df["Powered 2-wheelers (Gasoline)"] ct_totals["total two-wheel"] = df.loc["Powered 2-wheelers (Gasoline)"]
assert df.index[19] == "Passenger cars" assert df.index[19] == "Passenger cars"
ct_totals["total passenger cars"] = df.iloc[19] ct_totals["total passenger cars"] = df.iloc[19]
@ -268,16 +296,16 @@ def idees_per_country(ct, year, base_dir):
ct_totals["electricity light duty road freight"] = df.iloc[49] ct_totals["electricity light duty road freight"] = df.iloc[49]
row = "Heavy duty vehicles (Diesel oil incl. biofuels)" row = "Heavy duty vehicles (Diesel oil incl. biofuels)"
ct_totals["total heavy duty road freight"] = df[row] ct_totals["total heavy duty road freight"] = df.loc[row]
assert df.index[61] == "Passenger cars" assert df.index[61] == "Passenger cars"
ct_totals["passenger car efficiency"] = df.iloc[61] ct_totals["passenger car efficiency"] = df.iloc[61]
df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)[year] df = pd.read_excel(fn_transport, "TrRail_ene", index_col=0)
ct_totals["total rail"] = df["by fuel (EUROSTAT DATA)"] ct_totals["total rail"] = df.loc["by fuel (EUROSTAT DATA)"]
ct_totals["electricity rail"] = df["Electricity"] ct_totals["electricity rail"] = df.loc["Electricity"]
assert df.index[15] == "Passenger transport" assert df.index[15] == "Passenger transport"
ct_totals["total rail passenger"] = df.iloc[15] ct_totals["total rail passenger"] = df.iloc[15]
@ -293,7 +321,7 @@ def idees_per_country(ct, year, base_dir):
assert df.index[23] == "Electric" assert df.index[23] == "Electric"
ct_totals["electricity rail freight"] = df.iloc[23] ct_totals["electricity rail freight"] = df.iloc[23]
df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)[year] df = pd.read_excel(fn_transport, "TrAvia_ene", index_col=0)
assert df.index[6] == "Passenger transport" assert df.index[6] == "Passenger transport"
ct_totals["total aviation passenger"] = df.iloc[6] ct_totals["total aviation passenger"] = df.iloc[6]
@ -324,24 +352,24 @@ def idees_per_country(ct, year, base_dir):
+ ct_totals["total international aviation passenger"] + ct_totals["total international aviation passenger"]
) )
df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)[year] df = pd.read_excel(fn_transport, "TrNavi_ene", index_col=0)
# coastal and inland # coastal and inland
ct_totals["total domestic navigation"] = df["by fuel (EUROSTAT DATA)"] ct_totals["total domestic navigation"] = df.loc["by fuel (EUROSTAT DATA)"]
df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)[year] df = pd.read_excel(fn_transport, "TrRoad_act", index_col=0)
assert df.index[85] == "Passenger cars" assert df.index[85] == "Passenger cars"
ct_totals["passenger cars"] = df.iloc[85] ct_totals["passenger cars"] = df.iloc[85]
return pd.Series(ct_totals, name=ct) return pd.DataFrame(ct_totals)
def build_idees(countries, year): def build_idees(countries):
nprocesses = snakemake.threads nprocesses = snakemake.threads
disable_progress = snakemake.config["run"].get("disable_progressbar", False) disable_progress = snakemake.config["run"].get("disable_progressbar", False)
func = partial(idees_per_country, year=year, base_dir=snakemake.input.idees) func = partial(idees_per_country, base_dir=snakemake.input.idees)
tqdm_kwargs = dict( tqdm_kwargs = dict(
ascii=False, ascii=False,
unit=" country", unit=" country",
@ -353,53 +381,67 @@ def build_idees(countries, year):
with mp.Pool(processes=nprocesses) as pool: with mp.Pool(processes=nprocesses) as pool:
totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs)) totals_list = list(tqdm(pool.imap(func, countries), **tqdm_kwargs))
totals = pd.concat(totals_list, axis=1) totals = pd.concat(
totals_list,
keys=countries,
names=["country", "year"],
)
# convert ktoe to TWh # convert ktoe to TWh
exclude = totals.index.str.fullmatch("passenger cars") exclude = totals.columns.str.fullmatch("passenger cars")
totals.loc[~exclude] *= 11.63 / 1e3 totals.loc[:, ~exclude] *= 11.63 / 1e3
# convert TWh/100km to kWh/km # convert TWh/100km to kWh/km
totals.loc["passenger car efficiency"] *= 10 totals.loc[:, "passenger car efficiency"] *= 10
return totals.T return totals
def build_energy_totals(countries, eurostat, swiss, idees): def build_energy_totals(countries, eurostat, swiss, idees):
eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"} eurostat_fuels = {"electricity": "Electricity", "total": "Total all products"}
eurostat_countries = eurostat.index.levels[0]
eurostat_years = eurostat.index.levels[1]
to_drop = ["passenger cars", "passenger car efficiency"] to_drop = ["passenger cars", "passenger car efficiency"]
df = idees.reindex(countries).drop(to_drop, axis=1) new_index = pd.MultiIndex.from_product(
[countries, eurostat_years], names=["country", "year"]
)
eurostat_countries = eurostat.index.levels[0] df = idees.reindex(new_index).drop(to_drop, axis=1)
in_eurostat = df.index.intersection(eurostat_countries)
in_eurostat = df.index.levels[0].intersection(eurostat_countries)
# add international navigation # add international navigation
slicer = idx[in_eurostat, :, "Bunkers", :] slicer = idx[in_eurostat, :, :, "Bunkers", :]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum() fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
df.loc[in_eurostat, "total international navigation"] = fill_values df.loc[in_eurostat, "total international navigation"] = fill_values
# add swiss energy data # add swiss energy data
df.loc["CH"] = swiss df = pd.concat([df.drop("CH"), swiss]).sort_index()
# get values for missing countries based on Eurostat EnergyBalances # get values for missing countries based on Eurostat EnergyBalances
# divide cooking/space/water according to averages in EU28 # divide cooking/space/water according to averages in EU28
missing = df.index[df["total residential"].isna()]
to_fill = missing.intersection(eurostat_countries)
uses = ["space", "cooking", "water"] uses = ["space", "cooking", "water"]
to_fill = df.index[
df["total residential"].isna()
& df.index.get_level_values("country").isin(eurostat_countries)
]
c = to_fill.get_level_values("country")
y = to_fill.get_level_values("year")
for sector in ["residential", "services", "road", "rail"]: for sector in ["residential", "services", "road", "rail"]:
eurostat_sector = sector.capitalize() eurostat_sector = sector.capitalize()
# fuel use # fuel use
for fuel in ["electricity", "total"]: for fuel in ["electricity", "total"]:
slicer = idx[to_fill, :, :, eurostat_sector] slicer = idx[c, y, :, :, eurostat_sector]
fill_values = ( fill_values = (
eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=0).sum() eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum()
) )
df.loc[to_fill, f"{fuel} {sector}"] = fill_values df.loc[to_fill, f"{fuel} {sector}"] = fill_values
@ -461,25 +503,27 @@ def build_energy_totals(countries, eurostat, swiss, idees):
no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"] no_norway[f"total {sector}"] - no_norway[f"electricity {sector}"]
) )
fraction = nonelectric_use.div(nonelectric).mean() fraction = nonelectric_use.div(nonelectric).mean()
df.loc["NO", f"total {sector} {use}"] = total_heating * fraction df.loc["NO", f"total {sector} {use}"] = (
total_heating * fraction
).values
df.loc["NO", f"electricity {sector} {use}"] = ( df.loc["NO", f"electricity {sector} {use}"] = (
total_heating * fraction * elec_fraction total_heating * fraction * elec_fraction
) ).values
# Missing aviation # Missing aviation
slicer = idx[to_fill, :, :, "Domestic aviation"] slicer = idx[c, y, :, :, "Domestic aviation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum() fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
df.loc[to_fill, "total domestic aviation"] = fill_values df.loc[to_fill, "total domestic aviation"] = fill_values
slicer = idx[to_fill, :, :, "International aviation"] slicer = idx[c, y, :, :, "International aviation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum() fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
df.loc[to_fill, "total international aviation"] = fill_values df.loc[to_fill, "total international aviation"] = fill_values
# missing domestic navigation # missing domestic navigation
slicer = idx[to_fill, :, :, "Domestic Navigation"] slicer = idx[c, y, :, :, "Domestic Navigation"]
fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=0).sum() fill_values = eurostat.loc[slicer, "Total all products"].groupby(level=[0, 1]).sum()
df.loc[to_fill, "total domestic navigation"] = fill_values df.loc[to_fill, "total domestic navigation"] = fill_values
# split road traffic for non-IDEES # split road traffic for non-IDEES
@ -532,9 +576,11 @@ def build_energy_totals(countries, eurostat, swiss, idees):
if "BA" in df.index: if "BA" in df.index:
# fill missing data for BA (services and road energy data) # fill missing data for BA (services and road energy data)
# proportional to RS with ratio of total residential demand # proportional to RS with ratio of total residential demand
missing = df.loc["BA"] == 0.0 mean_BA = df.loc["BA"].loc[2014:2021, "total residential"].mean()
ratio = df.at["BA", "total residential"] / df.at["RS", "total residential"] mean_RS = df.loc["RS"].loc[2014:2021, "total residential"].mean()
df.loc["BA", missing] = ratio * df.loc["RS", missing] ratio = mean_BA / mean_RS
df.loc["BA"] = df.loc["BA"].replace(0.0, np.nan).values
df.loc["BA"] = df.loc["BA"].combine_first(ratio * df.loc["RS"]).values
return df return df
@ -550,7 +596,7 @@ def build_district_heat_share(countries, idees):
district_heat_share = district_heat / total_heat district_heat_share = district_heat / total_heat
district_heat_share = district_heat_share.reindex(countries) district_heat_share = district_heat_share.reindex(countries, level="country")
# Missing district heating share # Missing district heating share
dh_share = ( dh_share = (
@ -578,8 +624,6 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
index_col = ["Country_code", "Pollutant_name", "Year", "Sector_name"] index_col = ["Country_code", "Pollutant_name", "Year", "Sector_name"]
df = df.set_index(index_col).sort_index() df = df.set_index(index_col).sort_index()
emissions_scope = emissions_scope
cts = ["CH", "EUA", "NO"] + eu28_eea cts = ["CH", "EUA", "NO"] + eu28_eea
slicer = idx[cts, emissions_scope, year, to_ipcc.values()] slicer = idx[cts, emissions_scope, year, to_ipcc.values()]
@ -622,8 +666,8 @@ def build_eea_co2(input_co2, year=1990, emissions_scope="CO2"):
return emissions / 1e3 return emissions / 1e3
def build_eurostat_co2(input_eurostat, countries, year=1990): def build_eurostat_co2(eurostat, year=1990):
eurostat = build_eurostat(input_eurostat, countries, year) eurostat_year = eurostat.xs(year, level="year")
specific_emissions = pd.Series(index=eurostat.columns, dtype=float) specific_emissions = pd.Series(index=eurostat.columns, dtype=float)
@ -637,7 +681,7 @@ def build_eurostat_co2(input_eurostat, countries, year=1990):
# Residual oil (No. 6) 0.298 # Residual oil (No. 6) 0.298
# https://www.eia.gov/electricity/annual/html/epa_a_03.html # https://www.eia.gov/electricity/annual/html/epa_a_03.html
return eurostat.multiply(specific_emissions).sum(axis=1) return eurostat_year.multiply(specific_emissions).sum(axis=1)
def build_co2_totals(countries, eea_co2, eurostat_co2): def build_co2_totals(countries, eea_co2, eurostat_co2):
@ -704,7 +748,9 @@ def build_transport_data(countries, population, idees):
def rescale_idees_from_eurostat( def rescale_idees_from_eurostat(
idees_countries, energy, eurostat, input_eurostat, countries idees_countries,
energy,
eurostat,
): ):
""" """
Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
@ -714,11 +760,10 @@ def rescale_idees_from_eurostat(
""" """
main_cols = ["Total all products", "Electricity"] main_cols = ["Total all products", "Electricity"]
# read in the eurostat data for 2015 # read in the eurostat data for 2015
eurostat_2015 = build_eurostat(input_eurostat, countries, 2015)[main_cols] eurostat_2015 = eurostat.xs(2015, level="year")[main_cols]
eurostat_year = eurostat[main_cols]
# calculate the ratio of the two data sets # calculate the ratio of the two data sets
ratio = eurostat_year / eurostat_2015 ratio = eurostat[main_cols] / eurostat_2015
ratio = ratio.droplevel([1, 4]) ratio = ratio.droplevel([2, 5])
cols_rename = {"Total all products": "total", "Electricity": "ele"} cols_rename = {"Total all products": "total", "Electricity": "ele"}
index_rename = {v: k for k, v in idees_rename.items()} index_rename = {v: k for k, v in idees_rename.items()}
ratio.rename(columns=cols_rename, index=index_rename, inplace=True) ratio.rename(columns=cols_rename, index=index_rename, inplace=True)
@ -811,19 +856,45 @@ def rescale_idees_from_eurostat(
] ]
for country in idees_countries: for country in idees_countries:
slicer_target = idx[country, 2016:2021, :, :]
slicer_source = idx[country, 2015, :, :]
for sector, mapping in mappings.items(): for sector, mapping in mappings.items():
sector_ratio = ratio.loc[(country, slice(None), sector)] sector_ratio = ratio.loc[(country, slice(None), slice(None), sector)].droplevel("lvl2")
energy.loc[country, mapping["total"]] *= sector_ratio["total"].iloc[0] energy.loc[slicer_target, mapping["total"]] = cartesian(
energy.loc[country, mapping["elec"]] *= sector_ratio["ele"].iloc[0] sector_ratio.loc[2016:2021, "total"],
energy.loc[slicer_source, mapping["total"]].squeeze()
).values
energy.loc[slicer_target, mapping["elec"]] = cartesian(
sector_ratio.loc[2016:2021, "ele"],
energy.loc[slicer_source, mapping["elec"]].squeeze()
).values
avi_d = ratio.loc[(country, slice(None), "Domestic aviation"), "total"] level_drops = ["country", "lvl2", "lvl3"]
avi_i = ratio.loc[(country, "International aviation", slice(None)), "total"]
energy.loc[country, avia_inter] *= avi_i.iloc[0]
energy.loc[country, avia_domestic] *= avi_d.iloc[0]
nav = ratio.loc[(country, slice(None), "Domestic Navigation"), "total"] slicer = idx[country, :, :, "Domestic aviation"]
energy.loc[country, navigation] *= nav.iloc[0] avi_d = ratio.loc[slicer, "total"].droplevel(level_drops)
slicer = idx[country, :, :, "International aviation"]
avi_i = ratio.loc[slicer, "total"].droplevel(level_drops)
slicer = idx[country, :, :, "Domestic Navigation"]
nav = ratio.loc[slicer, "total"].droplevel(level_drops)
energy.loc[slicer_target, avia_inter] = cartesian(
avi_i.loc[2016:2021],
energy.loc[slicer_source, avia_inter].squeeze()
).values
energy.loc[slicer_target, avia_domestic] = cartesian(
avi_d.loc[2016:2021],
energy.loc[slicer_source, avia_domestic].squeeze()
).values
energy.loc[slicer_target, navigation] = cartesian(
nav.loc[2016:2021],
energy.loc[slicer_source, navigation]
).values
return energy return energy
@ -845,20 +916,18 @@ if __name__ == "__main__":
countries = snakemake.params.countries countries = snakemake.params.countries
idees_countries = pd.Index(countries).intersection(eu28) idees_countries = pd.Index(countries).intersection(eu28)
data_year = params["energy_totals_year"]
input_eurostat = snakemake.input.eurostat input_eurostat = snakemake.input.eurostat
eurostat = build_eurostat(input_eurostat, countries, data_year) eurostat = build_eurostat(input_eurostat, countries)
swiss = build_swiss(data_year) swiss = build_swiss()
# data from idees only exists from 2000-2015. read in latest data and rescale later idees = build_idees(idees_countries)
idees = build_idees(idees_countries, min(2015, data_year))
energy = build_energy_totals(countries, eurostat, swiss, idees) energy = build_energy_totals(countries, eurostat, swiss, idees)
if data_year > 2015: # Data from IDEES only exists from 2000-2015.
logger.info("Data year is after 2015. Rescaling IDEES data based on eurostat.") logger.info("Extrapolate IDEES data based on eurostat for years 2015-2021.")
energy = rescale_idees_from_eurostat( energy = rescale_idees_from_eurostat(
idees_countries, energy, eurostat, input_eurostat, countries idees_countries, energy, eurostat
) )
energy.to_csv(snakemake.output.energy_name) energy.to_csv(snakemake.output.energy_name)
@ -871,7 +940,7 @@ if __name__ == "__main__":
base_year_emissions = params["base_emissions_year"] base_year_emissions = params["base_emissions_year"]
emissions_scope = snakemake.params.energy["emissions"] emissions_scope = snakemake.params.energy["emissions"]
eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope) eea_co2 = build_eea_co2(snakemake.input.co2, base_year_emissions, emissions_scope)
eurostat_co2 = build_eurostat_co2(input_eurostat, countries, base_year_emissions) eurostat_co2 = build_eurostat_co2(eurostat, base_year_emissions)
co2 = build_co2_totals(countries, eea_co2, eurostat_co2) co2 = build_co2_totals(countries, eea_co2, eurostat_co2)
co2.to_csv(snakemake.output.co2_name) co2.to_csv(snakemake.output.co2_name)