build_energy_totals: revision of eurostat report upgrade

This commit is contained in:
Fabian Neumann 2024-03-05 18:43:24 +01:00
parent 5b513f81db
commit bf60da973b

View File

@@ -37,54 +37,6 @@ def reverse(dictionary):
return {v: k for k, v in dictionary.items()} return {v: k for k, v in dictionary.items()}
eurostat_codes = {
"EU28": "EU",
"EA19": "EA",
"Belgium": "BE",
"Bulgaria": "BG",
"Czech Republic": "CZ",
"Denmark": "DK",
"Germany": "DE",
"Estonia": "EE",
"Ireland": "IE",
"Greece": "GR",
"Spain": "ES",
"France": "FR",
"Croatia": "HR",
"Italy": "IT",
"Cyprus": "CY",
"Latvia": "LV",
"Lithuania": "LT",
"Luxembourg": "LU",
"Hungary": "HU",
"Malta": "MA",
"Netherlands": "NL",
"Austria": "AT",
"Poland": "PL",
"Portugal": "PT",
"Romania": "RO",
"Slovenia": "SI",
"Slovakia": "SK",
"Finland": "FI",
"Sweden": "SE",
"United Kingdom": "GB",
"Iceland": "IS",
"Norway": "NO",
"Montenegro": "ME",
"FYR of Macedonia": "MK",
"Albania": "AL",
"Serbia": "RS",
"Turkey": "TU",
"Bosnia and Herzegovina": "BA",
"Kosovo\n(UNSCR 1244/99)": "KO", # 2017 version
# 2016 version
"Kosovo\n(under United Nations Security Council Resolution 1244/99)": "KO",
"Moldova": "MO",
"Ukraine": "UK",
"Switzerland": "CH",
}
idees_rename = {"GR": "EL", "GB": "UK"} idees_rename = {"GR": "EL", "GB": "UK"}
eu28 = cc.EU28as("ISO2").ISO2.tolist() eu28 = cc.EU28as("ISO2").ISO2.tolist()
@@ -121,79 +73,54 @@ def build_eurostat(input_eurostat, countries, year):
""" """
Return multi-index for all countries' energy data in TWh/a. Return multi-index for all countries' energy data in TWh/a.
""" """
# read in every country file in countries df = {}
eurostat = pd.DataFrame() countries = {idees_rename.get(country, country) for country in countries} - {"CH"}
countries = [country if country != "GB" else "UK" for country in countries]
countries = [country if country != "GR" else "EL" for country in countries]
for country in countries: for country in countries:
filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb" filename = (
if os.path.exists(input_eurostat + filename): f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
df = pd.read_excel( )
input_eurostat + filename, sheet = pd.read_excel(
engine="pyxlsb", filename,
sheet_name=str(year), engine="pyxlsb",
skiprows=4, sheet_name=str(year),
index_col=list(range(4)), skiprows=4,
) index_col=list(range(4)),
# replace entry 'Z' with 0 )
df.replace("Z", 0, inplace=True) df[country] = sheet
# write 'International aviation' to the 2nd level of the multiindex df = pd.concat(df, axis=0)
index_number = (
df.index.get_level_values(1) == "International aviation" # drop columns with all NaNs
).argmax() unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
new_index = ( df.drop(unnamed_cols, axis=1, inplace=True)
"-", df.drop(year, axis=1, inplace=True)
"International aviation",
"International aviation", # make numeric values where possible
"ktoe", df.replace("Z", 0, inplace=True)
) df = df.apply(pd.to_numeric, errors="coerce")
modified_index = list(df.index) df = df.select_dtypes(include=[np.number])
modified_index[index_number] = new_index
df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names) # write 'International aviation' to the 2nd level of the multiindex
# drop the annoying subhead line int_avia = df.index.get_level_values(2) == "International aviation"
df.drop(df[df[year] == year].index, inplace=True) temp = df.loc[int_avia]
# replace 'Z' with 0 temp.index = pd.MultiIndex.from_frame(
df = df.replace("Z", 0) temp.index.to_frame().fillna("International aviation")
# add country to the multiindex )
new_tuple = [(country, *idx) for idx in df.index] df = pd.concat([temp, df.loc[~int_avia]])
new_mindex = pd.MultiIndex.from_tuples(
new_tuple, names=["country", None, "name", None, "unit"]
)
df.index = new_mindex
# make numeric values where possible
df = df.apply(pd.to_numeric, errors="coerce")
# drop non-numeric columns
non_numeric_cols = df.columns[df.dtypes != float]
df.drop(non_numeric_cols, axis=1, inplace=True)
# concatenate the dataframes
eurostat = pd.concat([eurostat, df], axis=0)
eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
# Renaming some indices # Renaming some indices
rename = { index_rename = {
"Households": "Residential", "Households": "Residential",
"Commercial & public services": "Services", "Commercial & public services": "Services",
"Domestic navigation": "Domestic Navigation", "Domestic navigation": "Domestic Navigation",
"International maritime bunkers": "Bunkers",
} }
for name, rename in rename.items(): columns_rename = {"Total": "Total all products", "UK": "GB"}
eurostat.index = eurostat.index.set_levels( df.rename(index=index_rename, columns=columns_rename, inplace=True)
eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename), df.sort_index(inplace=True)
level=3, df.index.names = [None] * len(df.index.names)
)
new_index = eurostat.index.set_levels(
eurostat.index.levels[2].where(
eurostat.index.levels[2] != "International maritime bunkers", "Bunkers"
),
level=2,
)
eurostat.index = new_index
eurostat.rename(columns={"Total": "Total all products"}, inplace=True) # convert to TWh/a from ktoe/a
eurostat.index = eurostat.index.set_levels( df *= 11.63 / 1e3
eurostat.index.levels[0].where(eurostat.index.levels[0] != "UK", "GB"), level=0
)
df = eurostat * 11.63 / 1e3
return df return df
@@ -776,25 +703,25 @@ def build_transport_data(countries, population, idees):
return transport_data return transport_data
def rescale(idees_countries, energy, eurostat): def rescale_idees_from_eurostat(
idees_countries, energy, eurostat, input_eurostat, countries
):
""" """
Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
data and the 2015 eurostat data. data and the 2015 eurostat data.
missing data: ['passenger car efficiency', 'passenger cars'] missing data: ['passenger car efficiency', 'passenger cars']
""" """
main_cols = ["Total all products", "Electricity"]
# read in the eurostat data for 2015 # read in the eurostat data for 2015
eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[ eurostat_2015 = build_eurostat(input_eurostat, countries, 2015)[main_cols]
["Total all products", "Electricity"] eurostat_year = eurostat[main_cols]
]
eurostat_year = eurostat[["Total all products", "Electricity"]]
# calculate the ratio of the two data sets # calculate the ratio of the two data sets
ratio = eurostat_year / eurostat_2015 ratio = eurostat_year / eurostat_2015
ratio = ratio.droplevel([1, 4]) ratio = ratio.droplevel([1, 4])
ratio.rename( cols_rename = {"Total all products": "total", "Electricity": "ele"}
columns={"Total all products": "total", "Electricity": "ele"}, inplace=True index_rename = {v: k for k, v in idees_rename.items()}
) ratio.rename(columns=cols_rename, index=index_rename, inplace=True)
ratio = ratio.rename(index={"EL": "GR"}, level=0)
mappings = { mappings = {
"Residential": { "Residential": {
@@ -887,16 +814,16 @@ def rescale(idees_countries, energy, eurostat):
for sector, mapping in mappings.items(): for sector, mapping in mappings.items():
sector_ratio = ratio.loc[(country, slice(None), sector)] sector_ratio = ratio.loc[(country, slice(None), sector)]
energy.loc[country, mapping["total"]] *= sector_ratio[["total"]].iloc[0, 0] energy.loc[country, mapping["total"]] *= sector_ratio["total"].iloc[0]
energy.loc[country, mapping["elec"]] *= sector_ratio[["ele"]].iloc[0, 0] energy.loc[country, mapping["elec"]] *= sector_ratio["ele"].iloc[0]
avi_d = ratio.loc[(country, slice(None), "Domestic aviation")] avi_d = ratio.loc[(country, slice(None), "Domestic aviation"), "total"]
avi_i = ratio.loc[(country, "International aviation", slice(None))] avi_i = ratio.loc[(country, "International aviation", slice(None)), "total"]
energy.loc[country, avia_inter] *= avi_i[["total"]].iloc[0, 0] energy.loc[country, avia_inter] *= avi_i.iloc[0]
energy.loc[country, avia_domestic] *= avi_d[["total"]].iloc[0, 0] energy.loc[country, avia_domestic] *= avi_d.iloc[0]
nav = ratio.loc[(country, slice(None), "Domestic Navigation")] nav = ratio.loc[(country, slice(None), "Domestic Navigation"), "total"]
energy.loc[country, navigation] *= nav[["total"]].iloc[0, 0] energy.loc[country, navigation] *= nav.iloc[0]
return energy return energy
@@ -922,17 +849,16 @@ if __name__ == "__main__":
input_eurostat = snakemake.input.eurostat input_eurostat = snakemake.input.eurostat
eurostat = build_eurostat(input_eurostat, countries, data_year) eurostat = build_eurostat(input_eurostat, countries, data_year)
swiss = build_swiss(data_year) swiss = build_swiss(data_year)
# data from idees only exists from 2000-2015 # data from idees only exists from 2000-2015. read in latest data and rescale later
if data_year > 2015: idees = build_idees(idees_countries, min(2015, data_year))
# read in latest data and rescale later
idees = build_idees(idees_countries, 2015)
else:
idees = build_idees(idees_countries, data_year)
energy = build_energy_totals(countries, eurostat, swiss, idees) energy = build_energy_totals(countries, eurostat, swiss, idees)
if data_year > 2015: if data_year > 2015:
energy = rescale(idees_countries, energy, eurostat) logger.info("Data year is after 2015. Rescaling IDEES data based on eurostat.")
energy = rescale_idees_from_eurostat(
idees_countries, energy, eurostat, input_eurostat, countries
)
energy.to_csv(snakemake.output.energy_name) energy.to_csv(snakemake.output.energy_name)