build_energy_totals: revision of eurostat report upgrade
This commit is contained in:
parent
5b513f81db
commit
bf60da973b
@ -37,54 +37,6 @@ def reverse(dictionary):
|
|||||||
return {v: k for k, v in dictionary.items()}
|
return {v: k for k, v in dictionary.items()}
|
||||||
|
|
||||||
|
|
||||||
eurostat_codes = {
|
|
||||||
"EU28": "EU",
|
|
||||||
"EA19": "EA",
|
|
||||||
"Belgium": "BE",
|
|
||||||
"Bulgaria": "BG",
|
|
||||||
"Czech Republic": "CZ",
|
|
||||||
"Denmark": "DK",
|
|
||||||
"Germany": "DE",
|
|
||||||
"Estonia": "EE",
|
|
||||||
"Ireland": "IE",
|
|
||||||
"Greece": "GR",
|
|
||||||
"Spain": "ES",
|
|
||||||
"France": "FR",
|
|
||||||
"Croatia": "HR",
|
|
||||||
"Italy": "IT",
|
|
||||||
"Cyprus": "CY",
|
|
||||||
"Latvia": "LV",
|
|
||||||
"Lithuania": "LT",
|
|
||||||
"Luxembourg": "LU",
|
|
||||||
"Hungary": "HU",
|
|
||||||
"Malta": "MA",
|
|
||||||
"Netherlands": "NL",
|
|
||||||
"Austria": "AT",
|
|
||||||
"Poland": "PL",
|
|
||||||
"Portugal": "PT",
|
|
||||||
"Romania": "RO",
|
|
||||||
"Slovenia": "SI",
|
|
||||||
"Slovakia": "SK",
|
|
||||||
"Finland": "FI",
|
|
||||||
"Sweden": "SE",
|
|
||||||
"United Kingdom": "GB",
|
|
||||||
"Iceland": "IS",
|
|
||||||
"Norway": "NO",
|
|
||||||
"Montenegro": "ME",
|
|
||||||
"FYR of Macedonia": "MK",
|
|
||||||
"Albania": "AL",
|
|
||||||
"Serbia": "RS",
|
|
||||||
"Turkey": "TU",
|
|
||||||
"Bosnia and Herzegovina": "BA",
|
|
||||||
"Kosovo\n(UNSCR 1244/99)": "KO", # 2017 version
|
|
||||||
# 2016 version
|
|
||||||
"Kosovo\n(under United Nations Security Council Resolution 1244/99)": "KO",
|
|
||||||
"Moldova": "MO",
|
|
||||||
"Ukraine": "UK",
|
|
||||||
"Switzerland": "CH",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
idees_rename = {"GR": "EL", "GB": "UK"}
|
idees_rename = {"GR": "EL", "GB": "UK"}
|
||||||
|
|
||||||
eu28 = cc.EU28as("ISO2").ISO2.tolist()
|
eu28 = cc.EU28as("ISO2").ISO2.tolist()
|
||||||
@ -121,79 +73,54 @@ def build_eurostat(input_eurostat, countries, year):
|
|||||||
"""
|
"""
|
||||||
Return multi-index for all countries' energy data in TWh/a.
|
Return multi-index for all countries' energy data in TWh/a.
|
||||||
"""
|
"""
|
||||||
# read in every country file in countries
|
df = {}
|
||||||
eurostat = pd.DataFrame()
|
countries = {idees_rename.get(country, country) for country in countries} - {"CH"}
|
||||||
countries = [country if country != "GB" else "UK" for country in countries]
|
|
||||||
countries = [country if country != "GR" else "EL" for country in countries]
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
filename = f"/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
|
filename = (
|
||||||
if os.path.exists(input_eurostat + filename):
|
f"{input_eurostat}/{country}-Energy-balance-sheets-April-2023-edition.xlsb"
|
||||||
df = pd.read_excel(
|
)
|
||||||
input_eurostat + filename,
|
sheet = pd.read_excel(
|
||||||
engine="pyxlsb",
|
filename,
|
||||||
sheet_name=str(year),
|
engine="pyxlsb",
|
||||||
skiprows=4,
|
sheet_name=str(year),
|
||||||
index_col=list(range(4)),
|
skiprows=4,
|
||||||
)
|
index_col=list(range(4)),
|
||||||
# replace entry 'Z' with 0
|
)
|
||||||
df.replace("Z", 0, inplace=True)
|
df[country] = sheet
|
||||||
# write 'International aviation' to the 2nd level of the multiindex
|
df = pd.concat(df, axis=0)
|
||||||
index_number = (
|
|
||||||
df.index.get_level_values(1) == "International aviation"
|
# drop columns with all NaNs
|
||||||
).argmax()
|
unnamed_cols = df.columns[df.columns.astype(str).str.startswith("Unnamed")]
|
||||||
new_index = (
|
df.drop(unnamed_cols, axis=1, inplace=True)
|
||||||
"-",
|
df.drop(year, axis=1, inplace=True)
|
||||||
"International aviation",
|
|
||||||
"International aviation",
|
# make numeric values where possible
|
||||||
"ktoe",
|
df.replace("Z", 0, inplace=True)
|
||||||
)
|
df = df.apply(pd.to_numeric, errors="coerce")
|
||||||
modified_index = list(df.index)
|
df = df.select_dtypes(include=[np.number])
|
||||||
modified_index[index_number] = new_index
|
|
||||||
df.index = pd.MultiIndex.from_tuples(modified_index, names=df.index.names)
|
# write 'International aviation' to the 2nd level of the multiindex
|
||||||
# drop the annoying subhead line
|
int_avia = df.index.get_level_values(2) == "International aviation"
|
||||||
df.drop(df[df[year] == year].index, inplace=True)
|
temp = df.loc[int_avia]
|
||||||
# replace 'Z' with 0
|
temp.index = pd.MultiIndex.from_frame(
|
||||||
df = df.replace("Z", 0)
|
temp.index.to_frame().fillna("International aviation")
|
||||||
# add country to the multiindex
|
)
|
||||||
new_tuple = [(country, *idx) for idx in df.index]
|
df = pd.concat([temp, df.loc[~int_avia]])
|
||||||
new_mindex = pd.MultiIndex.from_tuples(
|
|
||||||
new_tuple, names=["country", None, "name", None, "unit"]
|
|
||||||
)
|
|
||||||
df.index = new_mindex
|
|
||||||
# make numeric values where possible
|
|
||||||
df = df.apply(pd.to_numeric, errors="coerce")
|
|
||||||
# drop non-numeric columns
|
|
||||||
non_numeric_cols = df.columns[df.dtypes != float]
|
|
||||||
df.drop(non_numeric_cols, axis=1, inplace=True)
|
|
||||||
# concatenate the dataframes
|
|
||||||
eurostat = pd.concat([eurostat, df], axis=0)
|
|
||||||
|
|
||||||
eurostat.drop(["Unnamed: 4", year, "Unnamed: 6"], axis=1, inplace=True)
|
|
||||||
# Renaming some indices
|
# Renaming some indices
|
||||||
rename = {
|
index_rename = {
|
||||||
"Households": "Residential",
|
"Households": "Residential",
|
||||||
"Commercial & public services": "Services",
|
"Commercial & public services": "Services",
|
||||||
"Domestic navigation": "Domestic Navigation",
|
"Domestic navigation": "Domestic Navigation",
|
||||||
|
"International maritime bunkers": "Bunkers",
|
||||||
}
|
}
|
||||||
for name, rename in rename.items():
|
columns_rename = {"Total": "Total all products", "UK": "GB"}
|
||||||
eurostat.index = eurostat.index.set_levels(
|
df.rename(index=index_rename, columns=columns_rename, inplace=True)
|
||||||
eurostat.index.levels[3].where(eurostat.index.levels[3] != name, rename),
|
df.sort_index(inplace=True)
|
||||||
level=3,
|
df.index.names = [None] * len(df.index.names)
|
||||||
)
|
|
||||||
new_index = eurostat.index.set_levels(
|
|
||||||
eurostat.index.levels[2].where(
|
|
||||||
eurostat.index.levels[2] != "International maritime bunkers", "Bunkers"
|
|
||||||
),
|
|
||||||
level=2,
|
|
||||||
)
|
|
||||||
eurostat.index = new_index
|
|
||||||
|
|
||||||
eurostat.rename(columns={"Total": "Total all products"}, inplace=True)
|
# convert to TWh/a from ktoe/a
|
||||||
eurostat.index = eurostat.index.set_levels(
|
df *= 11.63 / 1e3
|
||||||
eurostat.index.levels[0].where(eurostat.index.levels[0] != "UK", "GB"), level=0
|
|
||||||
)
|
|
||||||
|
|
||||||
df = eurostat * 11.63 / 1e3
|
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
@ -776,25 +703,25 @@ def build_transport_data(countries, population, idees):
|
|||||||
return transport_data
|
return transport_data
|
||||||
|
|
||||||
|
|
||||||
def rescale(idees_countries, energy, eurostat):
|
def rescale_idees_from_eurostat(
|
||||||
|
idees_countries, energy, eurostat, input_eurostat, countries
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
|
Takes JRC IDEES data from 2015 and rescales it by the ratio of the eurostat
|
||||||
data and the 2015 eurostat data.
|
data and the 2015 eurostat data.
|
||||||
|
|
||||||
missing data: ['passenger car efficiency', 'passenger cars']
|
missing data: ['passenger car efficiency', 'passenger cars']
|
||||||
"""
|
"""
|
||||||
|
main_cols = ["Total all products", "Electricity"]
|
||||||
# read in the eurostat data for 2015
|
# read in the eurostat data for 2015
|
||||||
eurostat_2015 = build_eurostat(input_eurostat, countries, 2023, 2015)[
|
eurostat_2015 = build_eurostat(input_eurostat, countries, 2015)[main_cols]
|
||||||
["Total all products", "Electricity"]
|
eurostat_year = eurostat[main_cols]
|
||||||
]
|
|
||||||
eurostat_year = eurostat[["Total all products", "Electricity"]]
|
|
||||||
# calculate the ratio of the two data sets
|
# calculate the ratio of the two data sets
|
||||||
ratio = eurostat_year / eurostat_2015
|
ratio = eurostat_year / eurostat_2015
|
||||||
ratio = ratio.droplevel([1, 4])
|
ratio = ratio.droplevel([1, 4])
|
||||||
ratio.rename(
|
cols_rename = {"Total all products": "total", "Electricity": "ele"}
|
||||||
columns={"Total all products": "total", "Electricity": "ele"}, inplace=True
|
index_rename = {v: k for k, v in idees_rename.items()}
|
||||||
)
|
ratio.rename(columns=cols_rename, index=index_rename, inplace=True)
|
||||||
ratio = ratio.rename(index={"EL": "GR"}, level=0)
|
|
||||||
|
|
||||||
mappings = {
|
mappings = {
|
||||||
"Residential": {
|
"Residential": {
|
||||||
@ -887,16 +814,16 @@ def rescale(idees_countries, energy, eurostat):
|
|||||||
for sector, mapping in mappings.items():
|
for sector, mapping in mappings.items():
|
||||||
sector_ratio = ratio.loc[(country, slice(None), sector)]
|
sector_ratio = ratio.loc[(country, slice(None), sector)]
|
||||||
|
|
||||||
energy.loc[country, mapping["total"]] *= sector_ratio[["total"]].iloc[0, 0]
|
energy.loc[country, mapping["total"]] *= sector_ratio["total"].iloc[0]
|
||||||
energy.loc[country, mapping["elec"]] *= sector_ratio[["ele"]].iloc[0, 0]
|
energy.loc[country, mapping["elec"]] *= sector_ratio["ele"].iloc[0]
|
||||||
|
|
||||||
avi_d = ratio.loc[(country, slice(None), "Domestic aviation")]
|
avi_d = ratio.loc[(country, slice(None), "Domestic aviation"), "total"]
|
||||||
avi_i = ratio.loc[(country, "International aviation", slice(None))]
|
avi_i = ratio.loc[(country, "International aviation", slice(None)), "total"]
|
||||||
energy.loc[country, avia_inter] *= avi_i[["total"]].iloc[0, 0]
|
energy.loc[country, avia_inter] *= avi_i.iloc[0]
|
||||||
energy.loc[country, avia_domestic] *= avi_d[["total"]].iloc[0, 0]
|
energy.loc[country, avia_domestic] *= avi_d.iloc[0]
|
||||||
|
|
||||||
nav = ratio.loc[(country, slice(None), "Domestic Navigation")]
|
nav = ratio.loc[(country, slice(None), "Domestic Navigation"), "total"]
|
||||||
energy.loc[country, navigation] *= nav[["total"]].iloc[0, 0]
|
energy.loc[country, navigation] *= nav.iloc[0]
|
||||||
|
|
||||||
return energy
|
return energy
|
||||||
|
|
||||||
@ -922,17 +849,16 @@ if __name__ == "__main__":
|
|||||||
input_eurostat = snakemake.input.eurostat
|
input_eurostat = snakemake.input.eurostat
|
||||||
eurostat = build_eurostat(input_eurostat, countries, data_year)
|
eurostat = build_eurostat(input_eurostat, countries, data_year)
|
||||||
swiss = build_swiss(data_year)
|
swiss = build_swiss(data_year)
|
||||||
# data from idees only exists from 2000-2015
|
# data from idees only exists from 2000-2015. read in latest data and rescale later
|
||||||
if data_year > 2015:
|
idees = build_idees(idees_countries, min(2015, data_year))
|
||||||
# read in latest data and rescale later
|
|
||||||
idees = build_idees(idees_countries, 2015)
|
|
||||||
else:
|
|
||||||
idees = build_idees(idees_countries, data_year)
|
|
||||||
|
|
||||||
energy = build_energy_totals(countries, eurostat, swiss, idees)
|
energy = build_energy_totals(countries, eurostat, swiss, idees)
|
||||||
|
|
||||||
if data_year > 2015:
|
if data_year > 2015:
|
||||||
energy = rescale(idees_countries, energy, eurostat)
|
logger.info("Data year is after 2015. Rescaling IDEES data based on eurostat.")
|
||||||
|
energy = rescale_idees_from_eurostat(
|
||||||
|
idees_countries, energy, eurostat, input_eurostat, countries
|
||||||
|
)
|
||||||
|
|
||||||
energy.to_csv(snakemake.output.energy_name)
|
energy.to_csv(snakemake.output.energy_name)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user