use ffill and bfill

This commit is contained in:
lisazeyen 2024-07-31 16:06:16 +02:00
parent c52737db72
commit d48500fc3b

View File

@ -587,7 +587,8 @@ def build_idees(countries: List[str]) -> pd.DataFrame:
def fill_missing_years(fill_values: pd.Series) -> pd.Series: def fill_missing_years(fill_values: pd.Series) -> pd.Series:
""" """
Fill missing years for some countries by mean over the other years. Fill missing years for some countries by first using forward fill (ffill)
and then backward fill (bfill).
Parameters Parameters
---------- ----------
@ -598,16 +599,23 @@ def fill_missing_years(fill_values: pd.Series) -> pd.Series:
Returns Returns
------- -------
pd.Series pd.Series
A pandas Series with zero values replaced by the mean value of the corresponding A pandas Series with zero values replaced by the forward-filled and
country. backward-filled values of the corresponding country.
Notes Notes
----- -----
- The function groups the data by the 'country' level and computes the mean for each group. - The function groups the data by the 'country' level and performs forward fill
- Zero values in the original Series are replaced by the mean value of their respective country group. and backward fill to fill zero values.
- Zero values are first replaced by the last preceding non-zero value of the same
country; any still missing are then back-filled over the whole Series (the bfill step is not grouped by country).
""" """
means = fill_values.groupby(level="country").transform("mean") # Replace zero values with NaN for correct filling
return fill_values.where(fill_values != 0, means) fill_values = fill_values.replace(0, pd.NA)
# Forward fill and then backward fill within each country group
fill_values = fill_values.groupby(level="country").ffill().bfill()
return fill_values
def build_energy_totals( def build_energy_totals(
@ -724,6 +732,7 @@ def build_energy_totals(
eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum() eurostat.loc[slicer, eurostat_fuels[fuel]].groupby(level=[0, 1]).sum()
) )
# fill missing years for some countries by mean over the other years # fill missing years for some countries by forward fill, then backward fill
breakpoint()
fill_values = fill_missing_years(fill_values) fill_values = fill_missing_years(fill_values)
df.loc[to_fill, f"{fuel} {sector}"] = fill_values df.loc[to_fill, f"{fuel} {sector}"] = fill_values