From 96b599aedd291e2888f05c52209278e22291c3d2 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 25 Jul 2022 11:52:51 +0200 Subject: [PATCH] use numpy instead of sklearn for regression --- envs/environment.yaml | 1 - scripts/build_hydro_profile.py | 15 +++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/envs/environment.yaml b/envs/environment.yaml index f5cb9aae..8bd7428f 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -39,7 +39,6 @@ dependencies: - proj - fiona <= 1.18.20 # Till issue https://github.com/Toblerity/Fiona/issues/1085 is not solved - country_converter - - scikit-learn # Keep in conda environment when calling ipython - ipython diff --git a/scripts/build_hydro_profile.py b/scripts/build_hydro_profile.py index f56cb9e3..b8d2eec8 100644 --- a/scripts/build_hydro_profile.py +++ b/scripts/build_hydro_profile.py @@ -66,7 +66,7 @@ import atlite import geopandas as gpd import pandas as pd -from sklearn.linear_model import LinearRegression +from numpy.polynomial import Polynomial import country_converter as coco cc = coco.CountryConverter() @@ -138,17 +138,16 @@ def approximate_missing_eia_stats(eia_stats, runoff_fn, countries): for c in countries: - X = runoff_eia[c].values.reshape(-1, 1) - Y = eia_stats[c].values.reshape(-1, 1) + X = runoff_eia[c] + Y = eia_stats[c] to_predict = runoff.index.difference(eia_stats.index) - X_pred = runoff.loc[to_predict, c].values.reshape(-1, 1) + X_pred = runoff.loc[to_predict, c] - linear_regressor = LinearRegression() - linear_regressor.fit(X, Y) - Y_pred = linear_regressor.predict(X_pred) + p = Polynomial.fit(X, Y, 1) + Y_pred = p(X_pred) - eia_stats_approximated[c] = pd.Series(Y_pred.T[0], index=to_predict) + eia_stats_approximated[c] = pd.Series(Y_pred, index=to_predict) eia_stats_approximated = pd.DataFrame(eia_stats_approximated) return pd.concat([eia_stats, eia_stats_approximated]).sort_index()