use numpy instead of sklearn for regression

2022-07-25 11:52:51 +02:00 · 2022-07-25 11:52:51 +02:00 · 96b599aedd
commit 96b599aedd
parent 5bb432141d
2 changed files with 7 additions and 9 deletions
--- a/envs/environment.yaml
+++ b/envs/environment.yaml
@@ -39,7 +39,6 @@ dependencies:
  - proj
  - fiona <= 1.18.20  # Till issue https://github.com/Toblerity/Fiona/issues/1085 is not solved
  - country_converter
-  - scikit-learn
  # Keep in conda environment when calling ipython
  - ipython
--- a/scripts/build_hydro_profile.py
+++ b/scripts/build_hydro_profile.py
@@ -66,7 +66,7 @@ import atlite
 import geopandas as gpd
 import pandas as pd
-from sklearn.linear_model import LinearRegression
+from numpy.polynomial import Polynomial
 import country_converter as coco
 cc = coco.CountryConverter()
@@ -138,17 +138,16 @@ def approximate_missing_eia_stats(eia_stats, runoff_fn, countries):
    for c in countries:
-        X = runoff_eia[c].values.reshape(-1, 1)
+        X = runoff_eia[c]
-        Y = eia_stats[c].values.reshape(-1, 1)
+        Y = eia_stats[c]
        to_predict = runoff.index.difference(eia_stats.index)
-        X_pred = runoff.loc[to_predict, c].values.reshape(-1, 1)
+        X_pred = runoff.loc[to_predict, c]
-        linear_regressor = LinearRegression()
+        p = Polynomial.fit(X, Y, 1)
-        linear_regressor.fit(X, Y)
+        Y_pred = p(X_pred)
-        Y_pred = linear_regressor.predict(X_pred)
-        eia_stats_approximated[c] = pd.Series(Y_pred.T[0], index=to_predict)
+        eia_stats_approximated[c] = pd.Series(Y_pred, index=to_predict)
    eia_stats_approximated = pd.DataFrame(eia_stats_approximated)
    return pd.concat([eia_stats, eia_stats_approximated]).sort_index()