diff --git a/assignment 4/iml_assginment4_solved.ipynb b/assignment 4/iml_assginment4_solved.ipynb index c79c50e..d277412 100644 --- a/assignment 4/iml_assginment4_solved.ipynb +++ b/assignment 4/iml_assginment4_solved.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "outputs": [], "source": [ "import pandas as pd\n", @@ -85,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "collapsed": true }, @@ -141,14 +141,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "outputs": [ { "data": { "text/plain": " age sex bmi bp s1 s2 s3 \\\n0 0.794887 1.061173 1.357096 0.459459 -0.917834 -0.734476 -0.958901 \n1 -0.038221 -0.940162 -1.095193 -0.557425 -0.148672 -0.395182 1.714481 \n2 1.779468 1.061173 0.983414 -0.121617 -0.947417 -0.720904 -0.708271 \n3 -1.855910 -0.940162 -0.231053 -0.775328 0.295075 0.561626 -0.791815 \n4 0.113253 -0.940162 -0.768221 0.459459 0.117576 0.358049 0.210704 \n\n s4 s5 s6 target \n0 -0.035628 0.434041 -0.356981 151.0 \n1 -0.856638 -1.429397 -1.923328 75.0 \n2 -0.035628 0.074059 -0.531020 141.0 \n3 0.785382 0.492755 -0.182943 206.0 \n4 -0.035628 -0.661884 -0.966116 135.0 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
agesexbmibps1s2s3s4s5s6target
00.7948871.0611731.3570960.459459-0.917834-0.734476-0.958901-0.0356280.434041-0.356981151.0
1-0.038221-0.940162-1.095193-0.557425-0.148672-0.3951821.714481-0.856638-1.429397-1.92332875.0
21.7794681.0611730.983414-0.121617-0.947417-0.720904-0.708271-0.0356280.074059-0.531020141.0
3-1.855910-0.940162-0.231053-0.7753280.2950750.561626-0.7918150.7853820.492755-0.182943206.0
40.113253-0.940162-0.7682210.4594590.1175760.3580490.210704-0.035628-0.661884-0.966116135.0
\n
" }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -183,16 +183,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "X_train: (353, 10)\n", - "X_test: (89, 10)\n", - "y_train: (353,)\n", - "y_test: (89,)\n" + "X_train: (344, 10)\n", + "X_test: (86, 10)\n", + "y_train: (344,)\n", + "y_test: (86,)\n" ] } ], @@ -201,10 +201,7 @@ "\n", "normalize = True\n", "# Load the data\n", - "# X_train, X_test, y_train, y_test, df = load_data(normalize=normalize)\n", - "data = load_diabetes(scaled=True)\n", - "# split data into train and test sets\n", - "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2)\n", + "X_train, X_test, y_train, y_test, df = load_data(normalize=normalize)\n", "print(\"X_train:\", X_train.shape)\n", "print(\"X_test:\", X_test.shape)\n", "print(\"y_train:\", y_train.shape)\n", @@ -225,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "outputs": [], "source": [ "# Fit the linear regression\n", @@ -238,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "outputs": [], "source": [ "# Fit the ridge regression\n", @@ -251,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "outputs": [], "source": [ "# Fit the lasso regression\n", @@ -273,54 +270,54 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Linear Regression MSE train: 2886.03 test: 2801.42\n", - "Ridge Regression MSE train: 3417.29 test: 3129.15\n", - "Lasso Regression MSE train: 2905.98 test: 2812.49\n", - "Linear Regression RMSE train: 53.72 test: 52.93\n", - "Ridge Regression RMSE train: 58.46 test: 55.94\n", - "Lasso Regression RMSE train: 53.91 test: 53.03\n", - "Linear Regression R2 train: 0.51 test: 0.54\n", - "Ridge Regression R2 train: 0.42 test: 0.49\n", - "Lasso Regression R2 train: 0.51 test: 0.54\n", + "Linear Regression MSE train: 2983.19 test: 2487.62\n", + "Ridge Regression MSE train: 2983.79 test: 2495.37\n", + "Lasso Regression MSE train: 2983.19 test: 2487.98\n", + "Linear Regression RMSE train: 54.62 test: 49.88\n", + "Ridge Regression RMSE train: 54.62 test: 49.95\n", + "Lasso Regression RMSE train: 54.62 test: 49.88\n", + "Linear Regression R2 train: 0.50 test: 0.49\n", + "Ridge Regression R2 train: 0.50 test: 0.49\n", + "Lasso Regression R2 train: 0.50 test: 0.49\n", "Linear Regression features sorted by their coefficients:\n", - "s1: -822.75\n", - "s5: 765.40\n", - "bmi: 514.60\n", - "s2: 424.92\n", - "bp: 355.26\n", - "sex: -241.22\n", - "s4: 230.86\n", - "s3: 129.59\n", - "s6: 40.86\n", - "age: -10.93\n", + "s5: 28.62\n", + "bmi: 23.95\n", + "s1: -23.18\n", + "bp: 17.64\n", + "s2: 14.38\n", + "sex: -12.17\n", + "s4: 4.23\n", + "s3: -4.03\n", + "s6: 2.06\n", + "age: 0.86\n", "Ridge Regression features sorted by their coefficients:\n", - "bmi: 283.92\n", - "s5: 238.87\n", - "bp: 195.46\n", - "s3: -142.91\n", - "s4: 106.92\n", - "s6: 93.55\n", - "sex: -63.98\n", - "s2: -30.82\n", - "age: 24.08\n", - "s1: 0.97\n", + "s5: 26.16\n", + "bmi: 23.91\n", + "bp: 17.60\n", + "s1: -16.67\n", + "sex: -12.13\n", + "s2: 9.11\n", + "s3: -6.64\n", + "s4: 3.73\n", + "s6: 2.12\n", + "age: 0.87\n", "Lasso Regression features sorted by their coefficients:\n", - "bmi: 528.98\n", - "s5: 494.25\n", - "bp: 348.14\n", - "sex: -230.35\n", - "s3: -197.76\n", - "s2: -137.48\n", - "s4: 127.48\n", - "s1: -101.61\n", - "s6: 40.95\n", - "age: -7.39\n", + "s5: 28.48\n", + "bmi: 23.95\n", + "s1: -22.80\n", + "bp: 17.64\n", + "s2: 14.07\n", + "sex: -12.17\n", + "s4: 4.19\n", + "s3: -4.18\n", + "s6: 2.06\n", + "age: 0.86\n", "Linear Regression number of non-zero coefficients: 11\n", "Ridge Regression number of non-zero coefficients: 11\n", "Lasso Regression number of non-zero coefficients: 11\n"