ML_course/assignment 2/iml_assignment2a_solved.ipynb

251 lines
49 KiB
Plaintext
Raw Normal View History

2023-03-31 11:53:42 +00:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"Importing the libraries"
],
"metadata": {
"id": "sCd8w2OwBTLm"
}
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "kWsq3C9zxvlr"
},
"outputs": [],
"source": [
"import csv\n",
"import math\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "markdown",
"source": [
"Create the BasicStatistics class"
],
"metadata": {
"id": "Bw9qskz1BXha"
}
},
{
"cell_type": "code",
"source": [
"class BasicStatistics:\n",
"    \"\"\"\n",
"    Basic descriptive statistics for a one-dimensional dataset\n",
"\n",
"    Attributes:\n",
"        dataset: list of numbers (kept by reference and never reordered)\n",
"\n",
"    Methods:\n",
"        mean: calculates the mean of the dataset\n",
"        median: calculates the median of the dataset\n",
"        variance: calculates the population variance of the dataset\n",
"        standard_deviation: calculates the population standard deviation\n",
"        normalize: standardizes the dataset (z-scores)\n",
"        plot: plots the dataset and saves the figure\n",
"    \"\"\"\n",
"    def __init__(self, dataset):\n",
"        \"\"\"\n",
"        Constructor for the BasicStatistics class\n",
"        :param dataset: list of numbers\n",
"        \"\"\"\n",
"        self.dataset = dataset\n",
"\n",
"    def mean(self):\n",
"        \"\"\"\n",
"        Calculates the arithmetic mean of the dataset\n",
"        :return: mean of the dataset\n",
"        :raises ZeroDivisionError: if the dataset is empty\n",
"        \"\"\"\n",
"        return sum(self.dataset) / len(self.dataset)\n",
"\n",
"    def median(self):\n",
"        \"\"\"\n",
"        Calculates the median of the dataset\n",
"        :return: the middle value, or the average of the two middle values\n",
"                 when the number of samples is even\n",
"        :raises IndexError: if the dataset is empty\n",
"        \"\"\"\n",
"        # sort a copy: an in-place sort would reorder the caller's list and\n",
"        # destroy the sample order that plot() displays\n",
"        ordered = sorted(self.dataset)\n",
"        middle = len(ordered) // 2\n",
"        if len(ordered) % 2 == 0:\n",
"            return (ordered[middle - 1] + ordered[middle]) / 2\n",
"        return ordered[middle]\n",
"\n",
"    def variance(self):\n",
"        \"\"\"\n",
"        Calculates the population variance of the dataset (divides by n)\n",
"        :return: variance of the dataset\n",
"        :raises ZeroDivisionError: if the dataset is empty\n",
"        \"\"\"\n",
"        # compute the mean once instead of once per sample\n",
"        mean = self.mean()\n",
"        return sum((x - mean) ** 2 for x in self.dataset) / len(self.dataset)\n",
"\n",
"    def normalize(self):\n",
"        \"\"\"\n",
"        Standardizes the dataset to zero mean and unit standard deviation\n",
"        :return: list of z-scores, (x - mean) / standard deviation\n",
"        :raises ZeroDivisionError: if the dataset is empty or all values are equal\n",
"        \"\"\"\n",
"        mean = self.mean()\n",
"        std = self.standard_deviation()\n",
"        if std == 0:\n",
"            raise ZeroDivisionError('cannot normalize a dataset with zero variance')\n",
"        return [(x - mean) / std for x in self.dataset]\n",
"\n",
"    def standard_deviation(self):\n",
"        \"\"\"\n",
"        Calculates the population standard deviation of the dataset\n",
"        :return: square root of the variance\n",
"        \"\"\"\n",
"        return math.sqrt(self.variance())\n",
"\n",
"    def plot(self):\n",
"        \"\"\"\n",
"        Plots the histogram, the raw samples and the standardized samples,\n",
"        writes the figure to Basic_Statistics.png and shows it\n",
"        :return: None\n",
"        \"\"\"\n",
"        # get the data from the instance, not from a notebook global\n",
"        values = self.dataset\n",
"        n_samples = len(values)\n",
"        mean = self.mean()\n",
"        median = self.median()\n",
"        std = self.standard_deviation()\n",
"        data_norm = self.normalize()\n",
"        # one bin per 50 samples, but never fewer than one bin\n",
"        n_bins = max(1, n_samples // 50)\n",
"\n",
"        # set up figure and plot grid\n",
"        fig = plt.figure(figsize=(10, 8))\n",
"        grid = plt.GridSpec(2, 2)\n",
"        ax1 = plt.subplot(grid[:1, :])\n",
"        ax2 = plt.subplot(grid[1:, :1])\n",
"        ax3 = plt.subplot(grid[1:, 1:])\n",
"\n",
"        # set titles\n",
"        fig.suptitle('Basic Statistics')\n",
"        ax1.set_title('Data Distribution')\n",
"        ax2.set_title('Raw Data')\n",
"        ax3.set_title('Normalized Data')\n",
"\n",
"        # set axes\n",
"        ax1.set_xlabel('Values')\n",
"        ax1.set_ylabel('Frequency')\n",
"        ax2.set_xlabel('Sample')\n",
"        ax2.set_ylabel('Value')\n",
"        ax3.set_xlabel('Sample')\n",
"        ax3.set_ylabel('Standardized Value')\n",
"\n",
"        # data plotting\n",
"        ax1.hist(values, bins=n_bins, density=True, label='Histogram')\n",
"        # axvline spans the full axis height, so no density-specific ymax is needed\n",
"        ax1.axvline(x=mean, color='r', ls='--', label='Mean')\n",
"        ax1.axvline(x=median, color='y', ls='--', label='Median')\n",
"        ax1.axvline(x=mean + std, color='g', ls='--', label='Standard deviation')\n",
"        ax1.axvline(x=mean - std, color='g', ls='--')\n",
"        ax1.legend()\n",
"        # raw data plotting\n",
"        x = list(range(1, n_samples + 1))\n",
"        ax2.scatter(x, values, s=3, label='Data')\n",
"        ax2.hlines(y=mean, xmin=0, xmax=n_samples, colors='r', ls='--', label='Mean')\n",
"        ax2.hlines(y=mean + std, xmin=0, xmax=n_samples, colors='g', ls='--', label='Standard deviation')\n",
"        ax2.hlines(y=mean - std, xmin=0, xmax=n_samples, colors='g', ls='--')\n",
"        ax2.legend()\n",
"        # normalized data plotting: z-scores have mean 0 and standard deviation 1\n",
"        ax3.scatter(x, data_norm, s=3, label='Data')\n",
"        ax3.hlines(y=0, xmin=0, xmax=n_samples, colors='r', ls='--', label='Mean')\n",
"        ax3.hlines(y=0 + 1, xmin=0, xmax=n_samples, colors='g', ls='--', label='Standard deviation')\n",
"        ax3.hlines(y=0 - 1, xmin=0, xmax=n_samples, colors='g', ls='--')\n",
"        ax3.legend()\n",
"\n",
"        # save the figure before showing it, so the file is written regardless\n",
"        # of what the active backend does with the figure after show()\n",
"        fig.savefig(\"Basic_Statistics.png\", format=\"png\")\n",
"        plt.show()\n"
],
"metadata": {
"id": "cqSJ_htS_04g"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Demonstrate the BasicStatistics class on the data.csv file"
],
"metadata": {
"id": "xeMDhdMqBd2n"
}
},
{
"cell_type": "code",
"source": [
"# define the path to the data file\n",
"path = 'data.csv'\n",
"# open the file and read the data (newline='' as the csv module recommends)\n",
"with open(path, 'r', newline='') as f:\n",
"    # create a csv reader\n",
"    reader = csv.reader(f)\n",
"    # convert the first column to floats, skipping blank rows\n",
"    data = [float(row[0]) for row in reader if row]\n",
"# the values are in memory now, so the file does not need to stay open\n",
"# create a BasicStatistics object\n",
"bs = BasicStatistics(data)\n",
"# print the mean, median, and variance\n",
"print(f'Mean: {bs.mean():.2f}')\n",
"print(f'Median: {bs.median():.2f}')\n",
"print(f'Variance: {bs.variance():.2f}')\n",
"# plot the data\n",
"bs.plot()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 606
},
"id": "UQVfNd_j_29Z",
"outputId": "e3cab288-537c-4937-b8e0-a7473770400d"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mean: 4.08\n",
"Median: 4.13\n",
"Variance: 4.15\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 720x576 with 3 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmgAAAIZCAYAAAASkcMtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAAsTAAALEwEAmpwYAAB7GUlEQVR4nO3deXwV1f3/8dfHgISAAgIqq2BVQASDRlSsFoUqKipWKuBS0faL4v6rtlXrXq20ta3FpUpbi1XZ6ooIbQXBFdEgUdlURDY3FgVkUwKf3x8ziTfJTXJD7s3ce/N+Ph73kTtzZvlkGE4+c+bMGXN3RERERCR97BZ1ACIiIiJSlhI0ERERkTSjBE1EREQkzShBExEREUkzStBERERE0owSNBEREZE0owRNRNKemU0zswuijqM8M7vBzP6+i+tuMrP9kx2TiGQH0zhoIpIsZrYM2AfYAWwHXgcucfeVEcVzBnAbsD/wLfAu8FN3/9jMbgUOcPfzEtxWX+Axd2+/C3HMCtfdpWROROoftaCJSLKd5u5NgTbAF8C9UQRhZgcA/wKuAZoBnYH7CZJHEZG0pgRNRFLC3bcBTwAHl8wzs1PNbJ6ZbTSzlWErVklZrpk9ZmbrzGy9mb1lZvuEZbPM7Gcxy/6fmS0ys6/NbKGZHRYnhHzgY3ef4YGv3f1Jd19hZgOAG4Ah4a3Gd8LtXhiz3aVmdnE4vwkwDWgbLr/JzNqa2a1m9lhV8ZvZncCxwH3heveFy3uYRGJmjc3sj2a23Mw2mNmr4bxKj4mIZLcGUQcgItnJzPKAIcAbMbM3Az8BFgCHAC+YWZG7PwNcQNDS1QH4hiDB2hpnuz8GbgUGAYXA9whup5b3NtDVzP4MTAbecvdNAO7+HzP7LRVvca4GBgJLgeOAaWb2lru/bWYnU+4Wp5nF7i9u/O7+azM7hqpvcd4NdAf6AJ8DRwI7geGJHBMRyT5qQRORZHvGzNYDG4AfAn8oKXD3We7+nrvvdPd3gfHAD8Li7UBLgqRph7vPdfeNcbb/M+D37v5W2DK2xN2Xl1/I3ZcCfYF2wCRgrZmNNbOmlQXu7s+7+0fhdl8C/kfQ+pWIROMvw8x2Ay4CrnL3T8J1X3f3b3Z1myKS+ZSgiUiyDXL35kAucDnwkpntC2BmR5rZTDNbY2YbgEuAVuF6jwL/BSaY2adm9nszaxhn+x2AjxIJxN3fcPez3b01QaJ1HPDrypY3s5PN7A0z+zJMMk+Jia86icZfXiuCYxXvd9rVbYpIhlOCJiIpEbb4PEXQKf/74exxBLcbO7h7M+BBwMLlt7v7be5+MMGtvoEEt0PLW0lwW7Om8bwFPEVwaxWgzCPsZtYIeJLgduM+YZI5tSS+8svH2X5V8Ve17lpgG3F+pxocExHJMkrQRCQlLHAG0AJYFM7eA/jS3beZWW/gnJjljzezHmaWA2wkuL23M86m/w5ca2aHh/s4wMz2i7P/74cPE+wdTncFTue7PnFfAJ3CW4wAuwONgDVAcdjn7MSYTX4BtDSzZpX8vlXF/wXBUB8VuPtO4GHgT+GDBzlmdrSZNarBMRGRLKMETUSS7Tkz20SQUNwJXODuC8KyS4Hbzexr4GaCvmEl9iV46nMjQUL3EsEtvjLc/d/hdscBXwPPAHvFiWM9QUL2XhjPf4Cngd+H5f8Of64zs7fd/WvgyjCmrwiSx8kx+11M0GduafhEZdty+6sq/r8Ag83sKzMbHSfWa4H3gLeAL4HfEdTPCR0TEck+GqhWREREJM2oBU1EREQkzShBExEREUkzStBERERE0owSNBEREZE0owRNREREJM0oQRMRERFJM0rQRERERNKMEjQRERGRNKMETURERCTNKEETERERSTNK0ERERETSjBI0ERERkTSjBE1EREQkzShBExEREUkzStBERERE0owSNBEREZE0owRNREREJM0oQRMRERFJM0rQRERERNKMEjQRERGRNKMETURERC
TNKEETERERSTNK0ERERETSjBI0ERERkTSjBE1EREQkzShBExEREUkzStBERERE0owSNBEREZE0owRNREREJM0oQRMRERFJM0rQRERERNKMEjQRqdfMbJqZXZCkbR1rZu/HTC8zs/7J2Ha4vQVm1jdZ2xOR9KUETURSIkxOtprZ12a23sxeN7NLzCyhesfMOpmZm1mDWsTgZrbZzDaZ2Tozm2FmQ2KXcfeT3f2RBLd1QFXLuPsr7t5lV+Mtt7+xZnZHue13d/dZydi+iKQ3JWgikkqnufsewH7AKOBXwD/qOIZD3b0p0AUYC9xnZrckeye1SSRFRMpTgiYiKefuG9x9MjAEuMDMDgEws1PNbJ6ZbTSzlWZ2a8xqL4c/14ctYEeb2ffM7MWwNWytmT1uZs0TjGGtuz8KjASuN7OWYQyzzOxn4fcDzOwlM9sQbn9iOL8klnfCWIaYWV8zW2VmvzKzz4F/lswrt+sjzGyhmX1lZv80s9xwm8PN7NXYBUta6cxsBHAu8Mtwf8+F5aW3TM2skZndY2afhp97zKxRWFYS2zVmttrMPjOzCxM5TiKSHpSgiUidcfc3gVXAseGszcBPgObAqcBIMxsUlh0X/mzu7k3dfTZgwF1AW6Ab0AG4tYZhPAs0AHrHKfsN8D+gBdAeuDeMuySWQ8NYJobT+wJ7EbQQjqhkf+cCJwHfAw4CbqwuQHcfAzwO/D7c32lxFvs1cBSQDxwa/j6x294XaAa0A34K3G9mLarbt4ikByVoIlLXPiVIanD3We7+nrvvdPd3gfHADypb0d2XuPsL7v6Nu68B/lTV8pVsYzuwtiSGcrYTJFtt3X2bu78aZ5lYO4Fbwni2VrLMfe6+0t2/BO4EhtUk3iqcC9zu7qvDY3EbcH5M+fawfLu7TwU2EdzmFZEMoARNROpaO+BLADM70sxmmtkaM9sAXAK0qmxFM9vHzCaY2SdmthF4rKrlK9lGQ6B1SQzl/JKgle7N8InJi6rZ3Bp331bNMitjvi8naP1Lhrbh9irb9jp3L46Z3gI0TdK+RSTFlKCJSJ0xsyMIErSSlqlxwGSgg7s3Ax4kSJAAPM4mfhvO7+HuewLnxSyfqDOAYuDN8gXu/rm7/5+7twUuBh6o5snNeDGW1yHme0eCFkQIbu/mlRSY2b413PanBK198bYtIhlOCZqIpJyZ7WlmA4EJwGPu/l5YtAfwpbtvM7PewDkxq60huIW4f8y8PQhu1W0ws3bAL2oQw15mdi5wP/A7d18XZ5kfm1n7cPIrgiRpZzj9RblYEnWZmbU3s70I+o2V9F97B+huZvnhgwO3lluvuv2NB240s9Zm1gq4maBFUUSygBI0EUml58zsa4LbfL8m6DMW+zThpcDt4TI3A5NKCtx9C0GfrdfCcdSOIuhndRiwAXgeeCqBGN4xs03AEuBnwP9z95srWfYIYE64/GTgKndfGpbdCjwSxnJ2AvstMY7gwYOlwEfAHeHv9wFwOzAd+JDvWhVL/AM4ONzfM3G2ewdQCLwLvAe8XbJtEcl85p5IC72IiIiI1BW1oImIiIikGSVoIiIiImlGCZqIiIhImlGCJiIiIpJmsublvq1atfJOnTpFHYaIiIhItebOnbvW3VtXVp41CVqnTp0oLCyMOgwRERGRapnZ8qrKdYtTREREJM0oQRMRERFJM0rQRERERNJM1vRBExERSSfbt29n1apVbNu2LepQJEK5ubm0b9+ehg0b1mi9lCZoZjYA+AuQA/zd3UeVK78EuAzYQfAC5BHuvjAsux74aVh2pbv/N5WxioiIJNOqVavYY4896NSpE2YWdTgSAXdn3bp1rFq1is6dO9do3ZTd4jSzHOB+4GTgYGCYmR1cbrFx7t7D3fOB3xO8SJlwuaFAd2AA8EC4PRERkYywbds2WrZsqeSsHjMzWrZsuUutqKnsg9YbWOLuS939W2ACcEbsAu6+MWayCVDy5vYzgAnu/o27fwwsCbcnIiKSMZScya
6eA6m8xdkOWBkzvQo4svxCZnYZ8HNgd+CEmHXfKLduuzjrjgBGAHTs2DEpQYtkk+fefw6A07qcFnEkWeS54JiuPTq
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"source": [
"# New Section"
],
"metadata": {
"id": "p4TrEpvsABUU"
}
}
]
}