### Solution for Assignment 5 of the course "Introduction to Machine Learning" at the University of Leoben.
##### Author: Fotios Lygerakis
##### Semester: SS 2022/2023

# Perceptron Algorithm for Classification of Iris Dataset

In [1]:
# load the iris dataset
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)
print(y.shape)

(150, 4)
(150,)


Preprocess the data

In [2]:
# Preprocess the data
from sklearn.model_selection import train_test_split

# split the scaled data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Define the perceptron algorithm

In [3]:
# Define the perceptron algorithm
class MultiClassPerceptron:
    def __init__(self, input_dim, output_dim, lr=0.01, epochs=1000):
        self.W = np.random.randn(input_dim, output_dim)
        self.b = np.zeros((1, output_dim))
        self.lr = lr
        self.epochs = epochs

    def forward(self, X):
        self.z = np.dot(X, self.W) + self.b
        self.y_hat = np.exp(self.z) / np.sum(np.exp(self.z), axis=1, keepdims=True)

    def backward(self, X, y):
        m = X.shape[0]  # number of samples
        # Calculate the gradients
        grad_z = self.y_hat # shape (m, C)
        # Subtract 1 from the predicted class for each sample
        grad_z[range(m), y] -= 1 # shape (m, C)
        # Calculate the gradients with respect to the parameters
        grad_W = np.dot(X.T, grad_z) # shape (n, C)
        # Reshape the gradients into a 2-D array
        grad_b = np.sum(grad_z, axis=0, keepdims=True) # shape (1, C)
        # Update the parameters
        self.W -= self.lr * grad_W # shape (n, C)
        self.b -= self.lr * grad_b # shape (1, C)

    def fit(self, X, y):
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)

    def predict(self, X):
        self.forward(X)
        return np.argmax(self.y_hat, axis=1)

Train the model

In [4]:
# Train the model
p = MultiClassPerceptron(input_dim=X_train.shape[1], output_dim=3, lr=0.01, epochs=1000)
p.fit(X_train, y_train)
predictions_train = p.predict(X_train)
predictions = p.predict(X_test)

Evaluate the model

In [5]:
# evaluate train accuracy
print("Perceptron classification train accuracy", accuracy_score(y_train, predictions_train))
print("Perceptron classification accuracy", accuracy_score(y_test, predictions))

Perceptron classification train accuracy 0.975
Perceptron classification accuracy 1.0


Non-linear feature transformation on the concrete compressive strength dataset

In [6]:
def polynomial_features(X, degree):
    """
    Creates a new feature matrix consisting of all polynomial combinations of the features with degree less than or equal to the specified degree.
    For example, if an input sample is two dimensional and of the form [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].
    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The input samples.
    degree : int
        The degree of the polynomial features.
    Returns
    -------
    X_new : array-like, shape (n_samples, 1 + n_features + n_features*(n_features+1)/2)
        The polynomial features with degree `degree`.
    """
    n_samples, n_features = np.shape(X)
    new_features = np.ones(shape=(n_samples, 1))

    for i in range(n_features):
        for j in range(1, degree+1):
            # create a new column for each feature, with values raised to the power of j
            new_col = np.power(X[:, i], j) # shape (n_samples, 1)
            # reshape the new column to a 2-D array
            new_col = new_col.reshape(n_samples, 1) # shape (n_samples, 1)
            # append the new column to the new_features array
            new_features = np.hstack((new_features, new_col)) # shape (n_samples, j+1)

    return new_features

In [1]:
# Non-linear feature transformation
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# load the concrete compressive strength dataset
df = pd.read_excel('Concrete_Data.xls')

# split the data into train and test sets
X = df.drop(['Concrete compressive strength(MPa, megapascals) '], axis=1)
y = df['Concrete compressive strength(MPa, megapascals) ']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# transform the features into second degree polynomial features
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

X_train_poly_custom = polynomial_features(X_train.values, degree=2)
X_test_poly_custom = polynomial_features(X_test.values, degree=2)


NameError: name 'train_test_split' is not defined

Train the linear regression model

In [8]:
# Train the model
lr_poly_custom = LinearRegression()
lr = LinearRegression()
# fit the model
lr_poly_custom.fit(X_train_poly_custom, y_train)
lr.fit(X_train, y_train)
# predict values from the polynomial transformed features
predictions_poly_custom_train = lr_poly_custom.predict(X_train_poly_custom)
predictions_poly_custom = lr_poly_custom.predict(X_test_poly_custom)
# predict values from the original features
predictions_train = lr.predict(X_train)
predictions = lr.predict(X_test)

# mean squared error
print("Mean squared error (train poly custom): {:.2f}".format(mean_squared_error(y_train, predictions_poly_custom_train)))
print("Mean squared error (test poly custom): {:.2f}".format(mean_squared_error(y_test, predictions_poly_custom)))
print("Mean squared error (train): {:.2f}".format(mean_squared_error(y_train, predictions_train)))
print("Mean squared error (test): {:.2f}".format(mean_squared_error(y_test, predictions)))

# coefficient of determination (R^2)
print("R^2 (train poly custom): {:.2f}".format(r2_score(y_train, predictions_poly_custom_train)))
print("R^2 (test poly custom): {:.2f}".format(r2_score(y_test, predictions_poly_custom)))
print("R^2 (train): {:.2f}".format(r2_score(y_train, predictions_train)))
print("R^2 (test): {:.2f}".format(r2_score(y_test, predictions)))



Mean squared error (train poly custom): 64.55
Mean squared error (test poly custom): 58.28
Mean squared error (train): 110.66
Mean squared error (test): 95.98
R^2 (train poly custom): 0.77
R^2 (test poly custom): 0.77
R^2 (train): 0.61
R^2 (test): 0.63


RBFs on the California Housing Prices dataset

In [9]:
def rbf_kernel(X, centers, gamma):
    # Pairwise Euclidean distances calculation:
    # Compute the squared Euclidean distances between each sample and each center using broadcasting:
    # - Subtract each center from each sample to get a difference matrix of shape (n_samples, n_centers, n_features)
    # - Square each element in the difference matrix
    # - Sum the squared differences along the feature axis to get the squared distances matrix of shape (n_samples, n_centers)
    # - Take the square root of each element in the squared distances matrix to obtain the pairwise Euclidean distances matrix of shape (n_samples, n_centers)
    dists = np.sqrt(((X[:, np.newaxis] - centers)**2).sum(axis=2)) # shape (n_samples, n_centers)
    # Compute the RBF values for each distance using the Gaussian kernel
    rbf_vals = np.exp(-gamma * dists**2) # shape (n_samples, n_centers)
    return rbf_vals

In [10]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the California Housing Prices dataset
data = fetch_california_housing()
X = data['data']
y = data['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# Choose the number of centroids and the RBF kernel width
num_centroids = 100
gamma = 0.1

# Randomly select the centroids from the training set
np.random.seed(42)
idx = np.random.choice(X_train_std.shape[0], num_centroids, replace=False)
centroids = X_train_std[idx] # (100, 8)

# Compute the RBF features for the training and testing sets
rbf_train = rbf_kernel(X_train_std, centroids, gamma)   # (16512, 100)
rbf_test = rbf_kernel(X_test_std, centroids, gamma)    # (4128, 100)

# Fit a linear regression model on the original and RBF-transformed data
linreg_orig = LinearRegression().fit(X_train_std, y_train)
linreg_rbf = LinearRegression().fit(rbf_train, y_train)

# Evaluate the models on the testing set
y_pred_orig = linreg_orig.predict(X_test_std)
mse_orig = mean_squared_error(y_test, y_pred_orig)
r2_orig = r2_score(y_test, y_pred_orig)

y_pred_rbf = linreg_rbf.predict(rbf_test)
mse_rbf = mean_squared_error(y_test, y_pred_rbf)
r2_rbf = r2_score(y_test, y_pred_rbf)

# Print the results
print("Linear regression on original data:")
print("MSE:", mse_orig)
print("R^2:", r2_orig)

print("\nLinear regression on RBF-transformed data:")
print("MSE:", mse_rbf)
print("R^2:", r2_rbf)


Linear regression on original data:
MSE: 0.5558915986952443
R^2: 0.5757877060324508

Linear regression on RBF-transformed data:
MSE: 0.37106446913117447
R^2: 0.7168330839511696


# **(Bonus)** Multilayer Perceptron Algorithm for Regression of Concrete Compressive Strength Dataset

Download the Concrete Compressive Strength Dataset from the UCI Machine Learning Repository.

In [11]:
# Download the Concrete Compressive Strength Dataset from the UCI Machine Learning Repository.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

df = pd.read_excel('Concrete_Data.xls')
print(df.shape)
# df.head()

(1030, 9)


Preprocess the data

In [12]:
# Preprocess the data
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values.reshape(-1, 1)

# Normalize the features
X_norm = StandardScaler().fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=42)

Define the multilayer perceptron algorithm

In [13]:
# a multilayer perceptron algorithm class for regression problems
class MLP:
    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.01, epochs=1000):
        self.W1 = np.random.randn(input_dim, hidden_dim)
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, output_dim)
        self.b2 = np.zeros((1, output_dim))
        self.lr = lr
        self.epochs = epochs

    def forward(self, X):
        # forward propagation through our network
        self.z1 = np.dot(X, self.W1) + self.b1
        # activation function
        self.a1 = np.tanh(self.z1)
        # output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        # final activation function
        self.y_hat = self.z2

    def backward(self, X, y):
        # number of samples
        m = X.shape[0]
        # output layer gradient
        self.loss = np.mean((self.y_hat - y) ** 2) # MSE loss. shape (n_samples, output_dim)
        # output layer gradient
        delta2 = (self.y_hat - y) # shape (n_samples, output_dim)
        # hidden layer gradient
        dW2 = np.dot(self.a1.T, delta2) # shape (hidden_dim, output_dim)
        # bias gradient
        db2 = np.sum(delta2, axis=0, keepdims=True) # shape (1, output_dim)
        # hidden layer gradient
        delta1 = np.dot(delta2, self.W2.T) * (1 - np.power(self.a1, 2)) # shape (n_samples, hidden_dim)
        # input layer gradient
        dW1 = np.dot(X.T, delta1) # shape (input_dim, hidden_dim)
        # bias gradient
        db1 = np.sum(delta1, axis=0) # shape (1, hidden_dim)
        # update parameters
        self.W2 -= self.lr * dW2 / m
        self.b2 -= self.lr * db2 / m
        self.W1 -= self.lr * dW1 / m
        self.b1 -= self.lr * db1 / m

    def fit(self, X, y):
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)

    def predict(self, X):
        self.forward(X)
        return self.y_hat


Train the model

In [14]:
# Create an instance of the MLP class
mlp = MLP(input_dim=X_train.shape[1], hidden_dim=10, output_dim=1, lr=0.01, epochs=1000)
# Train the model
mlp.fit(X_train, y_train)

Evaluate the model

In [15]:
# Evaluate the model
from sklearn.metrics import mean_squared_error

y_pred = mlp.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

Mean Squared Error: 36.8911071801165


Compare the results with the linear regression model

In [16]:
# Compare the results with the linear regression model
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

Mean Squared Error: 95.97548435337708
