Regression examples

Wine quality

We use the wine quality dataset from http://archive.ics.uci.edu/.

Example of parameter selection and cross-validation using GTM regression (GTR) and SVM regression (SVR):

from ugtm import eGTR
import numpy as np
from numpy import sqrt
from sklearn import model_selection
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyRegressor
import pandas as pd

# Fetch the red wine quality table (semicolon-separated CSV) from the UCI archive
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, sep=";")

# Features are every column except 'quality'; 'quality' is the regression target
X = data.drop(columns='quality')
y = data['quality']


# Hold out 10% of the samples as a test set. The fixed seed makes the shuffled
# split reproducible. (random_state must be passed only once: a repeated
# keyword argument is a SyntaxError.)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.10, random_state=42, shuffle=True)


# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
std = StandardScaler()
std.fit(X_train)
X_train, X_test = std.transform(X_train), std.transform(X_test)

# Test-set score of each model, keyed by model name
performances = dict()

# GTM regressor (GTR), bayesian

# Hyperparameter grid explored by the cross-validated search
tuned_params = {'regul': [0.0001, 0.001, 0.01, 0.1, 1],
                's': [0.1, 0.2, 0.3],
                'k': [25],
                'm': [5]}

# NOTE: the `iid` argument was deprecated in scikit-learn 0.22 and removed in
# 0.24, where passing it raises a TypeError. Fold scores are now always
# averaged without weighting, which is what iid=False used to request.
gs = model_selection.GridSearchCV(eGTR(), tuned_params, cv=3,
                                  scoring='neg_mean_squared_error')

gs.fit(X_train, y_train)

# Best cross-validated score (negated MSE, so closer to 0 is better)
# and the parameter combination that achieved it
print(gs.best_score_)
print(gs.best_params_)

# Predict the held-out test data using the model built with the best parameters
y_true, y_pred = y_test, gs.predict(X_test)

# Record performance on test set (RMSE)
performances['gtr'] = sqrt(mean_squared_error(y_true, y_pred))

# SVM regressor (SVR)

# Hyperparameter grid explored by the cross-validated search
tuned_params = {'C': [1, 10, 100, 1000],
                'gamma': [1, 0.1, 0.001, 0.0001],
                'kernel': ['rbf']}

# NOTE: the `iid` argument was deprecated in scikit-learn 0.22 and removed in
# 0.24, where passing it raises a TypeError. Fold scores are now always
# averaged without weighting, which is what iid=False used to request.
gs = model_selection.GridSearchCV(SVR(), tuned_params, cv=3,
                                  scoring='neg_mean_squared_error')

gs.fit(X_train, y_train)

# Best cross-validated score (negated MSE, so closer to 0 is better)
# and the parameter combination that achieved it
print(gs.best_score_)
print(gs.best_params_)

# Predict the held-out test data using the model built with the best parameters
y_true, y_pred = y_test, gs.predict(X_test)

# Record performance on test set (RMSE)
performances['svm'] = sqrt(mean_squared_error(y_true, y_pred))

# Baseline: a regressor that always predicts the mean of the training targets
dummy = DummyRegressor(strategy='mean').fit(X_train, y_train)

# Evaluate the baseline on the held-out test set
y_true = y_test
y_pred = dummy.predict(X_test)

# Baseline performance (RMSE), for comparison with the tuned models
performances['dummy'] = sqrt(mean_squared_error(y_true, y_pred))