175 lines
6.2 KiB
Matlab
175 lines
6.2 KiB
Matlab
%% Scenario 2 (TSK - Superconductivity, High-dimensional)
%
% Assignment 3 in Fuzzy Systems
%
% author:
%   Christos Choutouridis ΑΕΜ 8997
%   cchoutou@ece.auth.gr
%
% Notes:
%   - 5-fold CV grid-search over (#features, SC radius) with a careful
%     no-leakage pipeline (scaling & ReliefF inside each fold)
%   - Final training on Train with validation on Val, testing on Test
%   - Diagnostics: CV heatmap, error-vs-rules, error-vs-#features
%   - Classic plots: learning curves, pred vs actual, residuals, MFs (subset)
|
||
|
||
% Start from a clean slate: no open figures, no workspace variables,
% empty command window.
close all; clear; clc;

% Configuration
% --------------------------------
% Hyperparameter grid and training settings, gathered in one struct:
%   feature_grid : candidate numbers of selected features
%   radius_grid  : candidate subtractive-clustering influence ranges
%   K            : number of cross-validation folds (5-fold CV)
%   Nepochs      : number of ANFIS training epochs
config = struct( ...
    'feature_grid', [5 8 11 15], ...
    'radius_grid',  [0.25 0.5 0.75 1.0], ...
    'K',            5, ...
    'Nepochs',      100);

% Seed the Mersenne Twister generator so repeated runs are reproducible.
rng(42, 'twister');
|
||
|
||
% Banner
fprintf('Scenario 2 — Superconduct\n');
fprintf('===============================\n\n');

% Load dataset
% The CSV is read as one numeric matrix; it is reported below as
% (columns - 1) features plus 1 target column.
fprintf('Loading dataset (superconduct.csv)...\n');
data = load("Datasets/superconduct.csv");
[n_samples, n_columns] = size(data);
fprintf(' Done: %d samples, %d features + 1 target\n\n', n_samples, n_columns - 1);

% Split 60/20/20 (Train / Validation / Test).
% NOTE(review): the trailing 42 is presumably the split seed — confirm in split_data.
fprintf('Splitting dataset into Train/Val/Test [0.6/0.2/0.2] ...\n');
split_ratios = [0.6 0.2 0.2];
[X_trn, y_trn, X_val_raw, y_val, X_chk_raw, y_chk] = split_data(data, split_ratios, 42);
split_sizes = [size(X_trn,1), size(X_val_raw,1), size(X_chk_raw,1)];
fprintf(' Done: Train=%d, Val=%d, Test=%d\n', split_sizes);

% Scale all three partitions; the training partition is passed first.
% NOTE(review): mode 1 presumably selects Min–Max scaling fitted on the
% training data only (as the message says) — confirm in preprocess_data.
fprintf('Preprocessing (Min–Max scaling to [0,1]) ...\n');
[X_trn_s, X_val_s, X_chk_s, scale_stats] = preprocess_data(X_trn, X_val_raw, X_chk_raw, 1);
fprintf(' Done (mode = %s)\n\n', scale_stats.type);
|
||
|
||
|
||
|
||
% Cross-validated grid-search over (#features, SC radius)
% -------------------------------------------------------
% For every grid point, a TSK model is initialised with subtractive
% clustering and trained with ANFIS on K-1 folds, then scored on the
% held-out fold. Outputs:
%   cv_scores(fi, rj) : mean over folds of the minimum validation error
%   cv_rules(fi, rj)  : rounded mean number of rules of the selected FIS
fprintf('Starting %d×%d grid-search (%d folds per point)...\n', ...
    numel(config.feature_grid), numel(config.radius_grid), config.K);

cv_scores = zeros(numel(config.feature_grid), numel(config.radius_grid));
cv_rules  = zeros(numel(config.feature_grid), numel(config.radius_grid));

Ntr   = size(X_trn_s,1);
folds = cvpartition(Ntr, "KFold", config.K);

% Per-fold preparation, done ONCE per fold.
% Scaling and the ReliefF ranking depend only on the fold, not on the
% (#features, radius) pair, so they are computed here instead of inside the
% innermost loop. This runs relieff K times rather than
% K * numel(feature_grid) * numel(radius_grid) times. The pipeline stays
% leakage-free: both steps still see only the fold's training rows.
% Assumes preprocess_data is deterministic; relieff with its default
% settings uses all observations, so the ranking does not depend on the RNG.
fprintf(' Preparing %d folds (per-fold scaling + ReliefF ranking)...\n', config.K);
fold_data = repmat(struct('x_tr', [], 'y_tr', [], ...
                          'x_vl', [], 'y_vl', [], 'rank', []), config.K, 1);
for k = 1:config.K
    Itr = training(folds, k);
    Ivl = test(folds, k);

    % Raw (pre-global-scale) slices
    x_tr_raw = X_trn(Itr,:);  y_tr_f = y_trn(Itr);
    x_vl_raw = X_trn(Ivl,:);  y_vl_f = y_trn(Ivl);

    % Per-fold scaling (the validation slice is also passed as the third
    % partition; its scaled copy is discarded)
    [x_tr_f, x_vl_f, ~, ~] = preprocess_data(x_tr_raw, x_vl_raw, x_vl_raw, 1);

    % Feature ranking (ReliefF, 10 nearest neighbours) on the fold's training part
    [idxF, ~] = relieff(x_tr_f, y_tr_f, 10);

    fold_data(k).x_tr = x_tr_f;
    fold_data(k).y_tr = y_tr_f;
    fold_data(k).x_vl = x_vl_f;
    fold_data(k).y_vl = y_vl_f;
    fold_data(k).rank = idxF;
end

for fi = 1:numel(config.feature_grid)
    kfeat = config.feature_grid(fi);

    for rj = 1:numel(config.radius_grid)
        rad = config.radius_grid(rj);
        fprintf(' Combo: features=%2d | radius=%.2f ...\n', kfeat, rad);

        fold_err  = zeros(config.K,1);
        fold_rule = zeros(config.K,1);

        for k = 1:config.K
            fprintf(' Fold %d/%d ... ', k, config.K);

            % Keep the top-ranked features (never more than are available)
            kkeep = min(kfeat, size(fold_data(k).x_tr,2));
            sel   = fold_data(k).rank(1:kkeep);

            x_tr_f = fold_data(k).x_tr(:, sel);
            x_vl_f = fold_data(k).x_vl(:, sel);
            y_tr_f = fold_data(k).y_tr;
            y_vl_f = fold_data(k).y_vl;

            % Init FIS (SC)
            gopt = genfisOptions("SubtractiveClustering", ...
                "ClusterInfluenceRange", rad);
            init_fis = genfis(x_tr_f, y_tr_f, gopt);

            % Train (Hybrid)
            aopt = anfisOptions("InitialFis", init_fis, ...
                "ValidationData", [x_vl_f y_vl_f], ...
                "EpochNumber", config.Nepochs, ...
                "OptimizationMethod", 1, ...
                "DisplayErrorValues", 0, ...
                "DisplayStepSize", 0);

            % 4th/5th outputs: FIS at minimum validation error and the
            % per-epoch validation error
            [~, ~, ~, vl_fis, vl_err] = anfis([x_tr_f y_tr_f], aopt);

            fold_err(k)  = min(vl_err);
            fold_rule(k) = numel(vl_fis.Rules);
            fprintf(' Done (val err=%.4g, rules=%d)\n', fold_err(k), fold_rule(k));
        end

        cv_scores(fi, rj) = mean(fold_err);
        cv_rules(fi, rj)  = round(mean(fold_rule));
        fprintf(' Mean CV error=%.4g | Mean rules=%d\n\n', ...
            cv_scores(fi,rj), cv_rules(fi,rj));
    end
end

fprintf('Grid-search completed.\n');
|
||
|
||
% Pick best hyper-parameters
% Rows of cv_scores index config.feature_grid, columns index config.radius_grid.
% Two-stage argmin: best row inside every column first, then the best column.
% Ties resolve to the first column and, within it, the first row, which is
% the same entry a column-major linear search would return.
[col_best_err, col_best_row] = min(cv_scores, [], 1);
[minErr, rj_best] = min(col_best_err);
fi_best = col_best_row(rj_best);

best_feats = config.feature_grid(fi_best);
best_rad   = config.radius_grid(rj_best);

% Report the winning combination
fprintf('\n');
fprintf(' Best combo found: features=%d, radius=%.2f\n', best_feats, best_rad);
fprintf(' Mean CV error : %.4g\n', minErr);
fprintf('===========================================\n\n');
|
||
|
||
% Final training phase
% Refit with the selected (#features, radius) on the globally scaled
% training set, using the validation set to monitor ANFIS training.
fprintf('Final training using best hyper-parameters...\n');

% Rank features on the full (scaled) training set and keep the top ones,
% capped at the number of available columns.
[feat_rank, ~] = relieff(X_trn_s, y_trn, 10);
n_keep    = min(best_feats, size(X_trn_s,2));
sel_final = feat_rank(1:n_keep);

% Apply the same column selection to all three partitions
XtrF = X_trn_s(:, sel_final);
XvlF = X_val_s(:, sel_final);
XteF = X_chk_s(:, sel_final);

fprintf(' Selected %d features (ReliefF top indices)\n', n_keep);
fprintf(' Building initial FIS (SC radius=%.2f)...\n', best_rad);

% Initial FIS from subtractive clustering with the chosen influence range
sc_opts  = genfisOptions("SubtractiveClustering", "ClusterInfluenceRange", best_rad);
init_fis = genfis(XtrF, y_trn, sc_opts);
fprintf(' Initial FIS created with %d rules\n', numel(init_fis.Rules));

% ANFIS settings: hybrid optimisation (method 1), per-epoch error display on,
% step-size display off.
anfis_settings = { ...
    "InitialFis",         init_fis, ...
    "ValidationData",     [XvlF y_val], ...
    "EpochNumber",        config.Nepochs, ...
    "OptimizationMethod", 1, ...
    "DisplayErrorValues", 1, ...
    "DisplayStepSize",    0};
train_opts = anfisOptions(anfis_settings{:});

fprintf('Training ANFIS (Hybrid optimization)...\n');
% fin_fis is the 4th output of anfis (the FIS selected on the validation
% data); trn_error / val_error are the per-epoch error curves.
[trn_fis, trn_error, ~, fin_fis, val_error] = anfis([XtrF y_trn], train_opts);
fprintf(' Done. Final FIS has %d rules.\n\n', numel(fin_fis.Rules));
|
||
|
||
% Evaluation on TEST
% Predict the held-out test targets with the final FIS and compute the
% regression metrics returned by the project helper `evaluate`.
fprintf('Evaluating on Test set ...\n');
y_hat = evalfis(fin_fis, XteF);
[mse, rmse, r2, nmse, ndei] = evaluate(y_hat, y_chk);

% Report: one line per metric, in a fixed order
fprintf('\n');
fprintf(' FINAL MODEL PERFORMANCE (Test set)\n');
fprintf('-------------------------------------------\n');
metric_formats = {' MSE : %g\n', ' RMSE: %g\n', ' R2 : %g\n', ' NMSE: %g\n', ' NDEI: %g\n'};
metric_values  = {mse, rmse, r2, nmse, ndei};
for m = 1:numel(metric_formats)
    fprintf(metric_formats{m}, metric_values{m});
end
fprintf('\n');
|
||
|
||
% Plots
% Hand everything needed for the diagnostic figures to the project helper.
% NOTE(review): the closing message implies plot_results2 writes its figures
% to ./figures_scn2 — confirm in plot_results2.
fprintf('Generating diagnostic plots ...\n');
plot_results2(init_fis, fin_fis, ...                  % initial and trained FIS
    trn_error, val_error, ...                         % learning curves
    y_chk, y_hat, ...                                 % test targets and predictions
    config.feature_grid, config.radius_grid, ...      % grid axes
    cv_scores, cv_rules, ...                          % CV results per grid point
    sel_final);                                       % selected feature indices
fprintf(' All plots saved in ./figures_scn2\n');