% File: FuzzySystems/Work 3/source/scenario2.m
%% Scenario2 (TSK - Superconductivity, High-dimensional)
%
% Assignment 3 in Fuzzy systems
%
% author:
% Christos Choutouridis AEM 8997
% cchoutou@ece.auth.gr
%
% Notes:
% - 5-fold CV grid-search over (#features, SC radius) with careful
%   no-leakage pipeline (scaling & ReliefF inside each fold)
% - Final training on Train with Validation on Val, Test on Test
% - Diagnostics: CV heatmap, error-vs-rules, error-vs-#features
% - Classic plots: learning curves, pred vs actual, residuals, MFs (subset)
clear; clc; close all;

% Configuration
% --------------------------------
% Hyper-parameter grid: #ReliefF-kept features x subtractive-clustering radius.
config = struct( ...
    'feature_grid', [5 8 11 15], ...
    'radius_grid',  [0.25 0.5 0.75 1.0], ...
    'K',            5, ...                     % 5-fold CV
    'Nepochs',      100);
rng(42,'twister');                             % reproducible partitioning

fprintf('Scenario 2 — Superconduct\n');
fprintf('===============================\n\n');

% Load dataset (numeric matrix; last column is the target)
fprintf('Loading dataset (superconduct.csv)...\n');
data = load("Datasets/superconduct.csv");
fprintf(' Done: %d samples, %d features + 1 target\n\n', size(data,1), size(data,2)-1);

% Split 60/20/20; scaling below is fitted on TRAIN statistics only
fprintf('Splitting dataset into Train/Val/Test [0.6/0.2/0.2] ...\n');
[X_trn, y_trn, X_val_raw, y_val, X_chk_raw, y_chk] = split_data(data, [0.6 0.2 0.2], 42);
fprintf(' Done: Train=%d, Val=%d, Test=%d\n', ...
    size(X_trn,1), size(X_val_raw,1), size(X_chk_raw,1));

fprintf('Preprocessing (MinMax scaling to [0,1]) ...\n');
[X_trn_s, X_val_s, X_chk_s, scale_stats] = preprocess_data(X_trn, X_val_raw, X_chk_raw, 1);
fprintf(' Done (mode = %s)\n\n', scale_stats.type);

fprintf('Starting %d×%d grid-search (%d folds per point)...\n', ...
    length(config.feature_grid), length(config.radius_grid), config.K);

% Accumulators: one cell per (feature-count, radius) grid point
cv_scores = zeros(length(config.feature_grid), length(config.radius_grid));
cv_rules  = zeros(length(config.feature_grid), length(config.radius_grid));

% One shared fold partition for every grid point (fair comparison across combos)
Ntr   = size(X_trn_s,1);
folds = cvpartition(Ntr, "KFold", config.K);
% Grid search: for every (#features, radius) combo run K-fold CV on the
% training split. Scaling and ReliefF ranking are re-fitted inside each
% fold so no information leaks from the fold's held-out part.
for fi = 1:length(config.feature_grid)
    kfeat = config.feature_grid(fi);
    for rj = 1:length(config.radius_grid)
        rad = config.radius_grid(rj);
        fprintf(' Combo: features=%2d | radius=%.2f ...\n', kfeat, rad);
        fold_err  = zeros(config.K,1);
        fold_rule = zeros(config.K,1);
        for k = 1:config.K
            fprintf(' Fold %d/%d ... ', k, config.K);
            Itr = training(folds, k);
            Ivl = test(folds, k);
            % Unscaled rows of this fold (fold-local scaling comes next)
            x_tr_raw = X_trn(Itr,:);  y_tr_f = y_trn(Itr);
            x_vl_raw = X_trn(Ivl,:);  y_vl_f = y_trn(Ivl);
            % Fit the scaler on the fold-train slice; the validation slice
            % is passed twice because the third (test) slot is unused here
            [x_tr_f, x_vl_f, ~, ~] = preprocess_data(x_tr_raw, x_vl_raw, x_vl_raw, 1);
            % Rank features on fold-train data only (ReliefF, 10 neighbours)
            [idxF, ~] = relieff(x_tr_f, y_tr_f, 10);
            kkeep = min(kfeat, size(x_tr_f,2));
            sel   = idxF(1:kkeep);
            x_tr_f = x_tr_f(:, sel);
            x_vl_f = x_vl_f(:, sel);
            % Initial TSK structure from subtractive clustering
            gopt = genfisOptions("SubtractiveClustering");
            gopt.ClusterInfluenceRange = rad;
            init_fis = genfis(x_tr_f, y_tr_f, gopt);
            % Hybrid (LSE + backprop) ANFIS training, display suppressed
            aopt = anfisOptions("InitialFis", init_fis);
            aopt.ValidationData     = [x_vl_f y_vl_f];
            aopt.EpochNumber        = config.Nepochs;
            aopt.OptimizationMethod = 1;
            aopt.DisplayErrorValues = 0;
            aopt.DisplayStepSize    = 0;
            [~, ~, ~, vl_fis, vl_err] = anfis([x_tr_f y_tr_f], aopt);
            fold_err(k)  = min(vl_err);           % best epoch's validation error
            fold_rule(k) = numel(vl_fis.Rules);
            fprintf(' Done (val err=%.4g, rules=%d)\n', fold_err(k), fold_rule(k));
        end
        cv_scores(fi, rj) = mean(fold_err);
        cv_rules(fi, rj)  = round(mean(fold_rule));
        fprintf(' Mean CV error=%.4g | Mean rules=%d\n\n', ...
            cv_scores(fi,rj), cv_rules(fi,rj));
    end
end
fprintf('Grid-search completed.\n');
% Pick the grid point with the lowest mean CV error
[minErr, ix] = min(cv_scores(:));
[fi_best, rj_best] = ind2sub(size(cv_scores), ix);
best_feats = config.feature_grid(fi_best);
best_rad   = config.radius_grid(rj_best);
fprintf('\n');
fprintf(' Best combo found: features=%d, radius=%.2f\n', best_feats, best_rad);
fprintf(' Mean CV error : %.4g\n', minErr);
fprintf('===========================================\n\n');

% Final training: re-rank features on the full scaled training set, then
% project Train/Val/Test onto the selected columns
fprintf('Final training using best hyper-parameters...\n');
[idx_all, ~] = relieff(X_trn_s, y_trn, 10);
kkeep     = min(best_feats, size(X_trn_s,2));
sel_final = idx_all(1:kkeep);
XtrF = X_trn_s(:, sel_final);
XvlF = X_val_s(:, sel_final);
XteF = X_chk_s(:, sel_final);
fprintf(' Selected %d features (ReliefF top indices)\n', kkeep);

fprintf(' Building initial FIS (SC radius=%.2f)...\n', best_rad);
gopt = genfisOptions("SubtractiveClustering");
gopt.ClusterInfluenceRange = best_rad;
init_fis = genfis(XtrF, y_trn, gopt);
fprintf(' Initial FIS created with %d rules\n', numel(init_fis.Rules));

% Hybrid ANFIS training; validation split drives best-epoch model selection
aopt = anfisOptions("InitialFis", init_fis);
aopt.ValidationData     = [XvlF y_val];
aopt.EpochNumber        = config.Nepochs;
aopt.OptimizationMethod = 1;
aopt.DisplayErrorValues = 1;
aopt.DisplayStepSize    = 0;
fprintf('Training ANFIS (Hybrid optimization)...\n');
[trn_fis, trn_error, ~, fin_fis, val_error] = anfis([XtrF y_trn], aopt);
fprintf(' Done. Final FIS has %d rules.\n\n', numel(fin_fis.Rules));
% Evaluation on TEST
% The test split was never touched during CV or final training, so these
% numbers are an unbiased estimate of generalization performance.
fprintf('Evaluating on Test set ...\n');
y_hat = evalfis(fin_fis, XteF);
% evaluate() is a project-local helper; assumes argument order is
% (predictions, ground truth) — TODO confirm against its definition
[mse, rmse, r2, nmse, ndei] = evaluate(y_hat, y_chk);
fprintf('\n');
fprintf(' FINAL MODEL PERFORMANCE (Test set)\n');
fprintf('-------------------------------------------\n');
fprintf(' MSE : %g\n', mse);
fprintf(' RMSE: %g\n', rmse);
fprintf(' R2  : %g\n', r2);
fprintf(' NMSE: %g\n', nmse);
fprintf(' NDEI: %g\n', ndei);
fprintf('\n');
% Plots
% plot_results2 is project-local; presumably renders the CV heatmap,
% learning curves, pred-vs-actual, residuals and MF plots listed in the
% header notes — verify against its definition
fprintf('Generating diagnostic plots ...\n');
plot_results2( ...
    init_fis, fin_fis, trn_error, val_error, y_chk, y_hat, ...
    config.feature_grid, config.radius_grid, cv_scores, cv_rules, sel_final ...
);
fprintf(' All plots saved in ./figures_scn2\n');