FuzzySystems/Work 4/source/scenario1.m

223 lines
7.5 KiB
Matlab
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman)
% TSK classification with Subtractive Clustering (SC)
% Modes: (A) class-independent SC, (B) class-dependent SC
% Uses: split_data, preprocess_data, evaluate_classification, plot_results1
%
% Dataset: ./Datasets/haberman.data
% Columns: [age, op_year, axillary_nodes, class] with class in {1,2}
close all; clear; clc;
% ============================ CONFIGURATION ================================
% Seed the generator up front so any randomized step later in the script
% is repeatable from run to run.
rng(42, 'twister');

% Every tunable of the experiment lives in this one struct.
cfg = struct( ...
    'split',        [0.6 0.2 0.2], ...  % train / val / test fractions (stratified in split_data)
    'standardize',  true, ...           % z-score the features
    'radii',        [0.20 0.80], ...    % subtractive-clustering radii to sweep
    'maxEpochs',    100, ...            % ANFIS: number of training epochs
    'errorGoal',    0, ...              % ANFIS: training error goal
    'initialStep',  0.01, ...           % ANFIS: initial step size
    'stepDecrease', 0.9, ...            % ANFIS: step-size decrease rate
    'stepIncrease', 1.1, ...            % ANFIS: step-size increase rate
    'displayANFIS', 0, ...              % 0 = quiet training
    'modes',        {{'class-independent','class-dependent'}}, ... % double braces: keep the cell as ONE field value
    'outDir',       'figures_scn1');    % output folder for figures

% Create the output folder on first use (exist(...,'dir') is 0 when it is missing).
if exist(cfg.outDir, 'dir') == 0
    mkdir(cfg.outDir);
end
% =============================== DATA =====================================
% Read the numeric table from disk and separate features from labels.
datasetFile = './Datasets/haberman.data';
assert(isfile(datasetFile), 'Dataset not found at: %s', datasetFile);

habermanData = load(datasetFile);
assert(size(habermanData,2) == 4, 'Expected 4 columns in haberman.data');

% Columns 1-3 are the features, column 4 is the class label.
X = habermanData(:, 1:3);
Y = reshape(habermanData(:, 4), [], 1);   % force a column vector

% Distinct labels plus their extremes (used later to clip rounded predictions).
classLabels = unique(Y);
[minLabel, maxLabel] = bounds(classLabels);
% =========================== SPLIT & PREPROCESS ===========================
% Partition into train / validation / test according to cfg.split.
[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split);

% Scaling statistics stay empty unless standardization is enabled.
mu = [];
sigma = [];
if cfg.standardize
    % Statistics come from the training split only and are then reused
    % unchanged for the validation and test splits.
    [trainX, mu, sigma] = preprocess_data(trainX);
    valX  = preprocess_data(valX,  mu, sigma);
    testX = preprocess_data(testX, mu, sigma);
end

% Per-feature [min; max] of the (possibly scaled) training data; used as the
% input ranges when the FIS is assembled manually.
inRanges = vertcat(min(trainX, [], 1), max(trainX, [], 1));
% ============================== TRAINING ==================================
% Sweep every (mode, radius) combination: build an initial Sugeno FIS, tune it
% with ANFIS, then score the selected model on the held-out test split.
results = []; runId = 0;

% Loop-invariant ANFIS inputs (built once instead of once per run).
trData = [trainX double(trainY)];   % training matrix:   features | target
ckData = [valX   double(valY)];     % validation matrix: features | target
% Legacy anfis option vector:
% [epochs, error goal, initial step, step decrease rate, step increase rate]
anfisOpts = [cfg.maxEpochs cfg.errorGoal cfg.initialStep cfg.stepDecrease cfg.stepIncrease];
% Display vector: [] keeps the anfis defaults, zeros silence progress output.
if cfg.displayANFIS
    dispOpts = [];
else
    dispOpts = [0 0 0 0];
end

for m = 1:numel(cfg.modes)
    modeName = cfg.modes{m};
    for r = 1:numel(cfg.radii)
        radius = cfg.radii(r);
        runId = runId + 1;
        fprintf('\n=== Run %d: mode=%s, radius=%.2f ===\n', runId, modeName, radius);

        % ----- Initial FIS -----
        switch modeName
            case 'class-independent'
                % One clustering pass over all training samples via genfis.
                % NOTE(review): per the genfis documentation, subtractive
                % clustering yields a Sugeno FIS with *linear* output MFs,
                % not constant ones - confirm this is the intended model.
                opt = genfisOptions('SubtractiveClustering', ...
                    'ClusterInfluenceRange', radius);
                initFis = genfis(trainX, double(trainY), opt);
            case 'class-dependent'
                % Custom builder: clusters the features of each class
                % separately and emits one constant-consequent rule per cluster.
                initFis = build_classdep_fis(trainX, trainY, classLabels, radius, inRanges);
            otherwise
                error('Unknown mode: %s', modeName);
        end

        % ----- ANFIS training -----
        % Output 1 of anfis is the model at minimum TRAINING error; output 4 is
        % the model at minimum VALIDATION error. Validation data is supplied
        % precisely to guard against overfitting, so keep output 4.
        [~, trError, ~, fisTrained, ckError] = anfis(trData, initFis, anfisOpts, dispOpts, ckData);

        % ----- Evaluate on test set -----
        % evalfis expects the FIS first (the reversed order is deprecated).
        yhat_cont = evalfis(fisTrained, testX);
        yhat = round(yhat_cont);
        % Clip into the valid label range (important for small rule bases,
        % where the continuous output can overshoot the label interval).
        yhat(yhat < minLabel) = minLabel;
        yhat(yhat > maxLabel) = maxLabel;

        % Metrics (note: evaluate_classification expects (yTrue, yPred))
        R = evaluate_classification(testY, yhat, classLabels);

        % ----- Collect everything needed for plotting / later inspection -----
        res = struct();
        res.runId   = runId;
        res.mode    = modeName;
        res.radius  = radius;
        res.fis     = fisTrained;               % the model that was actually scored
        res.nRules  = numel(fisTrained.Rules);  % object property, not legacy struct field
        res.metrics = R;
        res.trError = trError;
        res.ckError = ckError;
        res.mu      = mu;
        res.sigma   = sigma;
        res.initFis = initFis;
        res.yhat    = yhat;
        res.ytrue   = testY;
        results = [results; res]; %#ok<AGROW>

        fprintf('Rules: %d | OA: %.2f%% | Kappa: %.3f\n', res.nRules, 100*R.OA, R.Kappa);
    end
end
% ============================== PLOTTING ==================================
% Hand all collected runs to the project plotting routine.
plot_results1(results, classLabels, cfg);

% =============================== SAVE ALL =================================
% Persist the results together with the configuration, the scaling
% statistics, and the exact data splits that produced them.
savedVars = {'results','cfg','classLabels','mu','sigma', ...
             'trainX','valX','testX','trainY','valY','testY'};
save('results_scn1.mat', savedVars{:});

fprintf('\nDone. Figures saved in: %s\n', cfg.outDir);
% ============================ LOCAL FUNCTIONS =============================
function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
% BUILD_CLASSDEP_FIS  Sugeno FIS seeded by per-class subtractive clustering.
%
%   fis = BUILD_CLASSDEP_FIS(X, Y, classLabels, radius, inRanges) runs
%   SUBCLUST separately on the feature rows of each class (labels are never
%   passed to the clustering) and turns every cluster centre into one rule:
%   a Gaussian MF on each input plus a dedicated constant output MF whose
%   value is the class label.
%
%   Inputs
%     X           - feature matrix, one row per sample
%     Y           - labels; rows with Y == classLabels(k) form class k
%     classLabels - label values to build rules for
%     radius      - cluster influence range forwarded to SUBCLUST
%     inRanges    - rows 1 and 2 hold the lower / upper bound of each input
%
%   Output
%     fis         - sugfis object with one input per feature column, a single
%                   output 'y', and one rule per cluster found (no rules are
%                   added when no cluster was produced)

    nFeat = size(X, 2);

    % Skeleton: named inputs x1..xD with the supplied ranges, one output y
    % whose range spans the label values.
    fis = sugfis('Name', 'TSK_ClassDependent');
    for j = 1:nFeat
        fis = addInput(fis, inRanges(1:2, j).', 'Name', sprintf('x%d', j));
    end
    fis = addOutput(fis, [min(classLabels) max(classLabels)], 'Name', 'y');

    % Each row: [input MF indices, output MF index, weight, connective]
    ruleRows = [];

    for ci = 1:numel(classLabels)
        label  = classLabels(ci);
        classX = X(Y == label, :);
        if isempty(classX)
            continue   % nothing to cluster for this class
        end

        % Cluster the features of this class only.
        [ctr, sig] = subclust(classX, radius);
        nClusters  = size(ctr, 1);
        spread     = expand_sigmas(sig, nClusters, nFeat, inRanges);

        for q = 1:nClusters
            % One Gaussian MF per input; remember where each one landed.
            mfIdx = zeros(1, nFeat);
            for j = 1:nFeat
                fis = addMF(fis, sprintf('x%d', j), 'gaussmf', ...
                    [spread(q, j) ctr(q, j)], ...              % [sigma center]
                    'Name', sprintf('c%d_r%d_x%d', label, q, j));
                mfIdx(j) = numel(fis.Inputs(j).MembershipFunctions);
            end

            % A separate constant output MF per rule (ANFIS requirement,
            % as noted by the original author).
            fis = addMF(fis, 'y', 'constant', double(label), ...
                'Name', sprintf('const_c%d_r%d', label, q));
            outMf = numel(fis.Outputs(1).MembershipFunctions);

            % weight = 1, connective = 1 (AND)
            ruleRows(end+1, :) = [mfIdx, outMf, 1, 1]; %#ok<AGROW>
        end
    end

    if ~isempty(ruleRows)
        fis = addRule(fis, ruleRows);
    end

    % Standard TSK operators
    fis.AndMethod             = 'prod';
    fis.OrMethod              = 'probor';
    fis.ImplicationMethod     = 'prod';
    fis.AggregationMethod     = 'sum';
    fis.DefuzzificationMethod = 'wtaver';
end

function spread = expand_sigmas(sig, nClusters, nFeat, inRanges)
% EXPAND_SIGMAS  Broadcast the SUBCLUST sigma output to nClusters-by-nFeat.
%   Accepts a scalar, a 1-by-nFeat row, or a full nClusters-by-nFeat matrix;
%   any other shape falls back to half of each input range.
    nRows = size(sig, 1);
    nCols = size(sig, 2);
    if isscalar(sig)
        spread = repmat(sig, nClusters, nFeat);
    elseif nCols == nFeat && nRows == 1
        spread = repmat(sig, nClusters, 1);
    elseif nCols == nFeat && nRows == nClusters
        spread = sig;
    else
        spread = repmat(0.5 * (inRanges(2, :) - inRanges(1, :)), nClusters, 1);
    end
end