223 lines
7.5 KiB
Matlab
223 lines
7.5 KiB
Matlab
% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman)
|
||
% TSK classification with Subtractive Clustering (SC)
|
||
% Modes: (A) class-independent SC, (B) class-dependent SC
|
||
% Uses: split_data, preprocess_data, evaluate_classification, plot_results1
|
||
%
|
||
% Dataset: ./Datasets/haberman.data
|
||
% Columns: [age, op_year, axillary_nodes, class] with class in {1,2}
|
||
|
||
close all;
clear;
clc;

% ============================ CONFIGURATION ================================
% Seed the global random stream so repeated runs of the script are repeatable.
rng(42, 'twister');

% Every tunable of the experiment is collected in one struct. The cell array
% of mode names is wrapped in an extra pair of braces so that struct() stores
% it as a single field value instead of building a struct array.
cfg = struct( ...
    'split',        [0.6 0.2 0.2], ...  % train / val / test (stratified in split_data)
    'standardize',  true, ...           % z-score features
    'radii',        [0.20 0.80], ...    % SC radii sweep
    'maxEpochs',    100, ...            % ANFIS options start here
    'errorGoal',    0, ...
    'initialStep',  0.01, ...
    'stepDecrease', 0.9, ...
    'stepIncrease', 1.1, ...
    'displayANFIS', 0, ...              % quiet
    'modes',        {{'class-independent','class-dependent'}}, ...
    'outDir',       'figures_scn1');

% Make sure the figure output folder is there before anything is written.
if ~exist(cfg.outDir, 'dir')
    mkdir(cfg.outDir);
end
|
||
|
||
% =============================== DATA =====================================
% Read the numeric table from disk and fail early if it is missing or does
% not have the four expected columns (three features followed by the label).
dataPath = './Datasets/haberman.data';
assert(isfile(dataPath), 'Dataset not found at: %s', dataPath);

raw = load(dataPath);
assert(size(raw, 2) == 4, 'Expected 4 columns in haberman.data');

% Features are the first three columns; the label is the fourth, kept as a
% column vector.
X = raw(:, 1:3);
Y = reshape(raw(:, 4), [], 1);

% Distinct labels and their extremes; the extremes are used later to clip
% rounded model outputs back into the label range.
classLabels = unique(Y);
minLabel = min(classLabels);
maxLabel = max(classLabels);
|
||
|
||
% =========================== SPLIT & PREPROCESS ===========================
% Partition the data into train / validation / test using the configured
% fractions.
[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split);

% Scaling statistics default to empty and are only filled when
% standardization is enabled. They are obtained from the training split and
% then handed to preprocess_data for the validation and test splits.
mu = [];
sigma = [];
if cfg.standardize
    [trainX, mu, sigma] = preprocess_data(trainX);
    valX  = preprocess_data(valX,  mu, sigma);
    testX = preprocess_data(testX, mu, sigma);
end

% Per-feature [min; max] of the (possibly standardized) training features,
% used as input ranges when the sugfis is assembled by hand.
inRanges = vertcat(min(trainX, [], 1), max(trainX, [], 1));
|
||
|
||
% ============================== TRAINING ==================================
% Sweep every (mode, radius) combination. For each one: build an initial
% Sugeno FIS with subtractive clustering, tune it with ANFIS on the training
% split (the validation split is passed as checking data), score the tuned
% FIS on the test split and append a record to `results`.
results = []; runId = 0;

% ANFIS inputs that do not depend on the run are assembled once.
trData = [trainX double(trainY)];   % features followed by the target column
ckData = [valX double(valY)];
anfisOpts = [cfg.maxEpochs cfg.errorGoal cfg.initialStep cfg.stepDecrease cfg.stepIncrease];
if cfg.displayANFIS
    dispOpts = [];                  % verbose run: no display vector supplied
else
    dispOpts = [0 0 0 0];           % quiet run: all four display flags off
end

for m = 1:numel(cfg.modes)
    modeName = cfg.modes{m};

    for r = 1:numel(cfg.radii)
        radius = cfg.radii(r);
        runId = runId + 1;
        fprintf('\n=== Run %d: mode=%s, radius=%.2f ===\n', runId, modeName, radius);

        % ----- Initial FIS -----
        switch modeName
            case 'class-independent'
                % One clustering pass over all training samples via the
                % options-based genfis API.
                opt = genfisOptions('SubtractiveClustering', ...
                    'ClusterInfluenceRange', radius);
                initFis = genfis(trainX, double(trainY), opt);

            case 'class-dependent'
                % Clusters are found separately per class, on features only.
                initFis = build_classdep_fis(trainX, trainY, classLabels, radius, inRanges);

            otherwise
                error('Unknown mode: %s', modeName);
        end

        % ----- ANFIS training -----
        % NOTE(review): the 4th output of anfis is discarded, so the
        % validation split only contributes the ckError curve here; the
        % model evaluated below is the first output. Confirm this is intended.
        [fisTrained, trError, ~, ~, ckError] = anfis(trData, initFis, anfisOpts, dispOpts, ckData);

        % ----- Evaluate on test set -----
        % The FIS goes first: this is the argument order of the object-based
        % evalfis API that the rest of this file (sugfis, addMF, ...) targets.
        yhat_cont = evalfis(fisTrained, testX);
        yhat = round(yhat_cont);
        % clip into valid label range (important for small rulebases)
        yhat(yhat < minLabel) = minLabel;
        yhat(yhat > maxLabel) = maxLabel;

        % Metrics (note: our evaluate expects (yTrue, yPred))
        R = evaluate_classification(testY, yhat, classLabels);

        % Collect
        res = struct();
        res.runId   = runId;
        res.mode    = modeName;
        res.radius  = radius;
        res.fis     = fisTrained;
        % FIS objects expose the rule list as `Rules`; the legacy struct
        % field name `rule` is not a property of the object.
        res.nRules  = numel(fisTrained.Rules);
        res.metrics = R;
        res.trError = trError;
        res.ckError = ckError;
        res.mu      = mu;
        res.sigma   = sigma;
        res.initFis = initFis;
        res.yhat    = yhat;
        res.ytrue   = testY;
        results = [results; res]; %#ok<AGROW>

        fprintf('Rules: %d | OA: %.2f%% | Kappa: %.3f\n', res.nRules, 100*R.OA, R.Kappa);
    end
end
|
||
|
||
% ============================== PLOTTING ==================================
% Hand all run records to the plotting helper.
plot_results1(results, classLabels, cfg);

% =============================== SAVE ALL =================================
% Persist the run records together with the configuration, the scaling
% statistics and the exact data splits that produced them.
savedVars = {'results', 'cfg', 'classLabels', 'mu', 'sigma', ...
             'trainX', 'valX', 'testX', 'trainY', 'valY', 'testY'};
save('results_scn1.mat', savedVars{:});

fprintf('\nDone. Figures saved in: %s\n', cfg.outDir);
|
||
|
||
% ============================ LOCAL FUNCTIONS =============================
|
||
function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
% BUILD_CLASSDEP_FIS  Sugeno FIS whose rules come from per-class clustering.
%
%   fis = BUILD_CLASSDEP_FIS(X, Y, classLabels, radius, inRanges)
%
%   Inputs
%     X            feature matrix, one sample per row.
%     Y            label per sample; rows are selected with Y == label.
%     classLabels  labels to process; subclust is run once per label on the
%                  FEATURES of that class only.
%     radius       influence range forwarded to subclust.
%     inRanges     two-row matrix: row 1 holds the lower and row 2 the upper
%                  bound of each input variable.
%
%   Output
%     fis          sugfis named 'TSK_ClassDependent' with inputs x1..xD and a
%                  single output y. Every cluster contributes one Gaussian MF
%                  per input, one constant output MF equal to the class
%                  label, and one AND rule of weight 1 linking them, i.e.
%                  one output MF per rule (noted by the original author as an
%                  ANFIS requirement).

nFeat = size(X, 2);

fis = sugfis('Name', 'TSK_ClassDependent');

% One input variable per feature, bounded by the supplied ranges.
for d = 1:nFeat
    fis = addInput(fis, inRanges(1:2, d).', 'Name', sprintf('x%d', d));
end

% Scalar output whose range spans the label values.
fis = addOutput(fis, [min(classLabels) max(classLabels)], 'Name', 'y');

% Rule rows are laid out as [input MF indices, output MF index, weight, AND=1].
rules = zeros(0, nFeat + 3);

for k = 1:numel(classLabels)
    label  = classLabels(k);
    classX = X(Y == label, :);
    if isempty(classX)
        continue;   % nothing to cluster for this label
    end

    % Cluster this class's features and bring the returned spreads to one
    % row per cluster and one column per feature.
    [ctr, spread] = subclust(classX, radius);
    nClusters = size(ctr, 1);
    widths = expand_cluster_sigmas(spread, nClusters, nFeat, inRanges);

    for i = 1:nClusters
        row = zeros(1, nFeat + 3);

        % Gaussian antecedent per input; remember where each MF landed.
        for d = 1:nFeat
            fis = addMF(fis, sprintf('x%d', d), 'gaussmf', ...
                [widths(i, d) ctr(i, d)], ...          % [sigma center]
                'Name', sprintf('c%d_r%d_x%d', label, i, d));
            row(d) = numel(fis.Inputs(d).MembershipFunctions);
        end

        % Dedicated constant consequent carrying the class label.
        fis = addMF(fis, 'y', 'constant', double(label), ...
            'Name', sprintf('const_c%d_r%d', label, i));
        row(nFeat + 1) = numel(fis.Outputs(1).MembershipFunctions);

        row(nFeat + 2:end) = [1 1];                    % weight, AND connective
        rules(end + 1, :) = row; %#ok<AGROW>
    end
end

if ~isempty(rules)
    fis = addRule(fis, rules);
end

% Standard TSK operators
fis.AndMethod = 'prod';
fis.OrMethod = 'probor';
fis.ImplicationMethod = 'prod';
fis.AggregationMethod = 'sum';
fis.DefuzzificationMethod = 'wtaver';
end

function S = expand_cluster_sigmas(sigmas, nCl, D, inRanges)
% EXPAND_CLUSTER_SIGMAS  Broadcast cluster spreads to an nCl-by-D matrix.
%   A scalar is replicated everywhere, a 1-by-D row is repeated for every
%   cluster, and an nCl-by-D matrix is used as is. Any other shape falls back
%   to half of each input range, repeated for every cluster.
if isscalar(sigmas)
    S = repmat(sigmas, nCl, D);
    return;
end

shape = [size(sigmas, 1), size(sigmas, 2)];
if isequal(shape, [1 D])
    S = repmat(sigmas, nCl, 1);
elseif isequal(shape, [nCl D])
    S = sigmas;
else
    S = repmat(0.5 * (inRanges(2, :) - inRanges(1, :)), nCl, 1);
end
end
|
||
|