%% scenario1.m - Assignment 4 (Classification), Scenario 1 (Haberman)
% TSK classification with Subtractive Clustering (SC)
% Modes: (A) class-independent SC
%        (B) class-dependent SC
% Uses:  split_data, preprocess_data, evaluate_classification, plot_results1
%
% Dataset: ./Datasets/haberman.data
% Columns: [age, op_year, axillary_nodes, class] with class in {1,2}
%
% Assignment 4 in Fuzzy systems
%
% author:
%   Christos Choutouridis ΑΕΜ 8997
%   cchoutou@ece.auth.gr
%

close all; clear; clc;

% CONFIGURATION
% ================================
cfg = struct();
rng(42, 'twister');                     % reproducibility

% Data handling
cfg.split       = [0.6 0.2 0.2];        % train / val / test (stratified in split_data)
cfg.standardize = true;                 % z-score features

% SC radii sweep
cfg.radii = [0.20 0.80];

% ANFIS options
cfg.maxEpochs    = 100;
cfg.errorGoal    = 0;
cfg.initialStep  = 0.01;
cfg.stepDecrease = 0.9;
cfg.stepIncrease = 1.1;
cfg.displayANFIS = 0;                   % quiet

% Modes
cfg.modes = {'class-independent','class-dependent'};

% Output
cfg.outDir = 'figures_scn1';
if ~exist(cfg.outDir,'dir'), mkdir(cfg.outDir); end

% DATA
dataPath = './Datasets/haberman.data';
assert(isfile(dataPath), 'Dataset not found at: %s', dataPath);

raw = load(dataPath);
assert(size(raw,2) == 4, 'Expected 4 columns in haberman.data');

X = raw(:,1:3);
Y = raw(:,4);
Y = Y(:);

classLabels = unique(Y);
minLabel    = min(classLabels);
maxLabel    = max(classLabels);

% SPLIT & PREPROCESS
[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split);

if cfg.standardize
    [trainX, mu, sigma] = preprocess_data(trainX);
    valX  = preprocess_data(valX,  mu, sigma);
    testX = preprocess_data(testX, mu, sigma);
else
    mu = []; sigma = [];
end

% For manual sugfis construction
inRanges = [min(trainX,[],1); max(trainX,[],1)];

% TRAINING
results = [];
runId   = 0;

for m = 1:numel(cfg.modes)
    modeName = cfg.modes{m};

    for r = 1:numel(cfg.radii)
        radius = cfg.radii(r);
        runId  = runId + 1;
        fprintf('\n=== Run %d: mode=%s, radius=%.2f ===\n', runId, modeName, radius);

        % Initial FIS
        switch modeName
            case 'class-independent'
                opt = genfisOptions('SubtractiveClustering', ...
                                    'ClusterInfluenceRange', radius);
                % genfis with subtractive clustering already builds a Sugeno
                % FIS with constant consequents.
                initFis = genfis(trainX, double(trainY), opt);
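
            % Class-dependent SC, implemented in build_classdep_fis below:
            % run subclust separately on the samples of each class and emit
            % one rule per cluster, with a constant consequent equal to that
            % class's label. The rule count then reflects how each class
            % occupies the feature space rather than the pooled data; ANFIS
            % subsequently tunes the MF parameters.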
            case 'class-dependent'
                % Our custom builder
                initFis = build_classdep_fis(trainX, trainY, classLabels, radius, inRanges);

            otherwise
                error('Unknown mode: %s', modeName);
        end

        % ANFIS training
        trData = [trainX double(trainY)];
        ckData = [valX   double(valY)];

        anfisOpts = [cfg.maxEpochs cfg.errorGoal cfg.initialStep ...
                     cfg.stepDecrease cfg.stepIncrease];

        if cfg.displayANFIS
            [fisTrained, trError, ~, ~, ckError] = anfis(trData, initFis, anfisOpts, [], ckData);
        else
            [fisTrained, trError, ~, ~, ckError] = anfis(trData, initFis, anfisOpts, [0 0 0 0], ckData);
        end

        % Evaluate on the test set (current evalfis argument order: (fis, input))
        yhat_cont = evalfis(fisTrained, testX);
        yhat      = round(yhat_cont);

        % Clip into the valid label range (important for small rulebases)
        yhat(yhat < minLabel) = minLabel;
        yhat(yhat > maxLabel) = maxLabel;

        % Metrics (note: our evaluate expects (yTrue, yPred))
        R = evaluate_classification(testY, yhat, classLabels);

        % Collect
        res = struct();
        res.runId   = runId;
        res.mode    = modeName;
        res.radius  = radius;
        res.fis     = fisTrained;
        res.nRules  = numel(fisTrained.Rules);   % new-style FIS objects expose .Rules
        res.metrics = R;
        res.trError = trError;
        res.ckError = ckError;
        res.mu      = mu;
        res.sigma   = sigma;
        res.initFis = initFis;
        res.yhat    = yhat;
        res.ytrue   = testY;
        results = [results; res];

        fprintf('Rules: %d | OA: %.2f%% | Kappa: %.3f\n', res.nRules, 100*R.OA, R.Kappa);
    end
end

% PLOTTING
plot_results1(results, classLabels, cfg);

% SAVE ALL
save('results_scn1.mat','results','cfg','classLabels','mu','sigma', ...
     'trainX','valX','testX','trainY','valY','testY');

fprintf('\nDone. Figures saved in: %s\n', cfg.outDir);

% LOCAL FUNCTIONS
% ===================================================
function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
% BUILD_CLASSDEP_FIS - class-dependent SC for a Sugeno FIS (ANFIS-ready)
%   Creates ONE constant output MF PER RULE (required by ANFIS).
%   Runs SUBCLUST on the FEATURES ONLY of each class.

    D   = size(X, 2);
    fis = sugfis('Name', 'TSK_ClassDependent');

    % Inputs with ranges from the training data
    for d = 1:D
        fis = addInput(fis, [inRanges(1,d) inRanges(2,d)], 'Name', sprintf('x%d', d));
    end

    % Single scalar output y (its range just spans the label space)
    outRange = [min(classLabels) max(classLabels)];
    fis = addOutput(fis, outRange, 'Name', 'y');

    ruleList = [];

    % Build rules class-by-class
    for k = 1:numel(classLabels)
        c  = classLabels(k);
        Xi = X(Y == c, :);
        if isempty(Xi), continue; end

        % Subtractive clustering on the class features
        [centers, sigmas] = subclust(Xi, radius);
        nCl = size(centers, 1);

        % Robust sigma broadcasting to nCl-by-D
        if isscalar(sigmas)
            S = repmat(sigmas, nCl, D);
        elseif size(sigmas,1) == 1 && size(sigmas,2) == D
            S = repmat(sigmas, nCl, 1);
        elseif size(sigmas,1) == nCl && size(sigmas,2) == D
            S = sigmas;
        else
            S = repmat(0.5*(inRanges(2,:) - inRanges(1,:)), nCl, 1);
        end

        % -----------------------------------------
        % For each cluster: add one Gaussian MF per input, one constant MF
        % for the output, and one rule that ties those together (AND = prod).
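        % In MATLAB's 'gaussmf' the parameter vector is [sigma center], i.e.
        %
        %     mu(x) = exp( -(x - center)^2 / (2*sigma^2) ),
        %
        % which is why each cluster's spread/center pair is passed below as
        % [S(i,d) centers(i,d)].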
        for i = 1:nCl
            antecedentIdx = zeros(1, D);

            % Add input MFs for this cluster (and remember their indices)
            for d = 1:D
                mfName = sprintf('c%d_r%d_x%d', c, i, d);
                params = [S(i,d) centers(i,d)];   % [sigma center]
                fis    = addMF(fis, sprintf('x%d', d), 'gaussmf', params, 'Name', mfName);
                antecedentIdx(d) = numel(fis.Inputs(d).MembershipFunctions);
            end

            % Add ONE output MF (constant) for THIS rule (ANFIS requirement)
            outMfName = sprintf('const_c%d_r%d', c, i);
            fis    = addMF(fis, 'y', 'constant', double(c), 'Name', outMfName);
            outIdx = numel(fis.Outputs(1).MembershipFunctions);

            % Rule row: [inMF indices, outMF index, weight, AND = 1]
            rule     = [antecedentIdx, outIdx, 1, 1];
            ruleList = [ruleList; rule]; %#ok<AGROW>
        end
    end

    if ~isempty(ruleList)
        fis = addRule(fis, ruleList);
    end

    % Standard TSK operators
    fis.AndMethod             = 'prod';
    fis.OrMethod              = 'probor';
    fis.ImplicationMethod     = 'prod';
    fis.AggregationMethod     = 'sum';
    fis.DefuzzificationMethod = 'wtaver';
end
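
% -------------------------------------------------------------------
% Illustrative sketch only (NOT called above): evaluate_classification
% ships as a separate file, and the fields this script reads from its
% result (R.OA, R.Kappa) suggest it returns overall accuracy and
% Cohen's kappa. The hypothetical helper below shows one minimal way
% such metrics can be computed from a confusion matrix; its name and
% exact output fields are assumptions, not the project's implementation.
% Example use (hypothetical): Rs = metrics_sketch(testY, yhat, classLabels);
function R = metrics_sketch(yTrue, yPred, classLabels)
    K = numel(classLabels);
    C = zeros(K, K);                          % confusion matrix: rows = true, cols = predicted
    for a = 1:K
        for b = 1:K
            C(a,b) = sum(yTrue == classLabels(a) & yPred == classLabels(b));
        end
    end
    N  = sum(C(:));
    po = trace(C) / N;                        % observed agreement = overall accuracy (OA)
    pe = (sum(C,2).' * sum(C,1).') / N^2;     % chance agreement from the marginals
    R  = struct('OA', po, ...
                'Kappa', (po - pe) / (1 - pe), ...
                'ConfusionMatrix', C);
end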