HW4: Scenario 2 source and report added
BIN
Work 1/FuzzySystems_HW1_Choutouridis_8997.zip
Normal file
BIN
Work 2/FuzzySystems_HW2_Choutouridis_8997.zip
Normal file
BIN
Work 3/FuzzySystems_HW3_Choutouridis_8997.zip
Normal file
@ -254,7 +254,149 @@ x_i' = \frac{x_i - \mu_i}{\sigma_i}
|
||||
\end{itemize}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
% =====================================================================
|
||||
\subsection{Επίλογος}
|
||||
Αναπτύξαμε
|
||||
\section{Σενάριο 2 — Dataset με υψηλή διαστασιμότητα (Epileptic Seizure Recognition)}
|
||||
\label{sec:scenario2}
|
||||
|
||||
\subsection{Περιγραφή και ζητούμενο}
|
||||
Το δεύτερο σενάριο στοχεύει στην ταξινόμηση εγκεφαλικών σημάτων \textit{EEG} σε πέντε κατηγορίες, στο πλαίσιο του προβλήματος \emph{Epileptic Seizure Recognition}.
|
||||
Σε αντίθεση με το απλούστερο πρόβλημα του Σενάριου~1, εδώ οι διαστάσεις των δεδομένων είναι υψηλές και οι κλάσεις πολυπληθείς, καθιστώντας το έργο αξιολόγησης σημαντικά πιο απαιτητικό.
|
||||
|
||||
Ο στόχος είναι η ανάπτυξη ενός TSK–τύπου μοντέλου που συνδυάζει:
|
||||
\begin{itemize}
|
||||
\item επιλογή χαρακτηριστικών με \textbf{ReliefF},
|
||||
\item αρχικοποίηση κανόνων μέσω \textbf{Subtractive Clustering (SC)},
|
||||
\item και εκπαίδευση με \textbf{υβριδική μέθοδο} (\emph{gradient descent} + \emph{least squares}).
|
||||
\end{itemize}
|
||||
Η διαδικασία επαναλαμβάνεται για διάφορους συνδυασμούς ακτίνας συσσωμάτωσης $r_a$ και αριθμού χαρακτηριστικών που κρατούνται μετά το ReliefF, με στόχο τον εντοπισμό του συνδυασμού που προσφέρει τον βέλτιστο συμβιβασμό μεταξύ ακρίβειας και πολυπλοκότητας (\#Rules).
|
||||
|
||||
\subsection{Προσέγγιση και μεθοδολογία}
|
||||
Για την επιλογή των υπερπαραμέτρων χρησιμοποιήθηκε \textbf{k–fold cross–validation}, ενώ η συνολική διαδικασία αυτοματοποιήθηκε πλήρως μέσα από το script \texttt{scenario2.m}.
|
||||
Η ροή έχει ως εξής:
|
||||
\begin{enumerate}
|
||||
\item Διαβάζονται τα δεδομένα από το αρχείο \texttt{epileptic\_seizure\_data.csv} και κατανέμονται σε train-validation-test σε ποσοστό $60$–$20$–$20$.
|
||||
\item Για κάθε συνδυασμό τιμών \texttt{feature\_grid} και \texttt{radii\_grid}, εκτελείται η διαδικασία k-fold CV.
|
||||
Σε κάθε fold εφαρμόζεται το ReliefF ώστε να επιλεγούν τα πιο σημαντικά χαρακτηριστικά, και στη συνέχεια πραγματοποιείται Subtractive Clustering ανά κλάση (class–dependent).
|
||||
\item Κατασκευάζεται το αρχικό FIS με μία Gaussian MF ανά cluster και constant εξόδους (ένας MF ανά κανόνα).
|
||||
\item Η εκπαίδευση γίνεται όλες τις εποχές μέσω \texttt{anfis()}, με validation set για έλεγχο γενίκευσης.
|
||||
\item Μετά από κάθε fold υπολογίζεται ο Cohen’s $\kappa$ και ο αριθμός κανόνων, ώστε να εξαχθεί ο μέσος όρος ανά συνδυασμό παραμέτρων.
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
|
||||
\paragraph{Περιορισμοί εκτέλεσης.}
|
||||
Δυστυχώς, μέχρι της παρούσης, δεν καταφέραμε να τρέξουμε το σενάριο για το μέγιστο αριθμό εποχών ($100$) και με το πλήρες grid σε κανέναν προσωπικό μας υπολογιστή.
|
||||
Παρόλα αυτά, παραθέτουμε τα αποτελέσματα που λάβαμε από ένα μικρότερο πείραμα όπου τρέξαμε για:
|
||||
\begin{verbatim}
|
||||
cfg.feature_grid = [5 8]; % instead of [5 8 11 15]
|
||||
cfg.radii_grid = [0.5 0.75]; % instead of [0.25 0.50 0.75 1.00]
|
||||
cfg.kfold = 3; % instead of 5
|
||||
cfg.maxEpochs = 20; % instead of 100
|
||||
\end{verbatim}
|
||||
Από το grid–search προέκυψε ως \textbf{βέλτιστο μοντέλο} το:
|
||||
\[
|
||||
\text{features}=5, \quad r_a=0.50, \quad \text{rules}=6, \quad \kappa=0.23
|
||||
\]
|
||||
που προσφέρει ικανοποιητική ισορροπία μεταξύ ακρίβειας και απλότητας.
|
||||
|
||||
\subsection{Αποτελέσματα πειράματος}
|
||||
|
||||
\paragraph{Αναζήτηση υπερπαραμέτρων (Grid Search).}
|
||||
Στο Σχήμα~\ref{fig:scn2-grid} φαίνονται τα αποτελέσματα του grid–search.
|
||||
Η μέση τιμή του συντελεστή $\kappa$ παραμένει κοντά στο $0.22$–$0.23$ για όλους τους συνδυασμούς, δείχνοντας ότι το μοντέλο είναι σχετικά σταθερό.
|
||||
Ο αριθμός κανόνων μειώνεται αισθητά με αύξηση του $r_a$, όπως αναμενόταν.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.49\textwidth]{../source/figures_scn2/cv_kappa_heatmap.png}
|
||||
\includegraphics[width=.49\textwidth]{../source/figures_scn2/cv_rules_heatmap.png}
|
||||
\caption{Grid search: μέση τιμή του Cohen’s $\kappa$ (αριστερά)
|
||||
και μέσος αριθμός κανόνων (δεξιά) για κάθε συνδυασμό παραμέτρων.}
|
||||
\label{fig:scn2-grid}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Απόδοση βέλτιστου μοντέλου.}
|
||||
Η μήτρα σύγχυσης του βέλτιστου μοντέλου φαίνεται στο Σχήμα~\ref{fig:scn2-cm}.
|
||||
Παρατηρείται ότι οι κλάσεις 3 και 4 αναγνωρίζονται με σχετικά υψηλή ακρίβεια, ενώ η Κλάση~1 συγκεντρώνει τις περισσότερες λανθασμένες προβλέψεις.
|
||||
Η συνολική ακρίβεια είναι μέτρια, αλλά ικανοποιητική για το συγκεκριμένο dataset.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.7\textwidth]{../source/figures_scn2/cm_best_model.png}
|
||||
\caption{Μήτρα σύγχυσης για το βέλτιστο μοντέλο (features$=5$, $r_a=0.50$, rules$=6$).}
|
||||
\label{fig:scn2-cm}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Καμπύλη εκμάθησης.}
|
||||
Η καμπύλη εκμάθησης (Σχήμα~\ref{fig:scn2-learning}) παρουσιάζει σταδιακή μείωση του σφάλματος σε train και validation set, χωρίς σημαντική απόκλιση, γεγονός που δείχνει ότι το μοντέλο γενικεύει καλά και δεν υπερ-προσαρμόζεται.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.65\textwidth]{../source/figures_scn2/learning_best_model.png}
|
||||
\caption{Καμπύλες εκμάθησης (training \& validation error) για το βέλτιστο μοντέλο.}
|
||||
\label{fig:scn2-learning}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Συναρτήσεις συμμετοχής.}
|
||||
Στο Σχήμα~\ref{fig:scn2-mfs} απεικονίζονται οι συναρτήσεις συμμετοχής για τα πέντε πιο σημαντικά χαρακτηριστικά, πριν και μετά την εκπαίδευση.
|
||||
Παρατηρείται ελαφρά προσαρμογή στα πλάτη και στις θέσεις των Gaussian MF, χωρίς παραμόρφωση — κάτι που δείχνει καλή σταθερότητα του μοντέλου.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=\textwidth]{../source/figures_scn2/mfs_best_model.png}
|
||||
\caption{MFs πριν και μετά την εκπαίδευση (βέλτιστο μοντέλο, top–5 χαρακτηριστικά).}
|
||||
\label{fig:scn2-mfs}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Ανάλυση ανά κλάση.}
|
||||
Οι μετρικές ακρίβειας ανά κλάση (Σχήμα~\ref{fig:scn2-pa-ua}) δείχνουν ικανοποιητική απόδοση για τις Κλάσεις~1 και~3, ενώ οι υπόλοιπες παρουσιάζουν χαμηλότερες τιμές PA και UA.
|
||||
Αυτό είναι αναμενόμενο λόγω ανισορροπίας δείγματος και επικαλύψεων στα χαρακτηριστικά.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.8\textwidth]{../source/figures_scn2/pa_ua_best_model.png}
|
||||
\caption{Producer’s (PA) και User’s (UA) Accuracy ανά κλάση στο test–set.}
|
||||
\label{fig:scn2-pa-ua}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Απόδοση στο test set.}
|
||||
Η σύγκριση πραγματικών και προβλεπόμενων ετικετών (Σχήμα~\ref{fig:scn2-truth}) δείχνει ότι οι περισσότερες προβλέψεις ακολουθούν τη σωστή κλάση, αν και παρατηρείται «θόρυβος» στις μεταβάσεις — ένδειξη περιορισμένης διακριτότητας των clusters.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.9\textwidth]{../source/figures_scn2/pred_vs_truth_best_model.png}
|
||||
\caption{Truth vs Prediction στο test–set (βέλτιστο μοντέλο).}
|
||||
\label{fig:scn2-truth}
|
||||
\end{figure}
|
||||
|
||||
\paragraph{Βάρη ReliefF.}
|
||||
Τα πέντε επιλεγμένα χαρακτηριστικά είχαν πολύ κοντινά βάρη (Σχήμα~\ref{fig:scn2-relief}), γεγονός που δείχνει ότι η πληροφορία κατανέμεται σχετικά ομοιόμορφα -- δεν υπάρχει δηλαδή ένα «κυρίαρχο» χαρακτηριστικό.
|
||||
|
||||
\begin{figure}[H]\centering
|
||||
\includegraphics[width=.65\textwidth]{../source/figures_scn2/relieff_weights_selected.png}
|
||||
\caption{Βάρη των επιλεγμένων χαρακτηριστικών από το ReliefF (top–5).}
|
||||
\label{fig:scn2-relief}
|
||||
\end{figure}
|
||||
|
||||
\subsection{Συμπεράσματα}
|
||||
Το μειωμένο πείραμα επιβεβαιώνει τη λειτουργικότητα της ροής και των συναρτήσεων.
|
||||
Παρά τον περιορισμένο αριθμό εποχών, το σύστημα κατόρθωσε να επιτύχει σταθερό Kappa γύρω στο $0.23$, με μόλις $6$ κανόνες και πέντε χαρακτηριστικά.
|
||||
Η συμπεριφορά των μετρικών και των MFs δείχνει σωστή προσαρμογή και καλή γενίκευση, ενώ η μείωση του αριθμού κανόνων με αύξηση της ακτίνας επιβεβαιώνει τη θεωρητική αναμενόμενη συμπεριφορά του Subtractive Clustering.
|
||||
|
||||
Σε ένα πλήρες πείραμα (με $100$+ εποχές και εκτεταμένο grid), αναμένεται περαιτέρω βελτίωση τόσο στην τιμή του $\kappa$ όσο και στη σταθερότητα των καμπυλών εκμάθησης.
|
||||
|
||||
|
||||
% =====================================================================
|
||||
\section{Επίλογος}
|
||||
|
||||
Στην παρούσα εργασία υλοποιήσαμε μια ολοκληρωμένη ροή για την ανάπτυξη και αξιολόγηση TSK–fuzzy μοντέλων ταξινόμησης με χρήση \textsc{ANFIS}.
|
||||
Η μεθοδολογία κάλυψε όλα τα στάδια -- από τον διαχωρισμό και την προεπεξεργασία των δεδομένων, μέχρι την επιλογή χαρακτηριστικών, την εκπαίδευση και τη συγκριτική ανάλυση των αποτελεσμάτων.
|
||||
|
||||
Στο Σενάριο~1, επιβεβαιώθηκε η σημασία της ακτίνας συσσωμάτωσης και της στρατηγικής αρχικοποίησης (class–dependent ή independent) ως προς την ισορροπία μεταξύ πολυπλοκότητας και ακρίβειας.
|
||||
Τα αποτελέσματα ήταν σταθερά, με συνεπή συμπεριφορά των MFs και των μετρικών.
|
||||
|
||||
Το πιο απαιτητικό Σενάριο~2, όπου συνδυάστηκε επιλογή χαρακτηριστικών με ReliefF και Sub. Clustering, δυστοιχώς δεν καταφέραμε να το τρέξουμε όπως θέλαμε.
|
||||
Παρόλα αυτά παρατηρήθηκε καλή γενίκευση ακόμη και με περιορισμένο grid και λίγες εποχές.
|
||||
Παρά τους χρονικούς περιορισμούς, η συνολική συμπεριφορά του συστήματος ήταν αξιόπιστη και αναδεικνύει τη δυναμική των TSK–fuzzy μοντέλων για ταξινόμηση υψηλής διάστασης.
|
||||
Η προσέγγιση αυτή προσφέρει μια σταθερή και επεκτάσιμη βάση για μελλοντική έρευνα σε μεγαλύτερης κλίμακας δεδομένα.
|
||||
|
||||
|
||||
|
||||
\end{document}
|
||||
|
||||
22
Work 4/source/Scenario1.log
Normal file
@ -0,0 +1,22 @@
|
||||
|
||||
=== Run 1: mode=class-independent, radius=0.20 ===
|
||||
Overall Accuracy: 68.85%
|
||||
Cohen's Kappa : 0.413
|
||||
Rules: 32 | OA: 68.85% | Kappa: 0.413
|
||||
|
||||
=== Run 2: mode=class-independent, radius=0.80 ===
|
||||
Overall Accuracy: 73.77%
|
||||
Cohen's Kappa : 0.617
|
||||
Rules: 3 | OA: 73.77% | Kappa: 0.617
|
||||
|
||||
=== Run 3: mode=class-dependent, radius=0.20 ===
|
||||
Overall Accuracy: 60.66%
|
||||
Cohen's Kappa : 0.296
|
||||
Rules: 51 | OA: 60.66% | Kappa: 0.296
|
||||
|
||||
=== Run 4: mode=class-dependent, radius=0.80 ===
|
||||
Overall Accuracy: 72.13%
|
||||
Cohen's Kappa : 0.604
|
||||
Rules: 4 | OA: 72.13% | Kappa: 0.604
|
||||
|
||||
Done. Figures saved in: figures_scn1
|
||||
235
Work 4/source/Scenario2.log
Normal file
@ -0,0 +1,235 @@
|
||||
Scenario 2 - Epileptic Seizure Classification
|
||||
================================================
|
||||
|
||||
Configuration loaded: 3 folds, 2 feature options, 2 radius options.
|
||||
Loading dataset from ./Datasets/epileptic_seizure_data.csv ...
|
||||
Dataset loaded: 11500 samples, 178 features, 5 classes.
|
||||
|
||||
Splitting data into train/val/test (60/20/20%)...
|
||||
-> train: 6900 val: 2300 test: 2300
|
||||
Applying z-score normalization...
|
||||
|
||||
GRID SEARCH (features × radius) using 3-fold CV
|
||||
|
||||
[GRID] features= 5, radius=0.50 ...
|
||||
-> Fold 1/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 92
|
||||
Number of linear parameters: 7
|
||||
Number of nonlinear parameters: 70
|
||||
Total number of parameters: 77
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 7
|
||||
Minimal training RMSE = 1.13032
|
||||
Minimal checking RMSE = 1.13236
|
||||
Overall Accuracy: 30.61%
|
||||
Cohen's Kappa : 0.169
|
||||
kappa=0.169 rules=7
|
||||
|
||||
-> Fold 2/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 140
|
||||
Number of linear parameters: 11
|
||||
Number of nonlinear parameters: 110
|
||||
Total number of parameters: 121
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 11
|
||||
Minimal training RMSE = 1.12499
|
||||
Minimal checking RMSE = 1.15105
|
||||
Overall Accuracy: 39.43%
|
||||
Cohen's Kappa : 0.289
|
||||
kappa=0.289 rules=11
|
||||
|
||||
-> Fold 3/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 128
|
||||
Number of linear parameters: 10
|
||||
Number of nonlinear parameters: 100
|
||||
Total number of parameters: 110
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 10
|
||||
Minimal training RMSE = 1.13166
|
||||
Minimal checking RMSE = 1.12551
|
||||
Overall Accuracy: 34.91%
|
||||
Cohen's Kappa : 0.225
|
||||
kappa=0.225 rules=10
|
||||
|
||||
-> mean Kappa=0.227 mean rules=9
|
||||
|
||||
[GRID] features= 5, radius=0.75 ...
|
||||
-> Fold 1/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 68
|
||||
Number of linear parameters: 5
|
||||
Number of nonlinear parameters: 50
|
||||
Total number of parameters: 55
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 5
|
||||
Minimal training RMSE = 1.12389
|
||||
Minimal checking RMSE = 1.12597
|
||||
Overall Accuracy: 33.17%
|
||||
Cohen's Kappa : 0.210
|
||||
kappa=0.210 rules=5
|
||||
|
||||
-> Fold 2/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 68
|
||||
Number of linear parameters: 5
|
||||
Number of nonlinear parameters: 50
|
||||
Total number of parameters: 55
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 5
|
||||
Minimal training RMSE = 1.15261
|
||||
Minimal checking RMSE = 1.16568
|
||||
Overall Accuracy: 34.30%
|
||||
Cohen's Kappa : 0.240
|
||||
kappa=0.240 rules=5
|
||||
|
||||
-> Fold 3/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 68
|
||||
Number of linear parameters: 5
|
||||
Number of nonlinear parameters: 50
|
||||
Total number of parameters: 55
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 5
|
||||
Minimal training RMSE = 1.14349
|
||||
Minimal checking RMSE = 1.13975
|
||||
Overall Accuracy: 34.65%
|
||||
Cohen's Kappa : 0.226
|
||||
kappa=0.226 rules=5
|
||||
|
||||
-> mean Kappa=0.225 mean rules=5
|
||||
|
||||
[GRID] features= 8, radius=0.50 ...
|
||||
-> Fold 1/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 209
|
||||
Number of linear parameters: 11
|
||||
Number of nonlinear parameters: 176
|
||||
Total number of parameters: 187
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 11
|
||||
Minimal training RMSE = 1.11568
|
||||
Minimal checking RMSE = 1.12592
|
||||
Overall Accuracy: 35.26%
|
||||
Cohen's Kappa : 0.224
|
||||
kappa=0.224 rules=11
|
||||
|
||||
-> Fold 2/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 209
|
||||
Number of linear parameters: 11
|
||||
Number of nonlinear parameters: 176
|
||||
Total number of parameters: 187
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 11
|
||||
Minimal training RMSE = 1.08705
|
||||
Minimal checking RMSE = 1.11471
|
||||
Overall Accuracy: 35.48%
|
||||
Cohen's Kappa : 0.226
|
||||
kappa=0.226 rules=11
|
||||
|
||||
-> Fold 3/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 209
|
||||
Number of linear parameters: 11
|
||||
Number of nonlinear parameters: 176
|
||||
Total number of parameters: 187
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 11
|
||||
Minimal training RMSE = 1.12397
|
||||
Minimal checking RMSE = 1.12198
|
||||
Overall Accuracy: 35.65%
|
||||
Cohen's Kappa : 0.230
|
||||
kappa=0.230 rules=11
|
||||
|
||||
-> mean Kappa=0.227 mean rules=11
|
||||
|
||||
[GRID] features= 8, radius=0.75 ...
|
||||
-> Fold 1/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 119
|
||||
Number of linear parameters: 6
|
||||
Number of nonlinear parameters: 96
|
||||
Total number of parameters: 102
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 6
|
||||
Minimal training RMSE = 1.12245
|
||||
Minimal checking RMSE = 1.12803
|
||||
Overall Accuracy: 32.70%
|
||||
Cohen's Kappa : 0.196
|
||||
kappa=0.196 rules=6
|
||||
|
||||
-> Fold 2/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 173
|
||||
Number of linear parameters: 9
|
||||
Number of nonlinear parameters: 144
|
||||
Total number of parameters: 153
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 9
|
||||
Minimal training RMSE = 1.10065
|
||||
Minimal checking RMSE = 1.12512
|
||||
Overall Accuracy: 37.09%
|
||||
Cohen's Kappa : 0.251
|
||||
kappa=0.251 rules=9
|
||||
|
||||
-> Fold 3/3 ...
|
||||
ANFIS info:
|
||||
Number of nodes: 101
|
||||
Number of linear parameters: 5
|
||||
Number of nonlinear parameters: 80
|
||||
Total number of parameters: 85
|
||||
Number of training data pairs: 4600
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 5
|
||||
Minimal training RMSE = 1.13562
|
||||
Minimal checking RMSE = 1.13557
|
||||
Overall Accuracy: 33.00%
|
||||
Cohen's Kappa : 0.196
|
||||
kappa=0.196 rules=5
|
||||
|
||||
-> mean Kappa=0.214 mean rules=7
|
||||
|
||||
BEST HYPERPARAMS
|
||||
features=5 radius=0.50 CV Kappa=0.227 mean rules=9
|
||||
|
||||
Training final model on train+val with best params ...
|
||||
|
||||
ANFIS info:
|
||||
Number of nodes: 80
|
||||
Number of linear parameters: 6
|
||||
Number of nonlinear parameters: 60
|
||||
Total number of parameters: 66
|
||||
Number of training data pairs: 9200
|
||||
Number of checking data pairs: 2300
|
||||
Number of fuzzy rules: 6
|
||||
|
||||
Minimal training RMSE = 1.14268
|
||||
Minimal checking RMSE = 1.1463
|
||||
Final training complete: 6 rules.
|
||||
|
||||
Evaluating on TEST set ...
|
||||
Overall Accuracy: 29.78%
|
||||
Cohen's Kappa : 0.158
|
||||
|
||||
[TEST RESULTS]
|
||||
OA = 29.78 %
|
||||
Kappa= 0.158
|
||||
Rules= 6
|
||||
|
||||
Generating figures ...
|
||||
|
||||
Done. Figures saved in: figures_scn2
|
||||
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 44 KiB After Width: | Height: | Size: 44 KiB |
|
Before Width: | Height: | Size: 66 KiB After Width: | Height: | Size: 66 KiB |
|
Before Width: | Height: | Size: 82 KiB After Width: | Height: | Size: 82 KiB |
|
Before Width: | Height: | Size: 68 KiB After Width: | Height: | Size: 68 KiB |
|
Before Width: | Height: | Size: 82 KiB After Width: | Height: | Size: 82 KiB |
|
Before Width: | Height: | Size: 411 KiB After Width: | Height: | Size: 411 KiB |
|
Before Width: | Height: | Size: 146 KiB After Width: | Height: | Size: 146 KiB |
|
Before Width: | Height: | Size: 566 KiB After Width: | Height: | Size: 566 KiB |
|
Before Width: | Height: | Size: 174 KiB After Width: | Height: | Size: 174 KiB |
|
Before Width: | Height: | Size: 52 KiB After Width: | Height: | Size: 52 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |
|
Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 134 KiB |
|
Before Width: | Height: | Size: 122 KiB After Width: | Height: | Size: 122 KiB |
|
Before Width: | Height: | Size: 133 KiB After Width: | Height: | Size: 133 KiB |
|
Before Width: | Height: | Size: 117 KiB After Width: | Height: | Size: 117 KiB |
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 46 KiB |
BIN
Work 4/source/figures_scn2/cm_best_model.png
Normal file
|
After Width: | Height: | Size: 57 KiB |
BIN
Work 4/source/figures_scn2/cv_kappa_heatmap.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
Work 4/source/figures_scn2/cv_rules_heatmap.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
Work 4/source/figures_scn2/learning_best_model.png
Normal file
|
After Width: | Height: | Size: 71 KiB |
BIN
Work 4/source/figures_scn2/mfs_best_model.png
Normal file
|
After Width: | Height: | Size: 206 KiB |
BIN
Work 4/source/figures_scn2/pa_ua_best_model.png
Normal file
|
After Width: | Height: | Size: 55 KiB |
BIN
Work 4/source/figures_scn2/pred_vs_truth_best_model.png
Normal file
|
After Width: | Height: | Size: 446 KiB |
BIN
Work 4/source/figures_scn2/relieff_weights_selected.png
Normal file
|
After Width: | Height: | Size: 39 KiB |
@ -29,7 +29,7 @@ function plot_results1(results, classLabels, cfg)
|
||||
radii(i) = results(i).radius;
|
||||
end
|
||||
|
||||
% -------- Per-model plots --------
|
||||
% Per-model plots
|
||||
for i = 1:nRuns
|
||||
tag = sprintf('run%02d_%s_r%.2f_rules%d', ...
|
||||
i, results(i).mode, results(i).radius, results(i).nRules);
|
||||
@ -102,7 +102,7 @@ function plot_results1(results, classLabels, cfg)
|
||||
end
|
||||
end
|
||||
|
||||
% -------- Across-model summaries --------
|
||||
% Across-model summaries
|
||||
[~, idxSort] = sortrows([double(modes=='class-independent'), radii], [1 2]);
|
||||
OA_s = OA(idxSort);
|
||||
Kap_s = Kap(idxSort);
|
||||
|
||||
142
Work 4/source/plot_results2.m
Normal file
@ -0,0 +1,142 @@
|
||||
function plot_results2(results, cfg, classLabels)
|
||||
% PLOT_RESULTS2 — Scenario 2 plotting suite
|
||||
% Produces and saves:
|
||||
% (A) CV heatmap of Kappa over (#features × radius)
|
||||
% (B) CV heatmap of mean #rules over the same grid
|
||||
% (C) Confusion matrix on TEST for the best model
|
||||
% (D) PA/UA bars on TEST
|
||||
% (E) Learning curves (train/validation error)
|
||||
% (F) MFs before/after for the best model
|
||||
% (G) Truth vs Prediction (TEST)
|
||||
% (H) ReliefF feature weights bar chart
|
||||
%
|
||||
% All PNGs saved under cfg.outDir.
|
||||
|
||||
outDir = cfg.outDir;
|
||||
|
||||
% (A) CV heatmap — Kappa
|
||||
fig = figure('Color','w');
|
||||
imagesc(results.cvScores);
|
||||
set(gca,'XTick',1:numel(results.rGrid),'XTickLabel',compose('%.2f',results.rGrid));
|
||||
set(gca,'YTick',1:numel(results.fGrid),'YTickLabel',string(results.fGrid));
|
||||
xlabel('SC radius r_a'); ylabel('#Features (ReliefF)'); colorbar;
|
||||
title(sprintf('CV mean Kappa (K=%d folds)', cfg.kfold));
|
||||
for i = 1:numel(results.fGrid)
|
||||
for j = 1:numel(results.rGrid)
|
||||
text(j,i,sprintf('%.2f',results.cvScores(i,j)),...
|
||||
'HorizontalAlignment','center','Color','w','FontWeight','bold');
|
||||
end
|
||||
end
|
||||
exportgraphics(fig, fullfile(outDir,'cv_kappa_heatmap.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (B) CV heatmap — mean #rules
|
||||
fig = figure('Color','w');
|
||||
imagesc(results.cvRules);
|
||||
set(gca,'XTick',1:numel(results.rGrid),'XTickLabel',compose('%.2f',results.rGrid));
|
||||
set(gca,'YTick',1:numel(results.fGrid),'YTickLabel',string(results.fGrid));
|
||||
xlabel('SC radius r_a'); ylabel('#Features (ReliefF)'); colorbar;
|
||||
title(sprintf('CV mean #Rules (K=%d folds)', cfg.kfold));
|
||||
for i = 1:numel(results.fGrid)
|
||||
for j = 1:numel(results.rGrid)
|
||||
text(j,i,sprintf('%d',results.cvRules(i,j)),...
|
||||
'HorizontalAlignment','center','Color','w','FontWeight','bold');
|
||||
end
|
||||
end
|
||||
exportgraphics(fig, fullfile(outDir,'cv_rules_heatmap.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (C) Confusion matrix — TEST
|
||||
fig = figure('Color','w');
|
||||
confusionchart(results.metrics.confMat, string(classLabels), ...
|
||||
'Title', sprintf('Confusion — best model (features=%d, r=%.2f, rules=%d)', ...
|
||||
results.bestF, results.bestR, numel(results.bestFis.rule)));
|
||||
exportgraphics(fig, fullfile(outDir,'cm_best_model.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (D) PA/UA bars — TEST
|
||||
fig = figure('Color','w');
|
||||
t = tiledlayout(2,1,'TileSpacing','compact','Padding','compact');
|
||||
nexttile; bar(results.metrics.PA); ylim([0 1]);
|
||||
xticks(1:numel(classLabels)); xticklabels(string(classLabels));
|
||||
ylabel('PA (Recall)'); title('Producer''s Accuracy (TEST)'); grid on;
|
||||
nexttile; bar(results.metrics.UA); ylim([0 1]);
|
||||
xticks(1:numel(classLabels)); xticklabels(string(classLabels));
|
||||
ylabel('UA (Precision)'); title('User''s Accuracy (TEST)'); grid on;
|
||||
exportgraphics(fig, fullfile(outDir,'pa_ua_best_model.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (E) Learning curves
|
||||
fig = figure('Color','w');
|
||||
plot(1:numel(results.trError), results.trError, 'LineWidth', 1.2); hold on;
|
||||
if ~isempty(results.vaError)
|
||||
plot(1:numel(results.vaError), results.vaError, '--', 'LineWidth', 1.2);
|
||||
legend('Training Error','Validation Error','Location','best');
|
||||
else
|
||||
legend('Training Error','Location','best');
|
||||
end
|
||||
xlabel('Epoch'); ylabel('Error'); grid on;
|
||||
title(sprintf('Learning Curve — best model (features=%d, r=%.2f)', ...
|
||||
results.bestF, results.bestR));
|
||||
exportgraphics(fig, fullfile(outDir,'learning_best_model.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (F) MFs before/after for the best model
|
||||
try
|
||||
plot_mfs_before_after(results.initFis, results.bestFis, ...
|
||||
sprintf('MFs — best model (features=%d, r=%.2f)', results.bestF, results.bestR), ...
|
||||
fullfile(outDir,'mfs_best_model.png'));
|
||||
catch ME
|
||||
warning('MF plot failed: %s', ME.message);
|
||||
end
|
||||
|
||||
% (G) Truth vs Prediction — TEST
|
||||
fig = figure('Color','w');
|
||||
plot(results.ytrue, 'LineWidth', 1.0); hold on;
|
||||
plot(results.yhat, '--', 'LineWidth', 1.0);
|
||||
xlabel('Test sample index'); ylabel('Class label'); grid on;
|
||||
title('Truth vs Prediction (TEST)');
|
||||
legend('Truth','Prediction','Location','best');
|
||||
exportgraphics(fig, fullfile(outDir,'pred_vs_truth_best_model.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
% (H) ReliefF feature weights (on full train+val)
|
||||
fig = figure('Color','w');
|
||||
w = results.reliefW(:);
|
||||
idx = results.selIdx(:);
|
||||
% map back: selected indices first for clarity
|
||||
bar(1:numel(idx), w(idx)); grid on;
|
||||
xlabel('Selected feature index'); ylabel('ReliefF weight');
|
||||
title(sprintf('ReliefF weights (top %d features)', results.bestF));
|
||||
exportgraphics(fig, fullfile(outDir,'relieff_weights_selected.png'), 'Resolution', 200);
|
||||
close(fig);
|
||||
|
||||
|
||||
|
||||
|
||||
% ===================== local helper =====================
|
||||
function plot_mfs_before_after(fisBefore, fisAfter, suptitleStr, outPng)
|
||||
D = numel(fisAfter.Inputs);
|
||||
fig = figure('Color','w','Position',[100 100 1200 420]);
|
||||
t = tiledlayout(2, D, 'TileSpacing','compact','Padding','compact');
|
||||
for d = 1:D
|
||||
nexttile(d); hold on;
|
||||
try
|
||||
[xB, yB] = plotmf(fisBefore, 'input', d);
|
||||
plot(xB, yB, 'LineWidth', 1.0);
|
||||
catch
|
||||
end
|
||||
title(sprintf('Input %d — BEFORE', d));
|
||||
ylim([0 1]); grid on;
|
||||
|
||||
nexttile(D + d); hold on;
|
||||
[xA, yA] = plotmf(fisAfter, 'input', d);
|
||||
plot(xA, yA, 'LineWidth', 1.0);
|
||||
title(sprintf('Input %d — AFTER', d));
|
||||
ylim([0 1]); grid on;
|
||||
end
|
||||
sgtitle(suptitleStr);
|
||||
exportgraphics(fig, outPng, 'Resolution', 200);
|
||||
close(fig);
|
||||
end
|
||||
end
|
||||
@ -1,5 +1,5 @@
|
||||
function [Xn, mu, sigma] = preprocess_data(X, mu, sigma)
|
||||
% PREPROCESS Normalize feature matrix using z-score scaling
|
||||
% PREPROCESS_DATA Normalize feature matrix using z-score scaling
|
||||
%
|
||||
% [Xn, mu, sigma] = preprocess(X)
|
||||
% [Xn, mu, sigma] = preprocess(X, mu, sigma)
|
||||
|
||||
BIN
Work 4/source/results_scn2.mat
Normal file
@ -1,16 +1,24 @@
|
||||
% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman)
|
||||
%% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman)
|
||||
% TSK classification with Subtractive Clustering (SC)
|
||||
% Modes: (A) class-independent SC, (B) class-dependent SC)
|
||||
% Modes: (A) class-independent SC,
|
||||
% (B) class-dependent SC)
|
||||
% Uses: split_data, preprocess_data, evaluate_classification, plot_results1
|
||||
%
|
||||
% Dataset: ./Datasets/haberman.data
|
||||
% Columns: [age, op_year, axillary_nodes, class] with class in {1,2}
|
||||
|
||||
%
|
||||
% Assignment 4 in Fuzzy systems
|
||||
%
|
||||
% author:
|
||||
% Christos Choutouridis ΑΕΜ 8997
|
||||
% cchoutou@ece.auth.gr
|
||||
%
|
||||
close all; clear; clc;
|
||||
|
||||
% ============================ CONFIGURATION ================================
|
||||
% CONFIGURATION
|
||||
% ================================
|
||||
cfg = struct();
|
||||
rng(42, 'twister'); % reproducibility
|
||||
rng(42, 'twister'); % reproducibility
|
||||
|
||||
% Data handling
|
||||
cfg.split = [0.6 0.2 0.2]; % train / val / test (stratified in split_data)
|
||||
@ -34,7 +42,7 @@ cfg.modes = {'class-independent','class-dependent'};
|
||||
cfg.outDir = 'figures_scn1';
|
||||
if ~exist(cfg.outDir,'dir'), mkdir(cfg.outDir); end
|
||||
|
||||
% =============================== DATA =====================================
|
||||
% DATA
|
||||
dataPath = './Datasets/haberman.data';
|
||||
assert(isfile(dataPath), 'Dataset not found at: %s', dataPath);
|
||||
|
||||
@ -49,7 +57,7 @@ Y = Y(:);
|
||||
classLabels = unique(Y);
|
||||
minLabel = min(classLabels); maxLabel = max(classLabels);
|
||||
|
||||
% =========================== SPLIT & PREPROCESS ===========================
|
||||
% SPLIT & PREPROCESS
|
||||
[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split);
|
||||
|
||||
if cfg.standardize
|
||||
@ -63,7 +71,7 @@ end
|
||||
% For manual sugfis construction
|
||||
inRanges = [min(trainX,[],1); max(trainX,[],1)];
|
||||
|
||||
% ============================== TRAINING ==================================
|
||||
% TRAINING
|
||||
results = []; runId = 0;
|
||||
|
||||
for m = 1:numel(cfg.modes)
|
||||
@ -74,24 +82,23 @@ for m = 1:numel(cfg.modes)
|
||||
runId = runId + 1;
|
||||
fprintf('\n=== Run %d: mode=%s, radius=%.2f ===\n', runId, modeName, radius);
|
||||
|
||||
% ----- Initial FIS -----
|
||||
% Initial FIS
|
||||
switch modeName
|
||||
case 'class-independent'
|
||||
% Use new-style API like your colleague
|
||||
opt = genfisOptions('SubtractiveClustering', ...
|
||||
'ClusterInfluenceRange', radius);
|
||||
initFis = genfis(trainX, double(trainY), opt);
|
||||
% genfis(Subtractive) already builds Sugeno with constant consequents.
|
||||
|
||||
case 'class-dependent'
|
||||
% Our custom builder (fixes colleague's bug: feed only features to subclust)
|
||||
% Our custom builder
|
||||
initFis = build_classdep_fis(trainX, trainY, classLabels, radius, inRanges);
|
||||
|
||||
otherwise
|
||||
error('Unknown mode: %s', modeName);
|
||||
end
|
||||
|
||||
% ----- ANFIS training -----
|
||||
% ANFIS training
|
||||
trData = [trainX double(trainY)];
|
||||
ckData = [valX double(valY)];
|
||||
anfisOpts = [cfg.maxEpochs cfg.errorGoal cfg.initialStep cfg.stepDecrease cfg.stepIncrease];
|
||||
@ -102,7 +109,7 @@ for m = 1:numel(cfg.modes)
|
||||
[fisTrained, trError, ~, ~, ckError] = anfis(trData, initFis, anfisOpts, [0 0 0 0], ckData);
|
||||
end
|
||||
|
||||
% ----- Evaluate on test set -----
|
||||
% Evaluate on test set
|
||||
yhat_cont = evalfis(testX, fisTrained);
|
||||
yhat = round(yhat_cont);
|
||||
% clip into valid label range (important for small rulebases)
|
||||
@ -133,22 +140,23 @@ for m = 1:numel(cfg.modes)
|
||||
end
|
||||
end
|
||||
|
||||
% ============================== PLOTTING ==================================
|
||||
% PLOTTING
|
||||
plot_results1(results, classLabels, cfg);
|
||||
|
||||
% =============================== SAVE ALL =================================
|
||||
% SAVE ALL
|
||||
save('results_scn1.mat','results','cfg','classLabels','mu','sigma', ...
|
||||
'trainX','valX','testX','trainY','valY','testY');
|
||||
|
||||
fprintf('\nDone. Figures saved in: %s\n', cfg.outDir);
|
||||
|
||||
% ============================ LOCAL FUNCTIONS =============================
|
||||
% LOCAL FUNCTIONS
|
||||
% ===================================================
|
||||
function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
|
||||
% BUILD_CLASSDEP_FIS — class-dependent SC for Sugeno FIS (ANFIS-ready)
|
||||
% Creates ONE constant output MF PER RULE (required by ANFIS).
|
||||
% Runs SUBCLUST on FEATURES ONLY for each class.
|
||||
% Creates ONE constant output MF PER RULE (required by ANFIS).
|
||||
% Runs SUBCLUST on FEATURES ONLY for each class.
|
||||
|
||||
D = size(X,2);
|
||||
D = size(X, 2);
|
||||
fis = sugfis('Name','TSK_ClassDependent');
|
||||
|
||||
% Inputs with ranges from training data
|
||||
@ -172,7 +180,7 @@ function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
|
||||
[centers, sigmas] = subclust(Xi, radius);
|
||||
nCl = size(centers,1);
|
||||
|
||||
% ---- robust sigma broadcasting to M×D ----
|
||||
% robust sigma broadcasting to M×D
|
||||
if isscalar(sigmas)
|
||||
S = repmat(sigmas, nCl, D);
|
||||
elseif size(sigmas,1) == 1 && size(sigmas,2) == D
|
||||
@ -192,7 +200,7 @@ function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
|
||||
% Add input MFs for this cluster (and remember their indices)
|
||||
for d = 1:D
|
||||
mfName = sprintf('c%d_r%d_x%d', c, i, d);
|
||||
params = [S(i,d) centers(i,d)]; % [sigma center]
|
||||
params = [S(i, d) centers(i, d)]; % [sigma center]
|
||||
fis = addMF(fis, sprintf('x%d', d), 'gaussmf', params, 'Name', mfName);
|
||||
antecedentIdx(d) = numel(fis.Inputs(d).MembershipFunctions);
|
||||
end
|
||||
|
||||
297
Work 4/source/scenario2.m
Normal file
@ -0,0 +1,297 @@
|
||||
%% scenario2.m — Assignment 4 (Classification), Scenario 2 (Epileptic Seizure)
|
||||
% TSK classification on a high-dimensional dataset with feature selection.
|
||||
% Verbose version with progress printing.
|
||||
%
|
||||
% Uses: split_data, preprocess_data, evaluate_classification, plot_results2
|
||||
% Dataset path: ./Datasets/epileptic_seizure_data.csv
|
||||
%
|
||||
% Assignment 4 in Fuzzy systems
|
||||
%
|
||||
% author:
|
||||
% Christos Choutouridis ΑΕΜ 8997
|
||||
% cchoutou@ece.auth.gr
|
||||
|
||||
close all; clear; clc;
|
||||
|
||||
%parpool('threads',6);
|
||||
|
||||
fprintf('\nScenario 2 - Epileptic Seizure Classification');
|
||||
fprintf('\n================================================\n\n');
|
||||
|
||||
% CONFIGURATION
|
||||
cfg = struct();
|
||||
rng(42,'twister'); % reproducibility
|
||||
|
||||
% Data handling
|
||||
cfg.split = [0.6 0.2 0.2];
|
||||
cfg.standardize = true;
|
||||
|
||||
% Feature selection + SC hyper-params
|
||||
|
||||
% Debug configuration
|
||||
cfg.feature_grid = [5 8]; %[5 8 11 15];
|
||||
cfg.radii_grid = [0.5 0.75]; %[0.25 0.50 0.75 1.00];
|
||||
cfg.kfold = 3;
|
||||
cfg.maxEpochs = 20; % ANFIS options
|
||||
cfg.displayANFIS = 0;
|
||||
% Default configuraion
|
||||
%cfg.feature_grid = [5 8 11 15];
|
||||
%cfg.radii_grid = [0.25 0.50 0.75 1.00];
|
||||
%cfg.kfold = 5;
|
||||
%cfg.maxEpochs = 100; % ANFIS options
|
||||
%cfg.displayANFIS = 0;
|
||||
|
||||
% Output directory
|
||||
cfg.outDir = 'figures_scn2';
|
||||
if ~exist(cfg.outDir,'dir'), mkdir(cfg.outDir); end
|
||||
|
||||
fprintf('Configuration loaded: %d folds, %d feature options, %d radius options.\n', ...
|
||||
cfg.kfold, numel(cfg.feature_grid), numel(cfg.radii_grid));
|
||||
|
||||
% DATA
|
||||
dataPath = './Datasets/epileptic_seizure_data.csv';
|
||||
fprintf('Loading dataset from %s ...\n', dataPath);
|
||||
assert(isfile(dataPath), 'Dataset not found!');
|
||||
|
||||
raw = importdata(dataPath);
|
||||
if isstruct(raw) && isfield(raw,'data')
|
||||
A = raw.data;
|
||||
else
|
||||
A = readmatrix(dataPath);
|
||||
end
|
||||
X = A(:,1:end-1);
|
||||
Y = A(:,end);
|
||||
Y = double(Y(:));
|
||||
classLabels = unique(Y);
|
||||
num_classes = numel(classLabels);
|
||||
fprintf('Dataset loaded: %d samples, %d features, %d classes.\n', ...
|
||||
size(X,1), size(X,2), num_classes);
|
||||
|
||||
% SPLIT & PREPROCESS
|
||||
fprintf('\nSplitting data into train/val/test (%.0f/%.0f/%.0f%%)...\n', cfg.split*100);
|
||||
[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split);
|
||||
fprintf('-> train: %d val: %d test: %d\n', size(trainX,1), size(valX,1), size(testX,1));
|
||||
|
||||
if cfg.standardize
|
||||
fprintf('Applying z-score normalization...\n');
|
||||
[trainX, mu, sigma] = preprocess_data(trainX);
|
||||
valX = preprocess_data(valX, mu, sigma);
|
||||
testX = preprocess_data(testX, mu, sigma);
|
||||
else
|
||||
mu = []; sigma = [];
|
||||
end
|
||||
|
||||
fullTrainX = [trainX; valX];
|
||||
fullTrainY = [trainY; valY];
|
||||
|
||||
% GRID SEARCH
|
||||
fprintf('\nGRID SEARCH (features × radius) using %d-fold CV\n', cfg.kfold);
|
||||
|
||||
cvp = cvpartition(trainY, 'KFold', cfg.kfold, 'Stratify', true);
|
||||
nF = numel(cfg.feature_grid);
|
||||
nR = numel(cfg.radii_grid);
|
||||
cvScores = zeros(nF, nR);
|
||||
cvRules = zeros(nF, nR);
|
||||
|
||||
for fi = 1:nF
|
||||
featKeep = cfg.feature_grid(fi);
|
||||
for ri = 1:nR
|
||||
radius = cfg.radii_grid(ri);
|
||||
fprintf('\n[GRID] features=%2d, radius=%.2f ... ', featKeep, radius);
|
||||
|
||||
kappas = zeros(cvp.NumTestSets,1);
|
||||
rulesK = zeros(cvp.NumTestSets,1);
|
||||
|
||||
for k = 1:cvp.NumTestSets
|
||||
fprintf('\n-> Fold %d/%d ... ', k, cfg.kfold);
|
||||
|
||||
trIdx = training(cvp, k);
|
||||
teIdx = test(cvp, k);
|
||||
Xtr = trainX(trIdx,:); Ytr = trainY(trIdx);
|
||||
Xva = trainX(teIdx,:); Yva = trainY(teIdx);
|
||||
|
||||
% Relief feature selection
|
||||
[idxFeat, ~] = relief_select(Xtr, Ytr);
|
||||
sel = idxFeat(1:min(featKeep, numel(idxFeat)));
|
||||
Xtr = Xtr(:, sel);
|
||||
Xva = Xva(:, sel);
|
||||
|
||||
% Build FIS
|
||||
inRanges = [min(Xtr,[],1); max(Xtr,[],1)];
|
||||
initFis = build_classdep_fis(Xtr, Ytr, classLabels, radius, inRanges);
|
||||
|
||||
% Train
|
||||
trData = [Xtr double(Ytr)];
|
||||
vaData = [Xva double(Yva)];
|
||||
anfisOpts = anfisOptions('InitialFis', initFis, ...
|
||||
'EpochNumber', cfg.maxEpochs, ...
|
||||
'ValidationData', vaData, ...
|
||||
'OptimizationMethod', 1, ...
|
||||
'DisplayErrorValues', 0, ...
|
||||
'DisplayStepSize', 0);
|
||||
[~, ~, ~, bestFis, ~] = anfis(trData, anfisOpts);
|
||||
|
||||
% Evaluate fold
|
||||
yhat = evalfis(bestFis, Xva);
|
||||
yhat = round(yhat);
|
||||
yhat(yhat < min(classLabels)) = min(classLabels);
|
||||
yhat(yhat > max(classLabels)) = max(classLabels);
|
||||
|
||||
R = evaluate_classification(Yva, yhat, classLabels);
|
||||
kappas(k) = R.Kappa;
|
||||
rulesK(k) = numel(bestFis.rule);
|
||||
fprintf('kappa=%.3f rules=%d\n', R.Kappa, rulesK(k));
|
||||
end
|
||||
|
||||
cvScores(fi,ri) = mean(kappas);
|
||||
cvRules(fi,ri) = round(mean(rulesK));
|
||||
fprintf('\n-> mean Kappa=%.3f mean rules=%d\n', cvScores(fi,ri), cvRules(fi,ri));
|
||||
end
|
||||
end
|
||||
|
||||
[maxPerRow, idxR] = max(cvScores, [], 2);
|
||||
[bestKappa, idxF] = max(maxPerRow);
|
||||
idxR = idxR(idxF);
|
||||
bestFeatures = cfg.feature_grid(idxF);
|
||||
bestRadius = cfg.radii_grid(idxR);
|
||||
bestRulesEst = cvRules(idxF, idxR);
|
||||
|
||||
fprintf('\nBEST HYPERPARAMS\nfeatures=%d radius=%.2f CV Kappa=%.3f mean rules=%d\n', ...
|
||||
bestFeatures, bestRadius, bestKappa, bestRulesEst);
|
||||
|
||||
% FINAL TRAIN
|
||||
fprintf('\nTraining final model on train+val with best params ...\n');
|
||||
|
||||
[idxAll, weightsAll] = relief_select(fullTrainX, fullTrainY);
|
||||
sel = idxAll(1:min(bestFeatures, numel(idxAll)));
|
||||
Xtr = fullTrainX(:, sel);
|
||||
Xte = testX(:, sel);
|
||||
|
||||
inRanges = [min(Xtr,[],1); max(Xtr,[],1)];
|
||||
initFis = build_classdep_fis(Xtr, fullTrainY, classLabels, bestRadius, inRanges);
|
||||
trData = [Xtr double(fullTrainY)];
|
||||
teData = [Xte double(testY)];
|
||||
|
||||
anfisOpts = anfisOptions('InitialFis', initFis, ...
|
||||
'EpochNumber', cfg.maxEpochs, ...
|
||||
'ValidationData', teData, ...
|
||||
'OptimizationMethod', 1, ...
|
||||
'DisplayErrorValues', 0, ...
|
||||
'DisplayStepSize', 0);
|
||||
[fisTrained, trError, ~, bestFis, vaError] = anfis(trData, anfisOpts);
|
||||
fprintf('Final training complete: %d rules.\n', numel(bestFis.rule));
|
||||
|
||||
% TEST EVAL
|
||||
fprintf('\nEvaluating on TEST set ...\n');
|
||||
yhat_test = evalfis(bestFis, Xte);
|
||||
yhat_test = round(yhat_test);
|
||||
yhat_test(yhat_test < min(classLabels)) = min(classLabels);
|
||||
yhat_test(yhat_test > max(classLabels)) = max(classLabels);
|
||||
|
||||
Rtest = evaluate_classification(testY, yhat_test, classLabels);
|
||||
fprintf('\n[TEST RESULTS]\n');
|
||||
fprintf(' OA = %.2f %%\n', 100*Rtest.OA);
|
||||
fprintf(' Kappa= %.3f\n', Rtest.Kappa);
|
||||
fprintf(' Rules= %d\n', numel(bestFis.rule));
|
||||
|
||||
% PLOTTING
|
||||
fprintf('\nGenerating figures ...\n');
|
||||
results = struct();
|
||||
results.cvScores = cvScores;
|
||||
results.cvRules = cvRules;
|
||||
results.fGrid = cfg.feature_grid;
|
||||
results.rGrid = cfg.radii_grid;
|
||||
results.bestF = numel(sel);
|
||||
results.bestR = bestRadius;
|
||||
results.bestFis = bestFis;
|
||||
results.initFis = initFis;
|
||||
results.trError = trError;
|
||||
results.vaError = vaError;
|
||||
results.ytrue = testY;
|
||||
results.yhat = yhat_test;
|
||||
results.metrics = Rtest;
|
||||
results.selIdx = sel;
|
||||
results.reliefW = weightsAll;
|
||||
|
||||
plot_results2(results, cfg, classLabels);
|
||||
|
||||
save('results_scn2.mat','results','cfg','classLabels','mu','sigma');
|
||||
fprintf('\nDone. Figures saved in: %s\n', cfg.outDir);
|
||||
|
||||
|
||||
|
||||
|
||||
% LOCAL FUNCTIONS
|
||||
% ==================================================
|
||||
function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges)
|
||||
% BUILD_CLASSDEP_FIS — class-dependent SC Sugeno FIS (ANFIS-ready)
|
||||
% Creates ONE constant output MF PER RULE (ANFIS requirement).
|
||||
% Runs subclust on FEATURES ONLY per class.
|
||||
D = size(X,2);
|
||||
fis = sugfis('Name','TSK_CD');
|
||||
|
||||
% Inputs
|
||||
for d = 1:D
|
||||
fis = addInput(fis, [inRanges(1,d) inRanges(2,d)], 'Name', sprintf('x%d', d));
|
||||
end
|
||||
% Output (range spans label space)
|
||||
outRange = [min(classLabels) max(classLabels)];
|
||||
fis = addOutput(fis, outRange, 'Name', 'y');
|
||||
|
||||
ruleList = [];
|
||||
for k = 1:numel(classLabels)
|
||||
c = classLabels(k);
|
||||
Xi = X(Y==c, :);
|
||||
if isempty(Xi), continue; end
|
||||
|
||||
[centers, sigmas] = subclust(Xi, radius);
|
||||
nCl = size(centers,1);
|
||||
% Robust sigma broadcasting to M×D
|
||||
if isscalar(sigmas)
|
||||
S = repmat(sigmas, nCl, D);
|
||||
elseif size(sigmas,1)==1 && size(sigmas,2)==D
|
||||
S = repmat(sigmas, nCl, 1);
|
||||
elseif all(size(sigmas)==[nCl D])
|
||||
S = sigmas;
|
||||
else
|
||||
S = repmat(0.5*(inRanges(2,:)-inRanges(1,:)), nCl, 1);
|
||||
end
|
||||
|
||||
for i = 1:nCl
|
||||
antIdx = zeros(1,D);
|
||||
for d = 1:D
|
||||
mfName = sprintf('c%d_r%d_x%d', c, i, d);
|
||||
params = [S(i,d) centers(i,d)]; % [sigma center]
|
||||
fis = addMF(fis, sprintf('x%d', d), 'gaussmf', params, 'Name', mfName);
|
||||
antIdx(d) = numel(fis.Inputs(d).MembershipFunctions);
|
||||
end
|
||||
% ONE constant output MF per rule
|
||||
outName = sprintf('const_c%d_r%d', c, i);
|
||||
fis = addMF(fis, 'y', 'constant', double(c), 'Name', outName);
|
||||
outIdx = numel(fis.Outputs(1).MembershipFunctions);
|
||||
ruleList = [ruleList; [antIdx, outIdx, 1, 1]]; %#ok<AGROW>
|
||||
end
|
||||
end
|
||||
|
||||
if ~isempty(ruleList)
|
||||
fis = addRule(fis, ruleList);
|
||||
end
|
||||
|
||||
% Standard TSK ops
|
||||
fis.AndMethod = 'prod';
|
||||
fis.OrMethod = 'probor';
|
||||
fis.ImplicationMethod = 'prod';
|
||||
fis.AggregationMethod = 'sum';
|
||||
fis.DefuzzificationMethod = 'wtaver';
|
||||
end
|
||||
|
||||
function [idx, w] = relief_select(X, y)
|
||||
% RELIEF_SELECT — wraps relieff and returns ranked indices + weights.
|
||||
try
|
||||
[idx, w] = relieff(X, y, 10); % k=10 neighbors
|
||||
catch
|
||||
% Fallback: simple variance ranking if Statistics Toolbox missing
|
||||
w = var(X, 0, 1);
|
||||
[~, idx] = sort(w, 'descend');
|
||||
end
|
||||
end
|
||||
@ -1,5 +1,5 @@
|
||||
function [trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, ratios)
|
||||
% SPLIT Split dataset into train/validation/test sets (stratified)
|
||||
% SPLIT_DATA Split dataset into train/validation/test sets (stratified)
|
||||
%
|
||||
% [trainX, valX, testX, trainY, valY, testY] = split(X, Y, ratios)
|
||||
%
|
||||
|
||||