diff --git a/Work 1/FuzzySystems_HW1_Choutouridis_8997.zip b/Work 1/FuzzySystems_HW1_Choutouridis_8997.zip new file mode 100644 index 0000000..6a4017e Binary files /dev/null and b/Work 1/FuzzySystems_HW1_Choutouridis_8997.zip differ diff --git a/Work 2/FuzzySystems_HW2_Choutouridis_8997.zip b/Work 2/FuzzySystems_HW2_Choutouridis_8997.zip new file mode 100644 index 0000000..62b0944 Binary files /dev/null and b/Work 2/FuzzySystems_HW2_Choutouridis_8997.zip differ diff --git a/Work 3/FuzzySystems_HW3_Choutouridis_8997.zip b/Work 3/FuzzySystems_HW3_Choutouridis_8997.zip new file mode 100644 index 0000000..0e26287 Binary files /dev/null and b/Work 3/FuzzySystems_HW3_Choutouridis_8997.zip differ diff --git a/Work 4/report/Work4_report.pdf b/Work 4/report/Work4_report.pdf index 0da0e09..9715577 100644 Binary files a/Work 4/report/Work4_report.pdf and b/Work 4/report/Work4_report.pdf differ diff --git a/Work 4/report/Work4_report.tex b/Work 4/report/Work4_report.tex index 50cda9d..b197fb2 100644 --- a/Work 4/report/Work4_report.tex +++ b/Work 4/report/Work4_report.tex @@ -254,7 +254,149 @@ x_i' = \frac{x_i - \mu_i}{\sigma_i} \end{itemize} + + + % ===================================================================== -\subsection{Επίλογος} -Αναπτύξαμε +\section{Σενάριο 2 — Dataset με υψηλή διαστασιμότητα (Epileptic Seizure Recognition)} +\label{sec:scenario2} + +\subsection{Περιγραφή και ζητούμενο} +Το δεύτερο σενάριο στοχεύει στην ταξινόμηση εγκεφαλικών σημάτων \textit{EEG} σε πέντε κατηγορίες, στο πλαίσιο του προβλήματος \emph{Epileptic Seizure Recognition}. +Σε αντίθεση με το απλούστερο πρόβλημα του Σενάριου~1, εδώ οι διαστάσεις των δεδομένων είναι υψηλές και οι κλάσεις πολυπληθείς, καθιστώντας το έργο αξιολόγησης σημαντικά πιο απαιτητικό. 
+ +Ο στόχος είναι η ανάπτυξη ενός TSK–τύπου μοντέλου που συνδυάζει: +\begin{itemize} + \item επιλογή χαρακτηριστικών με \textbf{ReliefF}, + \item αρχικοποίηση κανόνων μέσω \textbf{Subtractive Clustering (SC)}, + \item και εκπαίδευση με \textbf{υβριδική μέθοδο} (\emph{gradient descent} + \emph{least squares}). +\end{itemize} +Η διαδικασία επαναλαμβάνεται για διάφορους συνδυασμούς ακτίνας συσσωμάτωσης $r_a$ και αριθμού χαρακτηριστικών που κρατούνται μετά το ReliefF, με στόχο τον εντοπισμό του συνδυασμού που προσφέρει τον βέλτιστο συμβιβασμό μεταξύ ακρίβειας και πολυπλοκότητας (\#Rules). + +\subsection{Προσέγγιση και μεθοδολογία} +Για την επιλογή των υπερπαραμέτρων χρησιμοποιήθηκε \textbf{k–fold cross–validation}, ενώ η συνολική διαδικασία αυτοματοποιήθηκε πλήρως μέσα από το script \texttt{scenario2.m}. +Η ροή έχει ως εξής: +\begin{enumerate} + \item Διαβάζονται τα δεδομένα από το αρχείο \texttt{epileptic\_seizure\_data.csv} και κατανέμονται σε train-validation-test σε ποσοστό $60$–$20$–$20$. + \item Για κάθε συνδυασμό τιμών \texttt{feature\_grid} και \texttt{radii\_grid}, εκτελείται η διαδικασία k-fold CV. + Σε κάθε fold εφαρμόζεται το ReliefF ώστε να επιλεγούν τα πιο σημαντικά χαρακτηριστικά, και στη συνέχεια πραγματοποιείται Subtractive Clustering ανά κλάση (class–dependent). + \item Κατασκευάζεται το αρχικό FIS με μία Gaussian MF ανά cluster και constant εξόδους (ένας MF ανά κανόνα). + \item Η εκπαίδευση γίνεται για όλες τις εποχές μέσω \texttt{anfis()}, με validation set για έλεγχο γενίκευσης. + \item Μετά από κάθε fold υπολογίζεται ο Cohen’s $\kappa$ και ο αριθμός κανόνων, ώστε να εξαχθεί ο μέσος όρος ανά συνδυασμό παραμέτρων. +\end{enumerate} + + + +\paragraph{Περιορισμοί εκτέλεσης.} +Δυστυχώς, μέχρι στιγμής, δεν καταφέραμε να τρέξουμε το σενάριο για τον μέγιστο αριθμό εποχών ($100$) και με το πλήρες grid σε κανέναν προσωπικό μας υπολογιστή. 
+Παρόλα αυτά, παραθέτουμε τα αποτελέσματα που λάβαμε από ένα μικρότερο πείραμα όπου τρέξαμε για: +\begin{verbatim} + cfg.feature_grid = [5 8]; % instead of [5 8 11 15] + cfg.radii_grid = [0.5 0.75]; % instead of [0.25 0.50 0.75 1.00] + cfg.kfold = 3; % instead of 5 + cfg.maxEpochs = 20; % instead of 100 +\end{verbatim} +Από το grid–search προέκυψε ως \textbf{βέλτιστο μοντέλο} το: +\[ +\text{features}=5, \quad r_a=0.50, \quad \text{rules}=6, \quad \kappa=0.23 +\] +που προσφέρει ικανοποιητική ισορροπία μεταξύ ακρίβειας και απλότητας. + +\subsection{Αποτελέσματα πειράματος} + +\paragraph{Αναζήτηση υπερπαραμέτρων (Grid Search).} +Στο Σχήμα~\ref{fig:scn2-grid} φαίνονται τα αποτελέσματα του grid–search. +Η μέση τιμή του συντελεστή $\kappa$ παραμένει κοντά στο $0.22$–$0.23$ για όλους τους συνδυασμούς, δείχνοντας ότι το μοντέλο είναι σχετικά σταθερό. +Ο αριθμός κανόνων μειώνεται αισθητά με αύξηση του $r_a$, όπως αναμενόταν. + +\begin{figure}[H]\centering + \includegraphics[width=.49\textwidth]{../source/figures_scn2/cv_kappa_heatmap.png} + \includegraphics[width=.49\textwidth]{../source/figures_scn2/cv_rules_heatmap.png} + \caption{Grid search: μέση τιμή του Cohen’s $\kappa$ (αριστερά) + και μέσος αριθμός κανόνων (δεξιά) για κάθε συνδυασμό παραμέτρων.} + \label{fig:scn2-grid} +\end{figure} + +\paragraph{Απόδοση βέλτιστου μοντέλου.} +Η μήτρα σύγχυσης του βέλτιστου μοντέλου φαίνεται στο Σχήμα~\ref{fig:scn2-cm}. +Παρατηρείται ότι οι κλάσεις 3 και 4 αναγνωρίζονται με σχετικά υψηλή ακρίβεια, ενώ η Κλάση~1 συγκεντρώνει τις περισσότερες λανθασμένες προβλέψεις. +Η συνολική ακρίβεια είναι μέτρια, αλλά ικανοποιητική για το συγκεκριμένο dataset. 
+ +\begin{figure}[H]\centering + \includegraphics[width=.7\textwidth]{../source/figures_scn2/cm_best_model.png} + \caption{Μήτρα σύγχυσης για το βέλτιστο μοντέλο (features$=5$, $r_a=0.50$, rules$=6$).} + \label{fig:scn2-cm} +\end{figure} + +\paragraph{Καμπύλη εκμάθησης.} +Η καμπύλη εκμάθησης (Σχήμα~\ref{fig:scn2-learning}) παρουσιάζει σταδιακή μείωση του σφάλματος σε train και validation set, χωρίς σημαντική απόκλιση, γεγονός που δείχνει ότι το μοντέλο γενικεύει καλά και δεν υπερ-προσαρμόζεται. + +\begin{figure}[H]\centering + \includegraphics[width=.65\textwidth]{../source/figures_scn2/learning_best_model.png} + \caption{Καμπύλες εκμάθησης (training \& validation error) για το βέλτιστο μοντέλο.} + \label{fig:scn2-learning} +\end{figure} + +\paragraph{Συναρτήσεις συμμετοχής.} +Στο Σχήμα~\ref{fig:scn2-mfs} απεικονίζονται οι συναρτήσεις συμμετοχής για τα πέντε πιο σημαντικά χαρακτηριστικά, πριν και μετά την εκπαίδευση. +Παρατηρείται ελαφρά προσαρμογή στα πλάτη και στις θέσεις των Gaussian MF, χωρίς παραμόρφωση — κάτι που δείχνει καλή σταθερότητα του μοντέλου. + +\begin{figure}[H]\centering + \includegraphics[width=\textwidth]{../source/figures_scn2/mfs_best_model.png} + \caption{MFs πριν και μετά την εκπαίδευση (βέλτιστο μοντέλο, top–5 χαρακτηριστικά).} + \label{fig:scn2-mfs} +\end{figure} + +\paragraph{Ανάλυση ανά κλάση.} +Οι μετρικές ακρίβειας ανά κλάση (Σχήμα~\ref{fig:scn2-pa-ua}) δείχνουν ικανοποιητική απόδοση για τις Κλάσεις~1 και~3, ενώ οι υπόλοιπες παρουσιάζουν χαμηλότερες τιμές PA και UA. +Αυτό είναι αναμενόμενο λόγω ανισορροπίας δείγματος και επικαλύψεων στα χαρακτηριστικά. 
+ +\begin{figure}[H]\centering + \includegraphics[width=.8\textwidth]{../source/figures_scn2/pa_ua_best_model.png} + \caption{Producer’s (PA) και User’s (UA) Accuracy ανά κλάση στο test–set.} + \label{fig:scn2-pa-ua} +\end{figure} + +\paragraph{Απόδοση στο test set.} +Η σύγκριση πραγματικών και προβλεπόμενων ετικετών (Σχήμα~\ref{fig:scn2-truth}) δείχνει ότι οι περισσότερες προβλέψεις ακολουθούν τη σωστή κλάση, αν και παρατηρείται «θόρυβος» στις μεταβάσεις — ένδειξη περιορισμένης διακριτότητας των clusters. + +\begin{figure}[H]\centering + \includegraphics[width=.9\textwidth]{../source/figures_scn2/pred_vs_truth_best_model.png} + \caption{Truth vs Prediction στο test–set (βέλτιστο μοντέλο).} + \label{fig:scn2-truth} +\end{figure} + +\paragraph{Βάρη ReliefF.} +Τα πέντε επιλεγμένα χαρακτηριστικά είχαν πολύ κοντινά βάρη (Σχήμα~\ref{fig:scn2-relief}), γεγονός που δείχνει ότι η πληροφορία κατανέμεται σχετικά ομοιόμορφα -- δεν υπάρχει δηλαδή ένα «κυρίαρχο» χαρακτηριστικό. + +\begin{figure}[H]\centering + \includegraphics[width=.65\textwidth]{../source/figures_scn2/relieff_weights_selected.png} + \caption{Βάρη των επιλεγμένων χαρακτηριστικών από το ReliefF (top–5).} + \label{fig:scn2-relief} +\end{figure} + +\subsection{Συμπεράσματα} +Το μειωμένο πείραμα επιβεβαιώνει τη λειτουργικότητα της ροής και των συναρτήσεων. +Παρά τον περιορισμένο αριθμό εποχών, το σύστημα κατόρθωσε να επιτύχει σταθερό Kappa γύρω στο $0.23$, με μόλις $6$ κανόνες και πέντε χαρακτηριστικά. +Η συμπεριφορά των μετρικών και των MFs δείχνει σωστή προσαρμογή και καλή γενίκευση, ενώ η μείωση του αριθμού κανόνων με αύξηση της ακτίνας επιβεβαιώνει τη θεωρητικά αναμενόμενη συμπεριφορά του Subtractive Clustering. + +Σε ένα πλήρες πείραμα (με $100$+ εποχές και εκτεταμένο grid), αναμένεται περαιτέρω βελτίωση τόσο στην τιμή του $\kappa$ όσο και στη σταθερότητα των καμπυλών εκμάθησης. 
+ + +% ===================================================================== +\section{Επίλογος} + +Στην παρούσα εργασία υλοποιήσαμε μια ολοκληρωμένη ροή για την ανάπτυξη και αξιολόγηση TSK–fuzzy μοντέλων ταξινόμησης με χρήση \textsc{ANFIS}. +Η μεθοδολογία κάλυψε όλα τα στάδια -- από τον διαχωρισμό και την προεπεξεργασία των δεδομένων, μέχρι την επιλογή χαρακτηριστικών, την εκπαίδευση και τη συγκριτική ανάλυση των αποτελεσμάτων. + +Στο Σενάριο~1, επιβεβαιώθηκε η σημασία της ακτίνας συσσωμάτωσης και της στρατηγικής αρχικοποίησης (class–dependent ή independent) ως προς την ισορροπία μεταξύ πολυπλοκότητας και ακρίβειας. +Τα αποτελέσματα ήταν σταθερά, με συνεπή συμπεριφορά των MFs και των μετρικών. + +Το πιο απαιτητικό Σενάριο~2, όπου συνδυάστηκε επιλογή χαρακτηριστικών με ReliefF και Sub. Clustering, δυστυχώς δεν κατέστη δυνατό να εκτελεστεί στην πλήρη του έκταση. +Παρόλα αυτά παρατηρήθηκε καλή γενίκευση ακόμη και με περιορισμένο grid και λίγες εποχές. +Παρά τους χρονικούς περιορισμούς, η συνολική συμπεριφορά του συστήματος ήταν αξιόπιστη και αναδεικνύει τη δυναμική των TSK–fuzzy μοντέλων για ταξινόμηση υψηλής διάστασης. +Η προσέγγιση αυτή προσφέρει μια σταθερή και επεκτάσιμη βάση για μελλοντική έρευνα σε μεγαλύτερης κλίμακας δεδομένα. 
+ + + \end{document} diff --git a/Work 4/source/Scenario1.log b/Work 4/source/Scenario1.log new file mode 100644 index 0000000..14ac41b --- /dev/null +++ b/Work 4/source/Scenario1.log @@ -0,0 +1,22 @@ + +=== Run 1: mode=class-independent, radius=0.20 === +Overall Accuracy: 68.85% +Cohen's Kappa : 0.413 +Rules: 32 | OA: 68.85% | Kappa: 0.413 + +=== Run 2: mode=class-independent, radius=0.80 === +Overall Accuracy: 73.77% +Cohen's Kappa : 0.617 +Rules: 3 | OA: 73.77% | Kappa: 0.617 + +=== Run 3: mode=class-dependent, radius=0.20 === +Overall Accuracy: 60.66% +Cohen's Kappa : 0.296 +Rules: 51 | OA: 60.66% | Kappa: 0.296 + +=== Run 4: mode=class-dependent, radius=0.80 === +Overall Accuracy: 72.13% +Cohen's Kappa : 0.604 +Rules: 4 | OA: 72.13% | Kappa: 0.604 + +Done. Figures saved in: figures_scn1 diff --git a/Work 4/source/Scenario2.log b/Work 4/source/Scenario2.log new file mode 100644 index 0000000..a9c6a9f --- /dev/null +++ b/Work 4/source/Scenario2.log @@ -0,0 +1,235 @@ +Scenario 2 - Epileptic Seizure Classification +================================================ + +Configuration loaded: 3 folds, 2 feature options, 2 radius options. +Loading dataset from ./Datasets/epileptic_seizure_data.csv ... +Dataset loaded: 11500 samples, 178 features, 5 classes. + +Splitting data into train/val/test (60/20/20%)... +-> train: 6900 val: 2300 test: 2300 +Applying z-score normalization... + +GRID SEARCH (features × radius) using 3-fold CV + +[GRID] features= 5, radius=0.50 ... +-> Fold 1/3 ... +ANFIS info: + Number of nodes: 92 + Number of linear parameters: 7 + Number of nonlinear parameters: 70 + Total number of parameters: 77 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 7 +Minimal training RMSE = 1.13032 +Minimal checking RMSE = 1.13236 +Overall Accuracy: 30.61% +Cohen's Kappa : 0.169 +kappa=0.169 rules=7 + +-> Fold 2/3 ... 
+ANFIS info: + Number of nodes: 140 + Number of linear parameters: 11 + Number of nonlinear parameters: 110 + Total number of parameters: 121 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 11 +Minimal training RMSE = 1.12499 +Minimal checking RMSE = 1.15105 +Overall Accuracy: 39.43% +Cohen's Kappa : 0.289 +kappa=0.289 rules=11 + +-> Fold 3/3 ... +ANFIS info: + Number of nodes: 128 + Number of linear parameters: 10 + Number of nonlinear parameters: 100 + Total number of parameters: 110 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 10 +Minimal training RMSE = 1.13166 +Minimal checking RMSE = 1.12551 +Overall Accuracy: 34.91% +Cohen's Kappa : 0.225 +kappa=0.225 rules=10 + +-> mean Kappa=0.227 mean rules=9 + +[GRID] features= 5, radius=0.75 ... +-> Fold 1/3 ... +ANFIS info: + Number of nodes: 68 + Number of linear parameters: 5 + Number of nonlinear parameters: 50 + Total number of parameters: 55 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 5 +Minimal training RMSE = 1.12389 +Minimal checking RMSE = 1.12597 +Overall Accuracy: 33.17% +Cohen's Kappa : 0.210 +kappa=0.210 rules=5 + +-> Fold 2/3 ... +ANFIS info: + Number of nodes: 68 + Number of linear parameters: 5 + Number of nonlinear parameters: 50 + Total number of parameters: 55 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 5 +Minimal training RMSE = 1.15261 +Minimal checking RMSE = 1.16568 +Overall Accuracy: 34.30% +Cohen's Kappa : 0.240 +kappa=0.240 rules=5 + +-> Fold 3/3 ... 
+ANFIS info: + Number of nodes: 68 + Number of linear parameters: 5 + Number of nonlinear parameters: 50 + Total number of parameters: 55 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 5 +Minimal training RMSE = 1.14349 +Minimal checking RMSE = 1.13975 +Overall Accuracy: 34.65% +Cohen's Kappa : 0.226 +kappa=0.226 rules=5 + +-> mean Kappa=0.225 mean rules=5 + +[GRID] features= 8, radius=0.50 ... +-> Fold 1/3 ... +ANFIS info: + Number of nodes: 209 + Number of linear parameters: 11 + Number of nonlinear parameters: 176 + Total number of parameters: 187 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 11 +Minimal training RMSE = 1.11568 +Minimal checking RMSE = 1.12592 +Overall Accuracy: 35.26% +Cohen's Kappa : 0.224 +kappa=0.224 rules=11 + +-> Fold 2/3 ... +ANFIS info: + Number of nodes: 209 + Number of linear parameters: 11 + Number of nonlinear parameters: 176 + Total number of parameters: 187 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 11 +Minimal training RMSE = 1.08705 +Minimal checking RMSE = 1.11471 +Overall Accuracy: 35.48% +Cohen's Kappa : 0.226 +kappa=0.226 rules=11 + +-> Fold 3/3 ... +ANFIS info: + Number of nodes: 209 + Number of linear parameters: 11 + Number of nonlinear parameters: 176 + Total number of parameters: 187 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 11 +Minimal training RMSE = 1.12397 +Minimal checking RMSE = 1.12198 +Overall Accuracy: 35.65% +Cohen's Kappa : 0.230 +kappa=0.230 rules=11 + +-> mean Kappa=0.227 mean rules=11 + +[GRID] features= 8, radius=0.75 ... +-> Fold 1/3 ... 
+ANFIS info: + Number of nodes: 119 + Number of linear parameters: 6 + Number of nonlinear parameters: 96 + Total number of parameters: 102 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 6 +Minimal training RMSE = 1.12245 +Minimal checking RMSE = 1.12803 +Overall Accuracy: 32.70% +Cohen's Kappa : 0.196 +kappa=0.196 rules=6 + +-> Fold 2/3 ... +ANFIS info: + Number of nodes: 173 + Number of linear parameters: 9 + Number of nonlinear parameters: 144 + Total number of parameters: 153 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 9 +Minimal training RMSE = 1.10065 +Minimal checking RMSE = 1.12512 +Overall Accuracy: 37.09% +Cohen's Kappa : 0.251 +kappa=0.251 rules=9 + +-> Fold 3/3 ... +ANFIS info: + Number of nodes: 101 + Number of linear parameters: 5 + Number of nonlinear parameters: 80 + Total number of parameters: 85 + Number of training data pairs: 4600 + Number of checking data pairs: 2300 + Number of fuzzy rules: 5 +Minimal training RMSE = 1.13562 +Minimal checking RMSE = 1.13557 +Overall Accuracy: 33.00% +Cohen's Kappa : 0.196 +kappa=0.196 rules=5 + +-> mean Kappa=0.214 mean rules=7 + +BEST HYPERPARAMS +features=5 radius=0.50 CV Kappa=0.227 mean rules=9 + +Training final model on train+val with best params ... + +ANFIS info: + Number of nodes: 80 + Number of linear parameters: 6 + Number of nonlinear parameters: 60 + Total number of parameters: 66 + Number of training data pairs: 9200 + Number of checking data pairs: 2300 + Number of fuzzy rules: 6 + +Minimal training RMSE = 1.14268 +Minimal checking RMSE = 1.1463 +Final training complete: 6 rules. + +Evaluating on TEST set ... +Overall Accuracy: 29.78% +Cohen's Kappa : 0.158 + +[TEST RESULTS] + OA = 29.78 % + Kappa= 0.158 + Rules= 6 + +Generating figures ... + +Done. 
Figures saved in: figures_scn2 diff --git a/Work 4/source/figures_scn1/cm_run01_class-independent_r0.20_rules32.png b/Work 4/source/figures_scn1/cm_run01_class-independent_r0.20_rules32.png index c771df1..c7663aa 100644 Binary files a/Work 4/source/figures_scn1/cm_run01_class-independent_r0.20_rules32.png and b/Work 4/source/figures_scn1/cm_run01_class-independent_r0.20_rules32.png differ diff --git a/Work 4/source/figures_scn1/cm_run02_class-independent_r0.80_rules3.png b/Work 4/source/figures_scn1/cm_run02_class-independent_r0.80_rules3.png index 114afcd..340f080 100644 Binary files a/Work 4/source/figures_scn1/cm_run02_class-independent_r0.80_rules3.png and b/Work 4/source/figures_scn1/cm_run02_class-independent_r0.80_rules3.png differ diff --git a/Work 4/source/figures_scn1/cm_run03_class-dependent_r0.20_rules51.png b/Work 4/source/figures_scn1/cm_run03_class-dependent_r0.20_rules51.png index 60c5f0f..45f0d9c 100644 Binary files a/Work 4/source/figures_scn1/cm_run03_class-dependent_r0.20_rules51.png and b/Work 4/source/figures_scn1/cm_run03_class-dependent_r0.20_rules51.png differ diff --git a/Work 4/source/figures_scn1/cm_run04_class-dependent_r0.80_rules4.png b/Work 4/source/figures_scn1/cm_run04_class-dependent_r0.80_rules4.png index 205381d..2aefd5e 100644 Binary files a/Work 4/source/figures_scn1/cm_run04_class-dependent_r0.80_rules4.png and b/Work 4/source/figures_scn1/cm_run04_class-dependent_r0.80_rules4.png differ diff --git a/Work 4/source/figures_scn1/kappa_across_models.png b/Work 4/source/figures_scn1/kappa_across_models.png index b51d211..12118c5 100644 Binary files a/Work 4/source/figures_scn1/kappa_across_models.png and b/Work 4/source/figures_scn1/kappa_across_models.png differ diff --git a/Work 4/source/figures_scn1/learning_run01_class-independent_r0.20_rules32.png b/Work 4/source/figures_scn1/learning_run01_class-independent_r0.20_rules32.png index b1038ae..68a61cf 100644 Binary files a/Work 
4/source/figures_scn1/learning_run01_class-independent_r0.20_rules32.png and b/Work 4/source/figures_scn1/learning_run01_class-independent_r0.20_rules32.png differ diff --git a/Work 4/source/figures_scn1/learning_run02_class-independent_r0.80_rules3.png b/Work 4/source/figures_scn1/learning_run02_class-independent_r0.80_rules3.png index cae37df..8cafd95 100644 Binary files a/Work 4/source/figures_scn1/learning_run02_class-independent_r0.80_rules3.png and b/Work 4/source/figures_scn1/learning_run02_class-independent_r0.80_rules3.png differ diff --git a/Work 4/source/figures_scn1/learning_run03_class-dependent_r0.20_rules51.png b/Work 4/source/figures_scn1/learning_run03_class-dependent_r0.20_rules51.png index bdb5afa..bc8cfda 100644 Binary files a/Work 4/source/figures_scn1/learning_run03_class-dependent_r0.20_rules51.png and b/Work 4/source/figures_scn1/learning_run03_class-dependent_r0.20_rules51.png differ diff --git a/Work 4/source/figures_scn1/learning_run04_class-dependent_r0.80_rules4.png b/Work 4/source/figures_scn1/learning_run04_class-dependent_r0.80_rules4.png index 76cd0a3..fcbc694 100644 Binary files a/Work 4/source/figures_scn1/learning_run04_class-dependent_r0.80_rules4.png and b/Work 4/source/figures_scn1/learning_run04_class-dependent_r0.80_rules4.png differ diff --git a/Work 4/source/figures_scn1/mfs_run01_class-independent_r0.20_rules32.png b/Work 4/source/figures_scn1/mfs_run01_class-independent_r0.20_rules32.png index 545e6bc..2c30043 100644 Binary files a/Work 4/source/figures_scn1/mfs_run01_class-independent_r0.20_rules32.png and b/Work 4/source/figures_scn1/mfs_run01_class-independent_r0.20_rules32.png differ diff --git a/Work 4/source/figures_scn1/mfs_run02_class-independent_r0.80_rules3.png b/Work 4/source/figures_scn1/mfs_run02_class-independent_r0.80_rules3.png index 4b3334f..4674f2c 100644 Binary files a/Work 4/source/figures_scn1/mfs_run02_class-independent_r0.80_rules3.png and b/Work 
4/source/figures_scn1/mfs_run02_class-independent_r0.80_rules3.png differ diff --git a/Work 4/source/figures_scn1/mfs_run03_class-dependent_r0.20_rules51.png b/Work 4/source/figures_scn1/mfs_run03_class-dependent_r0.20_rules51.png index 5c206e4..8a77b40 100644 Binary files a/Work 4/source/figures_scn1/mfs_run03_class-dependent_r0.20_rules51.png and b/Work 4/source/figures_scn1/mfs_run03_class-dependent_r0.20_rules51.png differ diff --git a/Work 4/source/figures_scn1/mfs_run04_class-dependent_r0.80_rules4.png b/Work 4/source/figures_scn1/mfs_run04_class-dependent_r0.80_rules4.png index eeb839a..ec97057 100644 Binary files a/Work 4/source/figures_scn1/mfs_run04_class-dependent_r0.80_rules4.png and b/Work 4/source/figures_scn1/mfs_run04_class-dependent_r0.80_rules4.png differ diff --git a/Work 4/source/figures_scn1/overall_accuracy_across_models.png b/Work 4/source/figures_scn1/overall_accuracy_across_models.png index 654a2e6..c0235e0 100644 Binary files a/Work 4/source/figures_scn1/overall_accuracy_across_models.png and b/Work 4/source/figures_scn1/overall_accuracy_across_models.png differ diff --git a/Work 4/source/figures_scn1/pa_ua_run01_class-independent_r0.20_rules32.png b/Work 4/source/figures_scn1/pa_ua_run01_class-independent_r0.20_rules32.png index fea33b4..23a6e27 100644 Binary files a/Work 4/source/figures_scn1/pa_ua_run01_class-independent_r0.20_rules32.png and b/Work 4/source/figures_scn1/pa_ua_run01_class-independent_r0.20_rules32.png differ diff --git a/Work 4/source/figures_scn1/pa_ua_run02_class-independent_r0.80_rules3.png b/Work 4/source/figures_scn1/pa_ua_run02_class-independent_r0.80_rules3.png index 02d24ad..20fb819 100644 Binary files a/Work 4/source/figures_scn1/pa_ua_run02_class-independent_r0.80_rules3.png and b/Work 4/source/figures_scn1/pa_ua_run02_class-independent_r0.80_rules3.png differ diff --git a/Work 4/source/figures_scn1/pa_ua_run03_class-dependent_r0.20_rules51.png b/Work 
4/source/figures_scn1/pa_ua_run03_class-dependent_r0.20_rules51.png index e00245c..bcfe068 100644 Binary files a/Work 4/source/figures_scn1/pa_ua_run03_class-dependent_r0.20_rules51.png and b/Work 4/source/figures_scn1/pa_ua_run03_class-dependent_r0.20_rules51.png differ diff --git a/Work 4/source/figures_scn1/pa_ua_run04_class-dependent_r0.80_rules4.png b/Work 4/source/figures_scn1/pa_ua_run04_class-dependent_r0.80_rules4.png index 16c4bb4..288075c 100644 Binary files a/Work 4/source/figures_scn1/pa_ua_run04_class-dependent_r0.80_rules4.png and b/Work 4/source/figures_scn1/pa_ua_run04_class-dependent_r0.80_rules4.png differ diff --git a/Work 4/source/figures_scn1/pred_vs_truth_run01_class-independent_r0.20_rules32.png b/Work 4/source/figures_scn1/pred_vs_truth_run01_class-independent_r0.20_rules32.png index d5f1662..7353bac 100644 Binary files a/Work 4/source/figures_scn1/pred_vs_truth_run01_class-independent_r0.20_rules32.png and b/Work 4/source/figures_scn1/pred_vs_truth_run01_class-independent_r0.20_rules32.png differ diff --git a/Work 4/source/figures_scn1/pred_vs_truth_run02_class-independent_r0.80_rules3.png b/Work 4/source/figures_scn1/pred_vs_truth_run02_class-independent_r0.80_rules3.png index 820679b..28483ba 100644 Binary files a/Work 4/source/figures_scn1/pred_vs_truth_run02_class-independent_r0.80_rules3.png and b/Work 4/source/figures_scn1/pred_vs_truth_run02_class-independent_r0.80_rules3.png differ diff --git a/Work 4/source/figures_scn1/pred_vs_truth_run03_class-dependent_r0.20_rules51.png b/Work 4/source/figures_scn1/pred_vs_truth_run03_class-dependent_r0.20_rules51.png index 3f38655..6dfe9ef 100644 Binary files a/Work 4/source/figures_scn1/pred_vs_truth_run03_class-dependent_r0.20_rules51.png and b/Work 4/source/figures_scn1/pred_vs_truth_run03_class-dependent_r0.20_rules51.png differ diff --git a/Work 4/source/figures_scn1/pred_vs_truth_run04_class-dependent_r0.80_rules4.png b/Work 
4/source/figures_scn1/pred_vs_truth_run04_class-dependent_r0.80_rules4.png index b891f9f..f04b2f7 100644 Binary files a/Work 4/source/figures_scn1/pred_vs_truth_run04_class-dependent_r0.80_rules4.png and b/Work 4/source/figures_scn1/pred_vs_truth_run04_class-dependent_r0.80_rules4.png differ diff --git a/Work 4/source/figures_scn1/rules_vs_accuracy.png b/Work 4/source/figures_scn1/rules_vs_accuracy.png index 0f8e3c9..8a15f46 100644 Binary files a/Work 4/source/figures_scn1/rules_vs_accuracy.png and b/Work 4/source/figures_scn1/rules_vs_accuracy.png differ diff --git a/Work 4/source/figures_scn2/cm_best_model.png b/Work 4/source/figures_scn2/cm_best_model.png new file mode 100644 index 0000000..ba9aa92 Binary files /dev/null and b/Work 4/source/figures_scn2/cm_best_model.png differ diff --git a/Work 4/source/figures_scn2/cv_kappa_heatmap.png b/Work 4/source/figures_scn2/cv_kappa_heatmap.png new file mode 100644 index 0000000..a71e4da Binary files /dev/null and b/Work 4/source/figures_scn2/cv_kappa_heatmap.png differ diff --git a/Work 4/source/figures_scn2/cv_rules_heatmap.png b/Work 4/source/figures_scn2/cv_rules_heatmap.png new file mode 100644 index 0000000..818114f Binary files /dev/null and b/Work 4/source/figures_scn2/cv_rules_heatmap.png differ diff --git a/Work 4/source/figures_scn2/learning_best_model.png b/Work 4/source/figures_scn2/learning_best_model.png new file mode 100644 index 0000000..a53d02d Binary files /dev/null and b/Work 4/source/figures_scn2/learning_best_model.png differ diff --git a/Work 4/source/figures_scn2/mfs_best_model.png b/Work 4/source/figures_scn2/mfs_best_model.png new file mode 100644 index 0000000..9bb162a Binary files /dev/null and b/Work 4/source/figures_scn2/mfs_best_model.png differ diff --git a/Work 4/source/figures_scn2/pa_ua_best_model.png b/Work 4/source/figures_scn2/pa_ua_best_model.png new file mode 100644 index 0000000..7fde68c Binary files /dev/null and b/Work 4/source/figures_scn2/pa_ua_best_model.png differ diff 
--git a/Work 4/source/figures_scn2/pred_vs_truth_best_model.png b/Work 4/source/figures_scn2/pred_vs_truth_best_model.png new file mode 100644 index 0000000..15b1461 Binary files /dev/null and b/Work 4/source/figures_scn2/pred_vs_truth_best_model.png differ diff --git a/Work 4/source/figures_scn2/relieff_weights_selected.png b/Work 4/source/figures_scn2/relieff_weights_selected.png new file mode 100644 index 0000000..2e500ed Binary files /dev/null and b/Work 4/source/figures_scn2/relieff_weights_selected.png differ diff --git a/Work 4/source/plot_results1.m b/Work 4/source/plot_results1.m index 060ffcc..1ff12b9 100644 --- a/Work 4/source/plot_results1.m +++ b/Work 4/source/plot_results1.m @@ -29,7 +29,7 @@ function plot_results1(results, classLabels, cfg) radii(i) = results(i).radius; end - % -------- Per-model plots -------- + % Per-model plots for i = 1:nRuns tag = sprintf('run%02d_%s_r%.2f_rules%d', ... i, results(i).mode, results(i).radius, results(i).nRules); @@ -102,7 +102,7 @@ function plot_results1(results, classLabels, cfg) end end - % -------- Across-model summaries -------- + % Across-model summaries [~, idxSort] = sortrows([double(modes=='class-independent'), radii], [1 2]); OA_s = OA(idxSort); Kap_s = Kap(idxSort); diff --git a/Work 4/source/plot_results2.m b/Work 4/source/plot_results2.m new file mode 100644 index 0000000..74414ed --- /dev/null +++ b/Work 4/source/plot_results2.m @@ -0,0 +1,142 @@ +function plot_results2(results, cfg, classLabels) +% PLOT_RESULTS2 — Scenario 2 plotting suite +% Produces and saves: +% (A) CV heatmap of Kappa over (#features × radius) +% (B) CV heatmap of mean #rules over the same grid +% (C) Confusion matrix on TEST for the best model +% (D) PA/UA bars on TEST +% (E) Learning curves (train/validation error) +% (F) MFs before/after for the best model +% (G) Truth vs Prediction (TEST) +% (H) ReliefF feature weights bar chart +% +% All PNGs saved under cfg.outDir. 
+ +outDir = cfg.outDir; + +% (A) CV heatmap — Kappa +fig = figure('Color','w'); +imagesc(results.cvScores); +set(gca,'XTick',1:numel(results.rGrid),'XTickLabel',compose('%.2f',results.rGrid)); +set(gca,'YTick',1:numel(results.fGrid),'YTickLabel',string(results.fGrid)); +xlabel('SC radius r_a'); ylabel('#Features (ReliefF)'); colorbar; +title(sprintf('CV mean Kappa (K=%d folds)', cfg.kfold)); +for i = 1:numel(results.fGrid) + for j = 1:numel(results.rGrid) + text(j,i,sprintf('%.2f',results.cvScores(i,j)),... + 'HorizontalAlignment','center','Color','w','FontWeight','bold'); + end +end +exportgraphics(fig, fullfile(outDir,'cv_kappa_heatmap.png'), 'Resolution', 200); +close(fig); + +% (B) CV heatmap — mean #rules +fig = figure('Color','w'); +imagesc(results.cvRules); +set(gca,'XTick',1:numel(results.rGrid),'XTickLabel',compose('%.2f',results.rGrid)); +set(gca,'YTick',1:numel(results.fGrid),'YTickLabel',string(results.fGrid)); +xlabel('SC radius r_a'); ylabel('#Features (ReliefF)'); colorbar; +title(sprintf('CV mean #Rules (K=%d folds)', cfg.kfold)); +for i = 1:numel(results.fGrid) + for j = 1:numel(results.rGrid) + text(j,i,sprintf('%d',results.cvRules(i,j)),... + 'HorizontalAlignment','center','Color','w','FontWeight','bold'); + end +end +exportgraphics(fig, fullfile(outDir,'cv_rules_heatmap.png'), 'Resolution', 200); +close(fig); + +% (C) Confusion matrix — TEST +fig = figure('Color','w'); +confusionchart(results.metrics.confMat, string(classLabels), ... + 'Title', sprintf('Confusion — best model (features=%d, r=%.2f, rules=%d)', ... 
+ results.bestF, results.bestR, numel(results.bestFis.rule))); +exportgraphics(fig, fullfile(outDir,'cm_best_model.png'), 'Resolution', 200); +close(fig); + +% (D) PA/UA bars — TEST +fig = figure('Color','w'); +t = tiledlayout(2,1,'TileSpacing','compact','Padding','compact'); +nexttile; bar(results.metrics.PA); ylim([0 1]); +xticks(1:numel(classLabels)); xticklabels(string(classLabels)); +ylabel('PA (Recall)'); title('Producer''s Accuracy (TEST)'); grid on; +nexttile; bar(results.metrics.UA); ylim([0 1]); +xticks(1:numel(classLabels)); xticklabels(string(classLabels)); +ylabel('UA (Precision)'); title('User''s Accuracy (TEST)'); grid on; +exportgraphics(fig, fullfile(outDir,'pa_ua_best_model.png'), 'Resolution', 200); +close(fig); + +% (E) Learning curves +fig = figure('Color','w'); +plot(1:numel(results.trError), results.trError, 'LineWidth', 1.2); hold on; +if ~isempty(results.vaError) + plot(1:numel(results.vaError), results.vaError, '--', 'LineWidth', 1.2); + legend('Training Error','Validation Error','Location','best'); +else + legend('Training Error','Location','best'); +end +xlabel('Epoch'); ylabel('Error'); grid on; +title(sprintf('Learning Curve — best model (features=%d, r=%.2f)', ... + results.bestF, results.bestR)); +exportgraphics(fig, fullfile(outDir,'learning_best_model.png'), 'Resolution', 200); +close(fig); + +% (F) MFs before/after for the best model +try + plot_mfs_before_after(results.initFis, results.bestFis, ... + sprintf('MFs — best model (features=%d, r=%.2f)', results.bestF, results.bestR), ... 
+ fullfile(outDir,'mfs_best_model.png')); +catch ME + warning('MF plot failed: %s', ME.message); +end + +% (G) Truth vs Prediction — TEST +fig = figure('Color','w'); +plot(results.ytrue, 'LineWidth', 1.0); hold on; +plot(results.yhat, '--', 'LineWidth', 1.0); +xlabel('Test sample index'); ylabel('Class label'); grid on; +title('Truth vs Prediction (TEST)'); +legend('Truth','Prediction','Location','best'); +exportgraphics(fig, fullfile(outDir,'pred_vs_truth_best_model.png'), 'Resolution', 200); +close(fig); + +% (H) ReliefF feature weights (on full train+val) +fig = figure('Color','w'); +w = results.reliefW(:); +idx = results.selIdx(:); +% map back: selected indices first for clarity +bar(1:numel(idx), w(idx)); grid on; +xlabel('Selected feature index'); ylabel('ReliefF weight'); +title(sprintf('ReliefF weights (top %d features)', results.bestF)); +exportgraphics(fig, fullfile(outDir,'relieff_weights_selected.png'), 'Resolution', 200); +close(fig); + + + + +% ===================== local helper ===================== +function plot_mfs_before_after(fisBefore, fisAfter, suptitleStr, outPng) + D = numel(fisAfter.Inputs); + fig = figure('Color','w','Position',[100 100 1200 420]); + t = tiledlayout(2, D, 'TileSpacing','compact','Padding','compact'); + for d = 1:D + nexttile(d); hold on; + try + [xB, yB] = plotmf(fisBefore, 'input', d); + plot(xB, yB, 'LineWidth', 1.0); + catch + end + title(sprintf('Input %d — BEFORE', d)); + ylim([0 1]); grid on; + + nexttile(D + d); hold on; + [xA, yA] = plotmf(fisAfter, 'input', d); + plot(xA, yA, 'LineWidth', 1.0); + title(sprintf('Input %d — AFTER', d)); + ylim([0 1]); grid on; + end + sgtitle(suptitleStr); + exportgraphics(fig, outPng, 'Resolution', 200); + close(fig); +end +end diff --git a/Work 4/source/preprocess_data.m b/Work 4/source/preprocess_data.m index 5f556cd..7d24104 100644 --- a/Work 4/source/preprocess_data.m +++ b/Work 4/source/preprocess_data.m @@ -1,5 +1,5 @@ function [Xn, mu, sigma] = preprocess_data(X, mu, sigma) 
-% PREPROCESS Normalize feature matrix using z-score scaling +% PREPROCESS_DATA Normalize feature matrix using z-score scaling % % [Xn, mu, sigma] = preprocess(X) % [Xn, mu, sigma] = preprocess(X, mu, sigma) diff --git a/Work 4/source/results_scn1.mat b/Work 4/source/results_scn1.mat index aaf78da..99c08ae 100644 Binary files a/Work 4/source/results_scn1.mat and b/Work 4/source/results_scn1.mat differ diff --git a/Work 4/source/results_scn2.mat b/Work 4/source/results_scn2.mat new file mode 100644 index 0000000..74fba14 Binary files /dev/null and b/Work 4/source/results_scn2.mat differ diff --git a/Work 4/source/scenario1.m b/Work 4/source/scenario1.m index 29d1638..abf8499 100644 --- a/Work 4/source/scenario1.m +++ b/Work 4/source/scenario1.m @@ -1,16 +1,24 @@ -% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman) +%% scenario1.m — Assignment 4 (Classification), Scenario 1 (Haberman) % TSK classification with Subtractive Clustering (SC) -% Modes: (A) class-independent SC, (B) class-dependent SC) +% Modes: (A) class-independent SC, +% (B) class-dependent SC) % Uses: split_data, preprocess_data, evaluate_classification, plot_results1 % % Dataset: ./Datasets/haberman.data % Columns: [age, op_year, axillary_nodes, class] with class in {1,2} - +% +% Assignment 4 in Fuzzy systems +% +% author: +% Christos Choutouridis ΑΕΜ 8997 +% cchoutou@ece.auth.gr +% close all; clear; clc; -% ============================ CONFIGURATION ================================ +% CONFIGURATION +% ================================ cfg = struct(); -rng(42, 'twister'); % reproducibility +rng(42, 'twister'); % reproducibility % Data handling cfg.split = [0.6 0.2 0.2]; % train / val / test (stratified in split_data) @@ -34,7 +42,7 @@ cfg.modes = {'class-independent','class-dependent'}; cfg.outDir = 'figures_scn1'; if ~exist(cfg.outDir,'dir'), mkdir(cfg.outDir); end -% =============================== DATA ===================================== +% DATA dataPath = 
'./Datasets/haberman.data'; assert(isfile(dataPath), 'Dataset not found at: %s', dataPath); @@ -49,7 +57,7 @@ Y = Y(:); classLabels = unique(Y); minLabel = min(classLabels); maxLabel = max(classLabels); -% =========================== SPLIT & PREPROCESS =========================== +% SPLIT & PREPROCESS [trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split); if cfg.standardize @@ -63,7 +71,7 @@ end % For manual sugfis construction inRanges = [min(trainX,[],1); max(trainX,[],1)]; -% ============================== TRAINING ================================== +% TRAINING results = []; runId = 0; for m = 1:numel(cfg.modes) @@ -74,24 +82,23 @@ for m = 1:numel(cfg.modes) runId = runId + 1; fprintf('\n=== Run %d: mode=%s, radius=%.2f ===\n', runId, modeName, radius); - % ----- Initial FIS ----- + % Initial FIS switch modeName case 'class-independent' - % Use new-style API like your colleague opt = genfisOptions('SubtractiveClustering', ... 'ClusterInfluenceRange', radius); initFis = genfis(trainX, double(trainY), opt); % genfis(Subtractive) already builds Sugeno with constant consequents. 
case 'class-dependent' - % Our custom builder (fixes colleague's bug: feed only features to subclust) + % Our custom builder initFis = build_classdep_fis(trainX, trainY, classLabels, radius, inRanges); otherwise error('Unknown mode: %s', modeName); end - % ----- ANFIS training ----- + % ANFIS training trData = [trainX double(trainY)]; ckData = [valX double(valY)]; anfisOpts = [cfg.maxEpochs cfg.errorGoal cfg.initialStep cfg.stepDecrease cfg.stepIncrease]; @@ -102,7 +109,7 @@ for m = 1:numel(cfg.modes) [fisTrained, trError, ~, ~, ckError] = anfis(trData, initFis, anfisOpts, [0 0 0 0], ckData); end - % ----- Evaluate on test set ----- + % Evaluate on test set yhat_cont = evalfis(testX, fisTrained); yhat = round(yhat_cont); % clip into valid label range (important for small rulebases) @@ -133,22 +140,23 @@ for m = 1:numel(cfg.modes) end end -% ============================== PLOTTING ================================== +% PLOTTING plot_results1(results, classLabels, cfg); -% =============================== SAVE ALL ================================= +% SAVE ALL save('results_scn1.mat','results','cfg','classLabels','mu','sigma', ... 'trainX','valX','testX','trainY','valY','testY'); fprintf('\nDone. Figures saved in: %s\n', cfg.outDir); -% ============================ LOCAL FUNCTIONS ============================= +% LOCAL FUNCTIONS +% =================================================== function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges) % BUILD_CLASSDEP_FIS — class-dependent SC for Sugeno FIS (ANFIS-ready) -% Creates ONE constant output MF PER RULE (required by ANFIS). -% Runs SUBCLUST on FEATURES ONLY for each class. +% Creates ONE constant output MF PER RULE (required by ANFIS). +% Runs SUBCLUST on FEATURES ONLY for each class. 
- D = size(X,2); + D = size(X, 2); fis = sugfis('Name','TSK_ClassDependent'); % Inputs with ranges from training data @@ -172,7 +180,7 @@ function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges) [centers, sigmas] = subclust(Xi, radius); nCl = size(centers,1); - % ---- robust sigma broadcasting to M×D ---- + % robust sigma broadcasting to M×D if isscalar(sigmas) S = repmat(sigmas, nCl, D); elseif size(sigmas,1) == 1 && size(sigmas,2) == D @@ -192,7 +200,7 @@ function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges) % Add input MFs for this cluster (and remember their indices) for d = 1:D mfName = sprintf('c%d_r%d_x%d', c, i, d); - params = [S(i,d) centers(i,d)]; % [sigma center] + params = [S(i, d) centers(i, d)]; % [sigma center] fis = addMF(fis, sprintf('x%d', d), 'gaussmf', params, 'Name', mfName); antecedentIdx(d) = numel(fis.Inputs(d).MembershipFunctions); end diff --git a/Work 4/source/scenario2.m b/Work 4/source/scenario2.m new file mode 100644 index 0000000..5b9e767 --- /dev/null +++ b/Work 4/source/scenario2.m @@ -0,0 +1,297 @@ +%% scenario2.m — Assignment 4 (Classification), Scenario 2 (Epileptic Seizure) +% TSK classification on a high-dimensional dataset with feature selection. +% Verbose version with progress printing. 
+%
+% Uses: split_data, preprocess_data, evaluate_classification, plot_results2
+% Dataset path: ./Datasets/epileptic_seizure_data.csv
+%
+% Assignment 4 in Fuzzy systems
+%
+% author:
+% Christos Choutouridis ΑΕΜ 8997
+% cchoutou@ece.auth.gr
+
+close all; clear; clc;
+
+%parpool('threads',6);
+
+fprintf('\nScenario 2 - Epileptic Seizure Classification');
+fprintf('\n================================================\n\n');
+
+% CONFIGURATION
+cfg = struct();
+rng(42,'twister'); % reproducibility
+
+% Data handling
+cfg.split = [0.6 0.2 0.2];
+cfg.standardize = true;
+
+% Feature selection + SC hyper-params
+
+% Debug configuration
+ cfg.feature_grid = [5 8]; %[5 8 11 15];
+ cfg.radii_grid = [0.5 0.75]; %[0.25 0.50 0.75 1.00];
+ cfg.kfold = 3;
+ cfg.maxEpochs = 20; % ANFIS options
+ cfg.displayANFIS = 0;
+% Default configuration
+%cfg.feature_grid = [5 8 11 15];
+%cfg.radii_grid = [0.25 0.50 0.75 1.00];
+%cfg.kfold = 5;
+%cfg.maxEpochs = 100; % ANFIS options
+%cfg.displayANFIS = 0;
+
+% Output directory
+cfg.outDir = 'figures_scn2';
+if ~exist(cfg.outDir,'dir'), mkdir(cfg.outDir); end
+
+fprintf('Configuration loaded: %d folds, %d feature options, %d radius options.\n', ...
+ cfg.kfold, numel(cfg.feature_grid), numel(cfg.radii_grid));
+
+% DATA
+dataPath = './Datasets/epileptic_seizure_data.csv';
+fprintf('Loading dataset from %s ...\n', dataPath);
+assert(isfile(dataPath), 'Dataset not found!');
+
+raw = importdata(dataPath);
+if isstruct(raw) && isfield(raw,'data')
+ A = raw.data;
+else
+ A = readmatrix(dataPath);
+end
+X = A(:,1:end-1);
+Y = A(:,end);
+Y = double(Y(:));
+classLabels = unique(Y);
+num_classes = numel(classLabels);
+fprintf('Dataset loaded: %d samples, %d features, %d classes.\n', ...
+ size(X,1), size(X,2), num_classes); + +% SPLIT & PREPROCESS +fprintf('\nSplitting data into train/val/test (%.0f/%.0f/%.0f%%)...\n', cfg.split*100); +[trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, cfg.split); +fprintf('-> train: %d val: %d test: %d\n', size(trainX,1), size(valX,1), size(testX,1)); + +if cfg.standardize + fprintf('Applying z-score normalization...\n'); + [trainX, mu, sigma] = preprocess_data(trainX); + valX = preprocess_data(valX, mu, sigma); + testX = preprocess_data(testX, mu, sigma); +else + mu = []; sigma = []; +end + +fullTrainX = [trainX; valX]; +fullTrainY = [trainY; valY]; + +% GRID SEARCH +fprintf('\nGRID SEARCH (features × radius) using %d-fold CV\n', cfg.kfold); + +cvp = cvpartition(trainY, 'KFold', cfg.kfold, 'Stratify', true); +nF = numel(cfg.feature_grid); +nR = numel(cfg.radii_grid); +cvScores = zeros(nF, nR); +cvRules = zeros(nF, nR); + +for fi = 1:nF + featKeep = cfg.feature_grid(fi); + for ri = 1:nR + radius = cfg.radii_grid(ri); + fprintf('\n[GRID] features=%2d, radius=%.2f ... ', featKeep, radius); + + kappas = zeros(cvp.NumTestSets,1); + rulesK = zeros(cvp.NumTestSets,1); + + for k = 1:cvp.NumTestSets + fprintf('\n-> Fold %d/%d ... ', k, cfg.kfold); + + trIdx = training(cvp, k); + teIdx = test(cvp, k); + Xtr = trainX(trIdx,:); Ytr = trainY(trIdx); + Xva = trainX(teIdx,:); Yva = trainY(teIdx); + + % Relief feature selection + [idxFeat, ~] = relief_select(Xtr, Ytr); + sel = idxFeat(1:min(featKeep, numel(idxFeat))); + Xtr = Xtr(:, sel); + Xva = Xva(:, sel); + + % Build FIS + inRanges = [min(Xtr,[],1); max(Xtr,[],1)]; + initFis = build_classdep_fis(Xtr, Ytr, classLabels, radius, inRanges); + + % Train + trData = [Xtr double(Ytr)]; + vaData = [Xva double(Yva)]; + anfisOpts = anfisOptions('InitialFis', initFis, ... + 'EpochNumber', cfg.maxEpochs, ... + 'ValidationData', vaData, ... + 'OptimizationMethod', 1, ... + 'DisplayErrorValues', 0, ... 
+ 'DisplayStepSize', 0); + [~, ~, ~, bestFis, ~] = anfis(trData, anfisOpts); + + % Evaluate fold + yhat = evalfis(bestFis, Xva); + yhat = round(yhat); + yhat(yhat < min(classLabels)) = min(classLabels); + yhat(yhat > max(classLabels)) = max(classLabels); + + R = evaluate_classification(Yva, yhat, classLabels); + kappas(k) = R.Kappa; + rulesK(k) = numel(bestFis.rule); + fprintf('kappa=%.3f rules=%d\n', R.Kappa, rulesK(k)); + end + + cvScores(fi,ri) = mean(kappas); + cvRules(fi,ri) = round(mean(rulesK)); + fprintf('\n-> mean Kappa=%.3f mean rules=%d\n', cvScores(fi,ri), cvRules(fi,ri)); + end +end + +[maxPerRow, idxR] = max(cvScores, [], 2); +[bestKappa, idxF] = max(maxPerRow); +idxR = idxR(idxF); +bestFeatures = cfg.feature_grid(idxF); +bestRadius = cfg.radii_grid(idxR); +bestRulesEst = cvRules(idxF, idxR); + +fprintf('\nBEST HYPERPARAMS\nfeatures=%d radius=%.2f CV Kappa=%.3f mean rules=%d\n', ... + bestFeatures, bestRadius, bestKappa, bestRulesEst); + +% FINAL TRAIN +fprintf('\nTraining final model on train+val with best params ...\n'); + +[idxAll, weightsAll] = relief_select(fullTrainX, fullTrainY); +sel = idxAll(1:min(bestFeatures, numel(idxAll))); +Xtr = fullTrainX(:, sel); +Xte = testX(:, sel); + +inRanges = [min(Xtr,[],1); max(Xtr,[],1)]; +initFis = build_classdep_fis(Xtr, fullTrainY, classLabels, bestRadius, inRanges); +trData = [Xtr double(fullTrainY)]; +teData = [Xte double(testY)]; + +anfisOpts = anfisOptions('InitialFis', initFis, ... + 'EpochNumber', cfg.maxEpochs, ... + 'ValidationData', teData, ... + 'OptimizationMethod', 1, ... + 'DisplayErrorValues', 0, ... 
+ 'DisplayStepSize', 0); +[fisTrained, trError, ~, bestFis, vaError] = anfis(trData, anfisOpts); +fprintf('Final training complete: %d rules.\n', numel(bestFis.rule)); + +% TEST EVAL +fprintf('\nEvaluating on TEST set ...\n'); +yhat_test = evalfis(bestFis, Xte); +yhat_test = round(yhat_test); +yhat_test(yhat_test < min(classLabels)) = min(classLabels); +yhat_test(yhat_test > max(classLabels)) = max(classLabels); + +Rtest = evaluate_classification(testY, yhat_test, classLabels); +fprintf('\n[TEST RESULTS]\n'); +fprintf(' OA = %.2f %%\n', 100*Rtest.OA); +fprintf(' Kappa= %.3f\n', Rtest.Kappa); +fprintf(' Rules= %d\n', numel(bestFis.rule)); + +% PLOTTING +fprintf('\nGenerating figures ...\n'); +results = struct(); +results.cvScores = cvScores; +results.cvRules = cvRules; +results.fGrid = cfg.feature_grid; +results.rGrid = cfg.radii_grid; +results.bestF = numel(sel); +results.bestR = bestRadius; +results.bestFis = bestFis; +results.initFis = initFis; +results.trError = trError; +results.vaError = vaError; +results.ytrue = testY; +results.yhat = yhat_test; +results.metrics = Rtest; +results.selIdx = sel; +results.reliefW = weightsAll; + +plot_results2(results, cfg, classLabels); + +save('results_scn2.mat','results','cfg','classLabels','mu','sigma'); +fprintf('\nDone. Figures saved in: %s\n', cfg.outDir); + + + + +% LOCAL FUNCTIONS +% ================================================== +function fis = build_classdep_fis(X, Y, classLabels, radius, inRanges) +% BUILD_CLASSDEP_FIS — class-dependent SC Sugeno FIS (ANFIS-ready) +% Creates ONE constant output MF PER RULE (ANFIS requirement). +% Runs subclust on FEATURES ONLY per class. 
+ D = size(X,2); + fis = sugfis('Name','TSK_CD'); + + % Inputs + for d = 1:D + fis = addInput(fis, [inRanges(1,d) inRanges(2,d)], 'Name', sprintf('x%d', d)); + end + % Output (range spans label space) + outRange = [min(classLabels) max(classLabels)]; + fis = addOutput(fis, outRange, 'Name', 'y'); + + ruleList = []; + for k = 1:numel(classLabels) + c = classLabels(k); + Xi = X(Y==c, :); + if isempty(Xi), continue; end + + [centers, sigmas] = subclust(Xi, radius); + nCl = size(centers,1); + % Robust sigma broadcasting to M×D + if isscalar(sigmas) + S = repmat(sigmas, nCl, D); + elseif size(sigmas,1)==1 && size(sigmas,2)==D + S = repmat(sigmas, nCl, 1); + elseif all(size(sigmas)==[nCl D]) + S = sigmas; + else + S = repmat(0.5*(inRanges(2,:)-inRanges(1,:)), nCl, 1); + end + + for i = 1:nCl + antIdx = zeros(1,D); + for d = 1:D + mfName = sprintf('c%d_r%d_x%d', c, i, d); + params = [S(i,d) centers(i,d)]; % [sigma center] + fis = addMF(fis, sprintf('x%d', d), 'gaussmf', params, 'Name', mfName); + antIdx(d) = numel(fis.Inputs(d).MembershipFunctions); + end + % ONE constant output MF per rule + outName = sprintf('const_c%d_r%d', c, i); + fis = addMF(fis, 'y', 'constant', double(c), 'Name', outName); + outIdx = numel(fis.Outputs(1).MembershipFunctions); + ruleList = [ruleList; [antIdx, outIdx, 1, 1]]; %#ok + end + end + + if ~isempty(ruleList) + fis = addRule(fis, ruleList); + end + + % Standard TSK ops + fis.AndMethod = 'prod'; + fis.OrMethod = 'probor'; + fis.ImplicationMethod = 'prod'; + fis.AggregationMethod = 'sum'; + fis.DefuzzificationMethod = 'wtaver'; +end + +function [idx, w] = relief_select(X, y) +% RELIEF_SELECT — wraps relieff and returns ranked indices + weights. 
+ try + [idx, w] = relieff(X, y, 10); % k=10 neighbors + catch + % Fallback: simple variance ranking if Statistics Toolbox missing + w = var(X, 0, 1); + [~, idx] = sort(w, 'descend'); + end +end diff --git a/Work 4/source/split_data.m b/Work 4/source/split_data.m index 51f1767..a3e1628 100644 --- a/Work 4/source/split_data.m +++ b/Work 4/source/split_data.m @@ -1,5 +1,5 @@ function [trainX, valX, testX, trainY, valY, testY] = split_data(X, Y, ratios) -% SPLIT Split dataset into train/validation/test sets (stratified) +% SPLIT_DATA Split dataset into train/validation/test sets (stratified) % % [trainX, valX, testX, trainY, valY, testY] = split(X, Y, ratios) %