def run_demo1():
    """
    Demo 1: spectral clustering on the pre-computed affinity matrix ``d1a``.

    Loads ``dip_hw_3.mat`` from the current working directory, runs
    spectral_clustering() on the matrix stored under the key ``"d1a"`` for
    k = 2, 3 and 4, prints the resulting labels and saves one plot per k
    as ``plots/demo1_k<k>.png``.

    Returns:
    -------
    None
    """
    import os  # local import: only needed to create the output directory

    data = loadmat("dip_hw_3.mat")
    A = data["d1a"]
    print("Loaded affinity matrix d1a with shape:", A.shape)

    # savefig() does not create missing directories, so make sure it exists.
    os.makedirs("plots", exist_ok=True)

    for k in [2, 3, 4]:
        print(f"\n=== Spectral Clustering on d1a with k={k} ===")

        labels = spectral_clustering(A, k)
        print("Cluster labels:")
        print(labels)

        # Visualize the cluster id assigned to every node
        fig = matplotlib.pyplot.figure(figsize=(6, 1.5))
        matplotlib.pyplot.title(f"Cluster assignments (k={k})")
        matplotlib.pyplot.plot(labels, 'o-', markersize=8, label='cluster id')
        matplotlib.pyplot.yticks(np.arange(k))
        matplotlib.pyplot.xlabel("Node index")
        matplotlib.pyplot.tight_layout()
        matplotlib.pyplot.savefig(f"plots/demo1_k{k}.png")
        print(f"Saved: plots/demo1_k{k}.png")

        # Release the figure; otherwise one figure per k stays alive in
        # pyplot's registry for the lifetime of the process.
        matplotlib.pyplot.close(fig)
def image_to_graph(
        img_array: NDArray[np.floating]
) -> NDArray[np.float64]:
    """
    Converts an input image into a fully connected graph represented
    by an affinity matrix.

    Every pixel is a node of the graph. The weight of the edge between two
    pixels decays exponentially with the Euclidean distance of their
    channel vectors:

        A(i, j) = exp(-||pixel_i - pixel_j||_2)

    so identical pixels (and every pixel with itself) have affinity 1.

    Parameters:
    ----------
    img_array : np.ndarray of shape (M, N, C) or (M, N), dtype=float
        The input image with C channels (e.g., 3 for RGB),
        with values normalized in [0, 1]. A 2-D array is treated as a
        single-channel image.

    Returns:
    -------
    affinity_mat : np.ndarray of shape (M*N, M*N), dtype=float64
        Symmetric affinity matrix representing the fully connected graph.
        Pixels are enumerated in row-major order.

    Raises:
    ------
    ValueError
        If img_array is not of a floating dtype, or is not 2-D or 3-D.
    """
    # Local import: SciPy is already a dependency of this module (scipy.io).
    from scipy.spatial.distance import cdist

    img_array = np.asarray(img_array)
    if not np.issubdtype(img_array.dtype, np.floating):
        raise ValueError("img_array must be of float type with values in [0, 1].")

    if img_array.ndim == 2:
        # Grayscale image: add a trailing channel axis
        img_array = img_array[:, :, np.newaxis]
    elif img_array.ndim != 3:
        raise ValueError(
            f"img_array must have shape (M, N, C) or (M, N), got {img_array.shape}."
        )

    C = img_array.shape[2]
    # One row per pixel; float64 so the result dtype does not depend on the input
    pixels = img_array.reshape(-1, C).astype(np.float64, copy=False)

    # Direct pairwise Euclidean distances, shape (MN, MN). The diagonal is
    # exactly 0, so no special-casing is needed before the exponential.
    distances = cdist(pixels, pixels, metric='euclidean')

    # Affinity = exp(-d(i, j)), computed in place to avoid a second
    # (MN x MN) temporary.
    np.negative(distances, out=distances)
    np.exp(distances, out=distances)

    return distances
N, 3), dtype float, values in [0, 1] + + # Check shape and type + print("Input image shape:", img_array.shape) + print("dtype:", img_array.dtype) + + # affinity matrix calculation + A = image_to_graph(img_array) + + # Print specs + print("Affinity matrix shape:", A.shape) + print("Is symmetric:", np.allclose(A, A.T)) + print("Max value:", np.max(A)) + print("Min value:", np.min(A)) + + if plot: + matplotlib.use("TkAgg") + matplotlib.pyplot.imshow(A, cmap='hot') + matplotlib.pyplot.title("Affinity Matrix Heatmap") + matplotlib.pyplot.colorbar() + matplotlib.pyplot.show() + + +if __name__ == '__main__': + # If you have TkAgg you can pass True, otherwise pass False + _test_1(True) + _test_2(True) + + + diff --git a/HW03/scripts/plots/demo1_k2.png b/HW03/scripts/plots/demo1_k2.png new file mode 100644 index 0000000..3fac95b Binary files /dev/null and b/HW03/scripts/plots/demo1_k2.png differ diff --git a/HW03/scripts/plots/demo1_k3.png b/HW03/scripts/plots/demo1_k3.png new file mode 100644 index 0000000..6ce7762 Binary files /dev/null and b/HW03/scripts/plots/demo1_k3.png differ diff --git a/HW03/scripts/plots/demo1_k4.png b/HW03/scripts/plots/demo1_k4.png new file mode 100644 index 0000000..0399369 Binary files /dev/null and b/HW03/scripts/plots/demo1_k4.png differ diff --git a/HW03/scripts/plots/demo2_d2a_k2_Normalized.png b/HW03/scripts/plots/demo2_d2a_k2_Normalized.png new file mode 100644 index 0000000..303b34a Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k2_Normalized.png differ diff --git a/HW03/scripts/plots/demo2_d2a_k2_Unnormalized.png b/HW03/scripts/plots/demo2_d2a_k2_Unnormalized.png new file mode 100644 index 0000000..303b34a Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k2_Unnormalized.png differ diff --git a/HW03/scripts/plots/demo2_d2a_k3_Normalized.png b/HW03/scripts/plots/demo2_d2a_k3_Normalized.png new file mode 100644 index 0000000..4f5b644 Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k3_Normalized.png differ 
diff --git a/HW03/scripts/plots/demo2_d2a_k3_Unnormalized.png b/HW03/scripts/plots/demo2_d2a_k3_Unnormalized.png new file mode 100644 index 0000000..4f5b644 Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k3_Unnormalized.png differ diff --git a/HW03/scripts/plots/demo2_d2a_k4_Normalized.png b/HW03/scripts/plots/demo2_d2a_k4_Normalized.png new file mode 100644 index 0000000..df2257f Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k4_Normalized.png differ diff --git a/HW03/scripts/plots/demo2_d2a_k4_Unnormalized.png b/HW03/scripts/plots/demo2_d2a_k4_Unnormalized.png new file mode 100644 index 0000000..2333cfc Binary files /dev/null and b/HW03/scripts/plots/demo2_d2a_k4_Unnormalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k2_Normalized.png b/HW03/scripts/plots/demo2_d2b_k2_Normalized.png new file mode 100644 index 0000000..d7b4012 Binary files /dev/null and b/HW03/scripts/plots/demo2_d2b_k2_Normalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k2_Unnormalized.png b/HW03/scripts/plots/demo2_d2b_k2_Unnormalized.png new file mode 100644 index 0000000..56e3ccc Binary files /dev/null and b/HW03/scripts/plots/demo2_d2b_k2_Unnormalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k3_Normalized.png b/HW03/scripts/plots/demo2_d2b_k3_Normalized.png new file mode 100644 index 0000000..9de6fbc Binary files /dev/null and b/HW03/scripts/plots/demo2_d2b_k3_Normalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k3_Unnormalized.png b/HW03/scripts/plots/demo2_d2b_k3_Unnormalized.png new file mode 100644 index 0000000..4d451de Binary files /dev/null and b/HW03/scripts/plots/demo2_d2b_k3_Unnormalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k4_Normalized.png b/HW03/scripts/plots/demo2_d2b_k4_Normalized.png new file mode 100644 index 0000000..ed984b1 Binary files /dev/null and b/HW03/scripts/plots/demo2_d2b_k4_Normalized.png differ diff --git a/HW03/scripts/plots/demo2_d2b_k4_Unnormalized.png 
def spectral_clustering(
        affinity_mat: NDArray[np.floating],
        k: int,
        normalized: bool = False
) -> NDArray[np.int32]:
    """
    Performs spectral clustering on a given affinity matrix.

    The nodes are embedded using the eigenvectors of the k smallest
    eigenvalues of the graph Laplacian, and the rows of that embedding are
    clustered with k-means.

    Parameters:
    ----------
    affinity_mat : np.ndarray of shape (n, n), dtype=float
        The symmetric affinity matrix representing a graph. Symmetry is
        assumed, not checked: only one triangle of the Laplacian is read
        by the symmetric eigen-solver.
    k : int
        The number of clusters, 1 <= k <= n.
    normalized : bool, optional (default=False)
        Whether to use the normalized Laplacian (L_sym = I - D^(-1/2) A D^(-1/2))
        instead of the unnormalized one (L = D - A).
        !note: Do not confuse this with the normalized-cuts implementation!

    Returns:
    -------
    cluster_idx : np.ndarray of shape (n,), dtype=int32
        An array of cluster labels for each node.

    Raises:
    ------
    ValueError
        If affinity_mat is not a square 2-D matrix or k is out of range.
    """
    # Local import: symmetric dense eigen-solver from SciPy.
    from scipy.linalg import eigh

    affinity_mat = np.asarray(affinity_mat, dtype=np.float64)
    if affinity_mat.ndim != 2 or affinity_mat.shape[0] != affinity_mat.shape[1]:
        raise ValueError("affinity_mat must be a square (n, n) matrix.")
    n = affinity_mat.shape[0]
    if not 1 <= k <= n:
        raise ValueError(f"k must be in [1, {n}], got {k}.")

    # Node degrees (the diagonal of the degree matrix D)
    degrees = affinity_mat.sum(axis=1)

    if normalized:
        # D^(-1/2); isolated nodes (degree 0) get 0 instead of inf
        d_inv_sqrt = np.zeros_like(degrees)
        connected = degrees > 0
        d_inv_sqrt[connected] = 1.0 / np.sqrt(degrees[connected])
        # L = I - D^(-1/2) A D^(-1/2), done as row/column scaling rather
        # than two dense (n x n) matrix products.
        L = np.eye(n) - d_inv_sqrt[:, np.newaxis] * affinity_mat * d_inv_sqrt[np.newaxis, :]
    else:
        L = np.diag(degrees) - affinity_mat

    # The Laplacian is symmetric, so use the symmetric solver. It returns
    # real eigenpairs in ascending order, and subset_by_index restricts the
    # computation to the k smallest eigenvalues.
    _, eigvecs = eigh(L, subset_by_index=[0, k - 1])

    # Each row of eigvecs is the spectral embedding of one node.
    # random_state is fixed to 1 to ensure reproducibility across experiments.
    kmeans = KMeans(n_clusters=k, random_state=1)
    kmeans.fit(eigvecs)

    # Cluster label of every node
    return kmeans.labels_.astype(np.int32)
def compare_with_sklearn(k: int, plot: bool = False) -> float:
    """
    Compares spectral_clustering() against sklearn's spectral clustering
    on the ``d1a`` affinity matrix of ``dip_hw_3.mat``.

    The own implementation is run with the normalized Laplacian. Both label
    vectors and their Adjusted Rand Index (ARI) are printed; the ARI is
    insensitive to a permutation of the label ids.

    Parameters:
    ----------
    k : int
        The number of clusters.
    plot : bool, optional (default=False)
        If True, shows both label vectors in one figure. This switches
        matplotlib to the "TkAgg" backend, so it requires Tk.

    Returns:
    -------
    ari : float
        The Adjusted Rand Index between the two clusterings.
    """
    print(f"=== Comparing with sklearn spectral_clustering (k={k}) ===")
    print(f"")

    data = loadmat("dip_hw_3.mat")
    A = data["d1a"]

    # Own implementation, normalized Laplacian
    labels_own = spectral_clustering(A, k, normalized=True)

    # sklearn implementation (uses normalized Laplacian)
    labels_sklearn = sk_spectral(
        affinity=A,
        n_clusters=k,
        assign_labels="kmeans",
        random_state=1
    )

    # Compare clustering assignments using ARI
    ari = adjusted_rand_score(labels_own, labels_sklearn)

    print(f"Labels (own): {labels_own}")
    print(f"Labels (sklearn): {labels_sklearn}")
    print(f"Adjusted Rand Index (ARI): {ari:.4f}")

    # Optional: plot both label vectors to compare visually
    if plot:
        matplotlib.use("TkAgg")
        fig = matplotlib.pyplot.figure(figsize=(6, 1.5))
        matplotlib.pyplot.title(f"Cluster comparison (k={k})")
        matplotlib.pyplot.plot(labels_own, 'o-', label='own', markersize=8)
        matplotlib.pyplot.plot(labels_sklearn, 'x--', label='sklearn', markersize=6)
        matplotlib.pyplot.yticks(range(k))
        matplotlib.pyplot.xlabel("Node index")
        matplotlib.pyplot.legend()
        matplotlib.pyplot.tight_layout()
        matplotlib.pyplot.show()
        # Release the figure so repeated calls do not accumulate figures
        matplotlib.pyplot.close(fig)

    return ari