HW03: implementation of demo1 and 2

2025-07-05 19:40:31 +03:00 · 2025-07-05 19:40:31 +03:00 · 68e491c4a2
commit 68e491c4a2
parent be6322036b
25 changed files with 440 additions and 0 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +1,6 @@
 [submodule "HW01/report/AUThReport"]
 	path = HW01/report/AUThReport
 	url = ssh://git@git.hoo2.net:222/hoo2/AUThReport.git
 [submodule "HW03/report/AUThReport"]
 	path = HW03/report/AUThReport
 	url = https://git.hoo2.net/hoo2/AUThReport.git
--- a/HW03/.gitignore
+++ b/HW03/.gitignore
@ -0,0 +1,5 @@
 # Python execution related
 __pycache__/
 # IDE files
 .idea/
--- a/HW03/dip-2025-hw3-v0.1.pdf
+++ b/HW03/dip-2025-hw3-v0.1.pdf
--- a/HW03/report/AUThReport
+++ b/HW03/report/AUThReport
@ -0,0 +1 @@
 Subproject commit 74ec4b5f6c66382e5f1b6d2e6930897e4ed53ea6
--- a/HW03/scripts/demo1.py
+++ b/HW03/scripts/demo1.py
@ -0,0 +1,55 @@
 #
 # demo 1 of the assignment
 #
 # For the given data we have:
 # dip_hw_3.mat["d1a"]    # [MN x MN] affinity matrix
 # dip_hw_3.mat["d2a"]    # [M x N x 3] RGB image
 # dip_hw_3.mat["d2b"]    # [M x N x 3] RGB image
 #
 #
 # author: Christos Choutouridis <cchoutou@ece.auth.gr>
 # date:   05/07/2025
 #
 try:
    # Testing requirements
    import numpy as np
    from scipy.io import loadmat
    import matplotlib.pyplot
    # Project imports
    from spectral_clustering import *
 except ImportError as e:
    print("Missing package:", e)
    print("Run: pip install -r requirements.txt")
    exit(1)
 def run_demo1():
    """
    Demo 1: spectral clustering of the precomputed affinity matrix ``d1a``.

    Loads ``dip_hw_3.mat`` from the current working directory, clusters the
    graph described by ``d1a`` for k = 2, 3 and 4, prints the resulting
    labels and saves one plot per k to ``plots/demo1_k{k}.png``.
    """
    # Function-scope import: only needed to prepare the output directory.
    import os

    data = loadmat("dip_hw_3.mat")
    A = data["d1a"]
    print("Loaded affinity matrix d1a with shape:", A.shape)

    # savefig() does not create missing directories, so make sure it exists.
    os.makedirs("plots", exist_ok=True)

    for k in [2, 3, 4]:
        print(f"\n=== Spectral Clustering on d1a with k={k} ===")
        labels = spectral_clustering(A, k)
        print("Cluster labels:")
        print(labels)

        # Visualize the cluster id of every node as a marker line
        fig = matplotlib.pyplot.figure(figsize=(6, 1.5))
        matplotlib.pyplot.title(f"Cluster assignments (k={k})")
        matplotlib.pyplot.plot(labels, 'o-', markersize=8, label='cluster id')
        matplotlib.pyplot.yticks(np.arange(k))
        matplotlib.pyplot.xlabel("Node index")
        matplotlib.pyplot.tight_layout()

        out_path = f"plots/demo1_k{k}.png"
        matplotlib.pyplot.savefig(out_path)
        print(f"Saved: {out_path}")

        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so figures do not pile up across iterations.
        matplotlib.pyplot.close(fig)
 # Script entry point: run the demo only when this file is executed directly.
 if __name__ == '__main__':
    run_demo1()
    # Optional cross-check against sklearn.cluster.spectral_clustering.
    # Note that compare_with_sklearn() runs our implementation with the
    # normalized Laplacian. Uncomment the lines below to enable it:
    #print("")
    #for k in [2, 3, 4]:
    #    compare_with_sklearn(k, False)
--- a/HW03/scripts/demo2.py
+++ b/HW03/scripts/demo2.py
@ -0,0 +1,79 @@
 #
 # Demo 2: Spectral clustering on RGB images (d2a, d2b)
 #
 # Combines image_to_graph + spectral_clustering
 #
 # author: Christos Choutouridis <cchoutou@ece.auth.gr>
 # date:   05/07/2025
 #
 try:
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.io import loadmat
    from image_to_graph import image_to_graph
    from spectral_clustering import spectral_clustering
 except ImportError as e:
    print("Missing package:", e)
    exit(1)
 def plot_clusters_on_image(image: np.ndarray, cluster_idx: np.ndarray, k: int, title: str, fname: str):
    """
    Draw the per-pixel cluster labels as a color-coded map and save it.

    The pixel values of ``image`` are not drawn; only its height and width
    are used to fold the flat label vector back into a 2-D grid.

    Parameters:
    -----------
    image : np.ndarray of shape (M, N, 3)
        Original RGB image (used for its shape only).
    cluster_idx : np.ndarray of shape (M*N,)
        Flattened array of cluster labels.
    k : int
        Number of clusters; fixes the colormap range to [0, k-1].
    title : str
        Title for the plot.
    fname : str
        Output filename to save.
    """
    height, width, _ = image.shape
    label_grid = cluster_idx.reshape(height, width)

    fig = plt.figure(figsize=(4, 4))
    axes = fig.gca()
    # Pin the color range so a given label maps to the same color in every plot
    axes.imshow(label_grid, cmap='tab10', vmin=0, vmax=k-1)
    axes.set_title(title)
    axes.axis('off')
    fig.tight_layout()

    fig.savefig(fname)
    print(f"Saved: {fname}")
    plt.close(fig)
 def run_demo2(normalized: bool = False):
    """
    Demo 2: segment the images ``d2a`` and ``d2b`` with spectral clustering.

    For each image loaded from ``dip_hw_3.mat`` the affinity matrix is built
    once with image_to_graph() and then clustered for k = 2, 3 and 4. Every
    result is saved to ``plots/demo2_{name}_k{k}_{Normalized|Unnormalized}.png``.

    Parameters:
    -----------
    normalized : bool, optional (default=False)
        Forwarded to spectral_clustering(); also selects the tag used in the
        console output and in the output file names.
    """
    mat_contents = loadmat("dip_hw_3.mat")

    # Tag used both in the log lines and in the file names
    if normalized:
        normalized_str = "Normalized"
    else:
        normalized_str = "Unnormalized"

    for name in ("d2a", "d2b"):
        img = mat_contents[name]
        print(f"\n=== {normalized_str} test for Image {name} - shape: {img.shape} ===")

        # The graph depends only on the image, so build it once per image
        affinity_mat = image_to_graph(img)
        print("Affinity matrix computed.")

        for k in (2, 3, 4):
            print(f"  Clustering with k={k}...")
            labels = spectral_clustering(affinity_mat, k=k, normalized=normalized)
            plot_title = f"{name} spectral clustering (k={k})"
            out_path = f"plots/demo2_{name}_k{k}_{normalized_str}.png"
            plot_clusters_on_image(img, labels, k, title=plot_title, fname=out_path)
 # Script entry point: run the demo twice, first with the unnormalized
 # Laplacian (normalized=False) and then with the normalized one.
 if __name__ == '__main__':
    run_demo2(False)
    run_demo2(True)
--- a/HW03/scripts/dip_hw_3.mat
+++ b/HW03/scripts/dip_hw_3.mat
--- a/HW03/scripts/image_to_graph.py
+++ b/HW03/scripts/image_to_graph.py
@ -0,0 +1,128 @@
 #
 # Image to graph utility
 #
 # For the given data we have:
 # dip_hw_3.mat["d1a"]    # [MN x MN] affinity matrix
 # dip_hw_3.mat["d2a"]    # [M x N x 3] RGB image
 # dip_hw_3.mat["d2b"]    # [M x N x 3] RGB image
 #
 #
 # author: Christos Choutouridis <cchoutou@ece.auth.gr>
 # date:   05/07/2025
 #
 try:
    import numpy as np
    from numpy._typing import NDArray
    from sklearn.metrics import pairwise_distances
    # Testing requirements
    import matplotlib.pyplot
    from scipy.io import loadmat
 except ImportError as e:
    print("Missing package: ", e)
    print("Run: pip install -r requirements.txt to install.")
    exit(1)
 def image_to_graph(
    img_array: NDArray[np.floating]
 ) -> NDArray[np.float64]:
    """
    Converts an input image into a fully connected graph represented
    by an affinity matrix.

    Every pixel is a node; the weight between two pixels decays
    exponentially with the Euclidean distance of their channel vectors.

    Parameters:
    ----------
    img_array : np.ndarray of shape (M, N, C), dtype=float
        The input image with C channels (e.g., 3 for RGB),
        with values normalized in [0, 1].

    Returns:
    -------
    affinity_mat : np.ndarray of shape (M*N, M*N), dtype=float64
        Symmetric affinity matrix representing the fully connected graph.
        A(i, j) = 1 / e^d(i, j) = exp(-||pixel_i - pixel_j||_2)
        Pixels are indexed in row-major order; the diagonal is exactly 1.

    Raises:
    ------
    ValueError
        If img_array is not of a floating point dtype, or is not 3-dimensional.
    """
    # Function-scope import keeps this block self-contained; scipy is already
    # a requirement of the project (scipy.io.loadmat).
    from scipy.spatial.distance import cdist

    if not np.issubdtype(img_array.dtype, np.floating):
        raise ValueError("img_array must be of float type with values in [0, 1].")
    if img_array.ndim != 3:
        raise ValueError("img_array must have shape (M, N, C).")

    M, N, C = img_array.shape
    pixels = img_array.reshape(-1, C)  # shape (M*N, C)

    # Euclidean distances between all pixel vectors, shape (MN, MN).
    # cdist always works in float64, is exactly symmetric for identical
    # inputs and yields an exact zero diagonal.
    distances = cdist(pixels, pixels, metric='euclidean')

    # exp(-d) is finite for d = 0, so no special handling of the diagonal
    # is required (there is no division by the distance).
    affinity_mat = np.exp(-distances)
    return affinity_mat
 def _test_1(plot : bool = False):
    """
    Smoke test: run image_to_graph() on a random 4x4 RGB image and print
    the shape, symmetry and value range of the resulting affinity matrix.

    When ``plot`` is True the matrix is also shown as a heatmap in an
    interactive window (switches matplotlib to the TkAgg backend).
    """
    print(" === Test 1 === ")
    print("")

    # Random float32 image with values in [0, 1)
    random_rgb = np.random.rand(4, 4, 3).astype(np.float32)
    A = image_to_graph(random_rgb)

    # Summary of the result; 16 pixels -> expected shape (16, 16)
    print("Shape of affinity matrix:", A.shape)
    print("Is symmetric:", np.allclose(A, A.T))
    print("Max value:", np.max(A))
    print("Min value:", np.min(A))

    if not plot:
        return

    matplotlib.use("TkAgg")
    pyplot = matplotlib.pyplot
    pyplot.imshow(A, cmap='hot')
    pyplot.colorbar()
    pyplot.title("Affinity Matrix Heatmap")
    pyplot.show()
 def _test_2(plot : bool = False):
    """
    Smoke test: run image_to_graph() on the ``d2b`` image stored in
    ``dip_hw_3.mat`` and print the shape, symmetry and value range of
    the resulting affinity matrix.

    When ``plot`` is True the matrix is also shown as a heatmap in an
    interactive window (switches matplotlib to the TkAgg backend).
    """
    print(" === Test 2 === ")
    print("")

    d2b_image = loadmat("dip_hw_3.mat")["d2b"]

    # Describe the input before processing it
    print("Input image shape:", d2b_image.shape)
    print("dtype:", d2b_image.dtype)

    A = image_to_graph(d2b_image)

    # Summary of the result
    print("Affinity matrix shape:", A.shape)
    print("Is symmetric:", np.allclose(A, A.T))
    print("Max value:", np.max(A))
    print("Min value:", np.min(A))

    if not plot:
        return

    matplotlib.use("TkAgg")
    pyplot = matplotlib.pyplot
    pyplot.imshow(A, cmap='hot')
    pyplot.title("Affinity Matrix Heatmap")
    pyplot.colorbar()
    pyplot.show()
 # Script entry point: run both self-tests when this file is executed directly.
 if __name__ == '__main__':
    # The argument is the `plot` flag. True opens an interactive heatmap
    # window and requires the TkAgg matplotlib backend; pass False to only
    # print the statistics.
    _test_1(True)
    _test_2(True)
--- a/HW03/scripts/plots/demo1_k2.png
+++ b/HW03/scripts/plots/demo1_k2.png
--- a/HW03/scripts/plots/demo1_k3.png
+++ b/HW03/scripts/plots/demo1_k3.png
--- a/HW03/scripts/plots/demo1_k4.png
+++ b/HW03/scripts/plots/demo1_k4.png
--- a/HW03/scripts/plots/demo2_d2a_k2_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k2_Normalized.png
--- a/HW03/scripts/plots/demo2_d2a_k2_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k2_Unnormalized.png
--- a/HW03/scripts/plots/demo2_d2a_k3_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k3_Normalized.png
--- a/HW03/scripts/plots/demo2_d2a_k3_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k3_Unnormalized.png
--- a/HW03/scripts/plots/demo2_d2a_k4_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k4_Normalized.png
--- a/HW03/scripts/plots/demo2_d2a_k4_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2a_k4_Unnormalized.png
--- a/HW03/scripts/plots/demo2_d2b_k2_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k2_Normalized.png
--- a/HW03/scripts/plots/demo2_d2b_k2_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k2_Unnormalized.png
--- a/HW03/scripts/plots/demo2_d2b_k3_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k3_Normalized.png
--- a/HW03/scripts/plots/demo2_d2b_k3_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k3_Unnormalized.png
--- a/HW03/scripts/plots/demo2_d2b_k4_Normalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k4_Normalized.png
--- a/HW03/scripts/plots/demo2_d2b_k4_Unnormalized.png
+++ b/HW03/scripts/plots/demo2_d2b_k4_Unnormalized.png
--- a/HW03/scripts/requirements.txt
+++ b/HW03/scripts/requirements.txt
@ -0,0 +1,5 @@
 numpy
 scipy
 scikit-learn
 matplotlib
--- a/HW03/scripts/spectral_clustering.py
+++ b/HW03/scripts/spectral_clustering.py
@ -0,0 +1,164 @@
 #
 # Spectral clustering routine
 #
 # For the given data we have:
 # dip_hw_3.mat["d1a"]    # [MN x MN] affinity matrix
 # dip_hw_3.mat["d2a"]    # [M x N x 3] RGB image
 # dip_hw_3.mat["d2b"]    # [M x N x 3] RGB image
 #
 #
 # author: Christos Choutouridis <cchoutou@ece.auth.gr>
 # date:   05/07/2025
 #
 try:
    import numpy as np
    from numpy.typing import NDArray
    from scipy.sparse.linalg import eigs
    from sklearn.cluster import KMeans
    # Testing requirements
    from scipy.io import loadmat
    import matplotlib.pyplot
    from sklearn.cluster import spectral_clustering as sk_spectral
    from sklearn.metrics import adjusted_rand_score
 except ImportError as e:
    print("Missing package:", e)
    print("Run: pip install -r requirements.txt")
    exit(1)
 def spectral_clustering(
    affinity_mat: NDArray[np.floating],
    k: int,
    normalized: bool = False
 ) -> NDArray[np.int32]:
    """
    Performs spectral clustering on a given affinity matrix.

    The rows of the matrix formed by the k eigenvectors of the graph
    Laplacian with the smallest eigenvalues are clustered with k-means.

    Parameters:
    ----------
    affinity_mat : np.ndarray of shape (n, n), dtype=float
        The symmetric affinity matrix representing a graph.
    k : int
        The number of clusters (1 <= k <= n).
    normalized : bool, optional (default=False)
        Whether to use the normalized Laplacian (L_sym = I - D^(-1/2) A D^(-1/2))
        instead of the unnormalized one (L = D - A).
        !note: Do not confuse this with the normalized-cuts implementation!!

    Returns:
    -------
    cluster_idx : np.ndarray of shape (n,), dtype=int32
        An array of cluster labels for each node.

    Raises:
    ------
    ValueError
        If affinity_mat is not a square 2-D matrix or k is outside [1, n].
    """
    # Work in float64: ARPACK needs a floating point operator and the
    # Laplacian of an integer matrix would otherwise stay integer.
    affinity = np.asarray(affinity_mat, dtype=np.float64)
    if affinity.ndim != 2 or affinity.shape[0] != affinity.shape[1]:
        raise ValueError("affinity_mat must be a square (n, n) matrix.")
    n = affinity.shape[0]
    if not 1 <= k <= n:
        raise ValueError(f"k must be in [1, {n}], got {k}.")

    # Node degrees; kept as a vector, the dense D is only built when needed
    degrees = affinity.sum(axis=1)

    if normalized:
        with np.errstate(divide='ignore'):
            inv_sqrt_deg = 1.0 / np.sqrt(degrees)
        # Isolated nodes (degree 0) contribute nothing instead of inf
        inv_sqrt_deg[np.isinf(inv_sqrt_deg)] = 0.0
        # L = I - D^(-1/2) A D^(-1/2); scaling rows and columns by broadcasting
        # is O(n^2), whereas multiplying by dense diagonal matrices is O(n^3).
        L = np.eye(n) - inv_sqrt_deg[:, None] * affinity * inv_sqrt_deg[None, :]
    else:
        L = np.diag(degrees) - affinity

    if k < n - 1:
        # k eigenvectors with the smallest eigenvalues ('SR' = Smallest Real part)
        eigvals, eigvecs = eigs(L, k=k, which='SR')
        # Convert complex -> real (imaginary parts should be negligible)
        U = np.real(eigvecs)
    else:
        # eigs() requires k < n - 1, so very small graphs are handled with a
        # dense symmetric solver; eigh() returns eigenvalues in ascending order.
        _, all_eigvecs = np.linalg.eigh(L)
        U = all_eigvecs[:, :k]

    # Each row of U is a point to be clustered.
    # random_state is fixed to 1 for reproducibility across experiments.
    kmeans = KMeans(n_clusters=k, random_state=1)
    kmeans.fit(U)

    # Cluster label of every node
    return kmeans.labels_.astype(np.int32)
 def _test_d1a(k: int, plot :bool = False):
    """
    Cluster the ``d1a`` affinity matrix from ``dip_hw_3.mat`` into k groups
    and print the labels.

    When ``plot`` is True, the nodes are additionally scattered along the
    first two eigenvectors of the unnormalized Laplacian, colored by label,
    in an interactive window (switches matplotlib to the TkAgg backend).
    """
    print(f"=== Spectral clustering test on d1a (k={k}) ===")
    print("")

    A = loadmat("dip_hw_3.mat")["d1a"]
    print("Loaded d1a affinity matrix with shape:", A.shape)

    labels = spectral_clustering(A, k)
    print("Cluster labels:")
    print(labels)
    print("Unique clusters:", np.unique(labels))

    if not plot:
        return

    # Recompute the spectral embedding (unnormalized Laplacian) for display
    laplacian = np.diag(A.sum(axis=1)) - A
    _, raw_vecs = eigs(laplacian, k=k, which='SR')
    embedding = np.real(raw_vecs)

    # Scatter the nodes along the first 2 dimensions of the embedding
    matplotlib.use("TkAgg")
    pyplot = matplotlib.pyplot
    pyplot.figure()
    pyplot.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap='tab10', s=100, edgecolors='k')
    pyplot.title(f"Spectral Clustering (k={k}) on d1a")
    pyplot.xlabel("Eigenvector 1")
    pyplot.ylabel("Eigenvector 2")
    pyplot.grid(True)
    pyplot.tight_layout()
    pyplot.show()
 def compare_with_sklearn(k: int, plot: bool = False):
    """
    Cross-check this module's spectral_clustering() against scikit-learn's
    implementation on the ``d1a`` affinity matrix from ``dip_hw_3.mat``.

    Both label vectors and their Adjusted Rand Index are printed. When
    ``plot`` is True the two labelings are also drawn on top of each other
    in an interactive window (switches matplotlib to the TkAgg backend).
    """
    print(f"=== Comparing with sklearn spectral_clustering (k={k}) ===")
    print("")

    A = loadmat("dip_hw_3.mat")["d1a"]

    # Own implementation, run with the normalized Laplacian so that it is
    # comparable to sklearn (which uses a normalized Laplacian)
    labels_own = spectral_clustering(A, k, True)

    sklearn_options = {
        "affinity": A,
        "n_clusters": k,
        "assign_labels": "kmeans",
        "random_state": 1,
    }
    labels_sklearn = sk_spectral(**sklearn_options)

    # ARI compares the partitions and ignores how the clusters are numbered
    ari = adjusted_rand_score(labels_own, labels_sklearn)
    print(f"Labels (own):     {labels_own}")
    print(f"Labels (sklearn): {labels_sklearn}")
    print(f"Adjusted Rand Index (ARI): {ari:.4f}")

    if not plot:
        return

    # Visual comparison of the two labelings
    matplotlib.use("TkAgg")
    pyplot = matplotlib.pyplot
    pyplot.figure(figsize=(6, 1.5))
    pyplot.title(f"Cluster comparison (k={k})")
    pyplot.plot(labels_own, 'o-', label='own', markersize=8)
    pyplot.plot(labels_sklearn, 'x--', label='sklearn', markersize=6)
    pyplot.yticks(range(k))
    pyplot.xlabel("Node index")
    pyplot.legend()
    pyplot.tight_layout()
    pyplot.show()
 # Script entry point: for k = 2, 3, 4 run the d1a self-test and then the
 # comparison against scikit-learn.
 if __name__ == '__main__':
    for k in [2, 3, 4]:
        # plot=True opens interactive windows and requires the TkAgg
        # matplotlib backend; keep plot=False to only print the results.
        _test_d1a(k, plot=False)
        compare_with_sklearn(k, plot=False)
		`@ -0,0 +1 @@`
							`Subproject commit 74ec4b5f6c66382e5f1b6d2e6930897e4ed53ea6`