DigitalImageProcessing/HW03/scripts/spectral_clustering.py

#
# Spectral clustering routine
#
# For the given data we have:
# dip_hw_3.mat["d1a"]    # [MN x MN] affinity matrix
# dip_hw_3.mat["d2a"]    # [M x N x 3] RGB image
# dip_hw_3.mat["d2b"]    # [M x N x 3] RGB image
#
#
# author: Christos Choutouridis <cchoutou@ece.auth.gr>
# date:   05/07/2025
#

try:
    import numpy as np
    from numpy.typing import NDArray
    from scipy.sparse.linalg import eigs
    from sklearn.cluster import KMeans

    # Testing requirements
    from scipy.io import loadmat
    import matplotlib.pyplot
    from sklearn.cluster import spectral_clustering as sk_spectral
    from sklearn.metrics import adjusted_rand_score
except ImportError as e:
    print("Missing package:", e)
    print("Run: pip install -r requirements.txt")
    exit(1)


def spectral_clustering(
    affinity_mat: NDArray[np.floating],
    k: int,
    normalized: bool = False
) -> NDArray[np.int32]:
    """
    Performs spectral clustering on a given affinity matrix.

    Parameters:
    ----------
    affinity_mat : np.ndarray of shape (n, n), dtype=float
        The symmetric affinity matrix representing a graph.
    k : int
        The number of clusters.
    normalized : bool, optional (default=False)
        Whether to use the normalized Laplacian (L_sym = I - D^(-1/2) A D^(-1/2))
        !note: Don't miss-interpret this with normalized-cuts implementation!!

    Returns:
    -------
    cluster_idx : np.ndarray of shape (n,), dtype=int
        An array of cluster labels for each node.
    """
    # Degree matrix
    D = np.diag(affinity_mat.sum(axis=1))

    if normalized:
        with np.errstate(divide='ignore'):
            D_inv_sqrt = np.diag(1.0 / np.sqrt(np.diag(D)))
            D_inv_sqrt[np.isinf(D_inv_sqrt)] = 0.0
        # L = I - D^(-1/2) A D^(-1/2)
        L = np.eye(affinity_mat.shape[0]) - D_inv_sqrt @ affinity_mat @ D_inv_sqrt
    else:
        L = D - affinity_mat

    # Compute k smallest eigenvectors (use eigsh if L is symmetric positive-definite)
    eigvals, eigvecs = eigs(L, k=k, which='SR')  # 'SR' = Smallest Real part

    # Form matrix U with eigenvectors as columns
    # Convert complex -> real (imaginary parts should be negligible)
    U = np.real(eigvecs)

    # Each row is a vector to be clustered
    # random_state parameter to 1, to ensure reproducibility across experiments.
    kmeans = KMeans(n_clusters=k, random_state=1)
    kmeans.fit(U)

    # obtain the cluster labels for each input data point
    return kmeans.labels_.astype(np.int32)


def _test_d1a(k: int, plot :bool = False):
    """
    Runs spectral clustering on d1a from dip_hw_3.mat for a given k.
    """
    print(f"=== Spectral clustering test on d1a (k={k}) ===")
    print(f"")

    data = loadmat("dip_hw_3.mat")
    A = data["d1a"]
    print("Loaded d1a affinity matrix with shape:", A.shape)

    labels = spectral_clustering(A, k)

    print("Cluster labels:")
    print(labels)
    print("Unique clusters:", np.unique(labels))

    if plot:
        D = np.diag(A.sum(axis=1))
        L = D - A
        eigvals, eigvecs = eigs(L, k=k, which='SR')
        eigvecs = np.real(eigvecs)

        # Plot first 2 dimensions
        matplotlib.use("TkAgg")
        matplotlib.pyplot.figure()
        matplotlib.pyplot.scatter(eigvecs[:, 0], eigvecs[:, 1], c=labels, cmap='tab10', s=100, edgecolors='k')
        matplotlib.pyplot.title(f"Spectral Clustering (k={k}) on d1a")
        matplotlib.pyplot.xlabel("Eigenvector 1")
        matplotlib.pyplot.ylabel("Eigenvector 2")
        matplotlib.pyplot.grid(True)
        matplotlib.pyplot.tight_layout()
        matplotlib.pyplot.show()
        #matplotlib.pyplot.savefig(f"spectral_d1a_k{k}.png")


def compare_with_sklearn(k: int, plot: bool = False):
    print(f"=== Comparing with sklearn spectral_clustering (k={k}) ===")
    print(f"")

    data = loadmat("dip_hw_3.mat")
    A = data["d1a"]

    # Your implementation
    labels_own = spectral_clustering(A, k, True)

    # sklearn implementation (uses normalized Laplacian)
    labels_sklearn = sk_spectral(
        affinity=A,
        n_clusters=k,
        assign_labels="kmeans",
        random_state=1
    )

    # Compare clustering assignments using ARI
    ari = adjusted_rand_score(labels_own, labels_sklearn)

    print(f"Labels (own):     {labels_own}")
    print(f"Labels (sklearn): {labels_sklearn}")
    print(f"Adjusted Rand Index (ARI): {ari:.4f}")

    # Optional: bar plot to compare visually
    if plot:
        matplotlib.use("TkAgg")
        matplotlib.pyplot.figure(figsize=(6, 1.5))
        matplotlib.pyplot.title(f"Cluster comparison (k={k})")
        matplotlib.pyplot.plot(labels_own, 'o-', label='own', markersize=8)
        matplotlib.pyplot.plot(labels_sklearn, 'x--', label='sklearn', markersize=6)
        matplotlib.pyplot.yticks(range(k))
        matplotlib.pyplot.xlabel("Node index")
        matplotlib.pyplot.legend()
        matplotlib.pyplot.tight_layout()
        matplotlib.pyplot.show()
        # matplotlib.pyplot.savefig(f"compare_k{k}.png")
        # print(f"Saved plot: compare_k{k}.png")


if __name__ == '__main__':
    for k in [2, 3, 4]:
        # If you have TkAgg you can pass plot=True, otherwise pass False
        _test_d1a(k, plot=False)
        compare_with_sklearn(k, plot=False)