Level_1: File restructure to support centralized development

2026-02-08 17:22:23 +02:00 · 2026-02-08 17:22:23 +02:00 · 8427d0e721
commit 8427d0e721
parent dde11ddebe
25 changed files with 3990 additions and 1229 deletions
--- a/source/core/aac_coder.py
+++ b/source/core/aac_coder.py
@ -0,0 +1,198 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - AAC Coder (Core)
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 1 AAC encoder orchestration.
+#   Keeps the same functional behavior as the original level_1 implementation:
+#   - Reads WAV via soundfile
+#   - Validates stereo and 48 kHz
+#   - Frames into 2048 samples with hop=1024 and zero padding at both ends
+#   - SSC decision uses next-frame attack detection
+#   - Filterbank analysis (MDCT)
+#   - Stores per-channel spectra in AACSeq1 schema:
+#       * ESH: (128, 8)
+#       * else: (1024, 1)
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Union
+
+import soundfile as sf
+
+from core.aac_configuration import WIN_TYPE
+from core.aac_filterbank import aac_filter_bank
+from core.aac_ssc import aac_SSC
+from core.aac_types import *
+
+
+# -----------------------------------------------------------------------------
+# Public helpers (useful for level_x demo wrappers)
+# -----------------------------------------------------------------------------
+
+def aac_read_wav_stereo_48k(filename_in: Union[str, Path]) -> tuple[StereoSignal, int]:
+    """
+    Load a WAV file with soundfile and enforce the Level-1 input constraints.
+
+    Parameters
+    ----------
+    filename_in : Union[str, Path]
+        Path of the WAV file to load.
+
+    Returns
+    -------
+    x : StereoSignal (np.ndarray)
+        The samples converted to float64, shape (N, 2).
+    fs : int
+        Sampling rate in Hz (always 48000 when the call succeeds).
+
+    Raises
+    ------
+    ValueError
+        If the file does not have exactly two channels, or if its sampling
+        rate is not 48000 Hz.
+    """
+    wav_path = Path(filename_in)
+
+    # always_2d=True guarantees a (N, channels) array even for mono input,
+    # so the channel check below can index shape[1] unconditionally.
+    samples, rate = sf.read(str(wav_path), always_2d=True)
+    samples = np.asarray(samples, dtype=np.float64)
+    rate = int(rate)
+
+    if samples.shape[1] != 2:
+        raise ValueError("Input must be stereo (2 channels).")
+    if rate != 48000:
+        raise ValueError("Input sampling rate must be 48 kHz.")
+
+    return samples, rate
+
+
+def aac_pack_frame_f_to_seq_channels(frame_type: FrameType, frame_f: FrameF) -> tuple[FrameChannelF, FrameChannelF]:
+    """
+    Split the stereo spectrum returned by aac_filter_bank() into the two
+    per-channel arrays stored in the Level-1 AACSeq1 schema.
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        "OLS" | "LSS" | "ESH" | "LPS". Only "ESH" selects the short-block
+        layout; every other value is handled as a long frame.
+    frame_f : FrameF
+        Stereo spectrum:
+        - ESH: shape (128, 16), columns interleaved as [L0 R0 L1 R1 ... L7 R7]
+        - otherwise: shape (1024, 2), columns [L R]
+
+    Returns
+    -------
+    chl_f : FrameChannelF
+        Left channel coefficients, (128, 8) for ESH and (1024, 1) otherwise.
+    chr_f : FrameChannelF
+        Right channel coefficients, same shape as chl_f.
+
+    Raises
+    ------
+    ValueError
+        If frame_f does not have the shape required by frame_type.
+    """
+    if frame_type != "ESH":
+        # Long frames keep a trailing singleton axis: (1024, 1) per channel.
+        if frame_f.shape != (1024, 2):
+            raise ValueError("For OLS/LSS/LPS, frame_f must have shape (1024, 2).")
+        left = frame_f[:, 0:1].astype(np.float64, copy=False)
+        right = frame_f[:, 1:2].astype(np.float64, copy=False)
+        return left, right
+
+    if frame_f.shape != (128, 16):
+        raise ValueError("For ESH, frame_f must have shape (128, 16).")
+
+    # De-interleave: even columns belong to the left channel, odd to the right.
+    left = np.empty((128, 8), dtype=np.float64)
+    right = np.empty((128, 8), dtype=np.float64)
+    left[:, :] = frame_f[:, 0::2]
+    right[:, :] = frame_f[:, 1::2]
+    return left, right
+
+
+
+# -----------------------------------------------------------------------------
+# Level 1 encoder
+# -----------------------------------------------------------------------------
+
+def aac_coder_1(filename_in: Union[str, Path]) -> AACSeq1:
+    """
+    Encode a stereo 48 kHz WAV file into the Level-1 AAC frame sequence.
+
+    Processing steps:
+    1. Load and validate the input through aac_read_wav_stereo_48k().
+    2. Surround the signal with `hop` zero samples on each side.
+    3. Walk the padded signal in 2048-sample frames advancing by 1024.
+    4. Choose each frame type with aac_SSC(), which also receives the
+       following frame (zero-extended when it runs past the padded signal).
+    5. Transform the frame with aac_filter_bank() and split the result per
+       channel with aac_pack_frame_f_to_seq_channels().
+
+    Parameters
+    ----------
+    filename_in : Union[str, Path]
+        Input WAV filename (must be stereo, 48 kHz).
+
+    Returns
+    -------
+    AACSeq1
+        One dict per frame with keys "frame_type", "win_type", "chl" and
+        "chr"; each channel entry stores its coefficients under "frame_F".
+
+    Raises
+    ------
+    ValueError
+        Propagated from input validation, or raised if framing fails.
+
+    Notes
+    -----
+    For an N-sample input the padded length is N + 2*hop, so the number of
+    frames is N // hop + 1.
+    """
+    samples, _fs = aac_read_wav_stereo_48k(filename_in)  # rate already validated
+
+    hop = 1024
+    win = 2048
+
+    # One hop of silence in front and one behind the signal.
+    silence = np.zeros((hop, 2), dtype=np.float64)
+    x_pad = np.vstack([silence, samples, silence])
+
+    # Count only the frames that fit entirely inside the padded signal.
+    n_frames = int((x_pad.shape[0] - win) // hop + 1)
+    if n_frames <= 0:
+        raise ValueError("Input too short for framing.")
+
+    win_type: WinType = WIN_TYPE
+    encoded: AACSeq1 = []
+    last_type: FrameType = "OLS"  # the state machine starts from a long frame
+
+    for offset in range(0, n_frames * hop, hop):
+        current: FrameT = x_pad[offset:offset + win, :]
+        if current.shape != (win, 2):
+            # Guard against a mismatch between n_frames and the slicing above.
+            raise ValueError("Internal framing error: frame_t has wrong shape.")
+
+        # Look-ahead frame for the SSC; near the end of the stream the slice
+        # comes back short and is completed with zeros.
+        lookahead = x_pad[offset + hop:offset + hop + win, :]
+        missing = win - lookahead.shape[0]
+        if missing > 0:
+            filler = np.zeros((missing, 2), dtype=np.float64)
+            lookahead = np.vstack([lookahead, filler])
+
+        frame_type = aac_SSC(current, lookahead, last_type)
+        spectrum = aac_filter_bank(current, frame_type, win_type)
+        left_f, right_f = aac_pack_frame_f_to_seq_channels(frame_type, spectrum)
+
+        encoded.append({
+            "frame_type": frame_type,
+            "win_type": win_type,
+            "chl": {"frame_F": left_f},
+            "chr": {"frame_F": right_f},
+        })
+
+        last_type = frame_type
+
+    return encoded
--- a/source/core/aac_configuration.py
+++ b/source/core/aac_configuration.py
@ -0,0 +1,22 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Configuration
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   This module contains the global configurations
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+# Imports
+from core.aac_types import WinType
+
+# Window type
+# Options: "SIN", "KBD"
+# Global window family selector. aac_coder_1() passes this value to
+# aac_filter_bank() and stores it in every encoded frame under "win_type".
+WIN_TYPE: WinType = "SIN"
--- a/source/core/aac_decoder.py
+++ b/source/core/aac_decoder.py
@ -0,0 +1,166 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Inverse AAC Coder (Core)
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 1 AAC decoder orchestration (inverse of aac_coder_1()).
+#   Keeps the same functional behavior as the original level_1 implementation:
+#   - Re-pack per-channel spectra into FrameF expected by aac_i_filter_bank()
+#   - IMDCT synthesis per frame
+#   - Overlap-add with hop=1024
+#   - Remove encoder boundary padding: hop at start and hop at end
+#
+#   Note:
+#   aac_decoder_1() returns the reconstructed samples and also writes them
+#   to the given output filename as a 48 kHz WAV file.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Union
+
+import soundfile as sf
+
+from core.aac_filterbank import aac_i_filter_bank
+from core.aac_types import *
+
+
+# -----------------------------------------------------------------------------
+# Public helpers (useful for level_x demo wrappers)
+# -----------------------------------------------------------------------------
+
+def aac_unpack_seq_channels_to_frame_f(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
+    """
+    Combine the two per-channel spectra of the Level-1 AACSeq1 schema into
+    the single stereo array that aac_i_filter_bank() consumes.
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        "OLS" | "LSS" | "ESH" | "LPS". Only "ESH" selects the short-block
+        layout; every other value is handled as a long frame.
+    chl_f : FrameChannelF
+        Left channel coefficients, (128, 8) for ESH and (1024, 1) otherwise.
+    chr_f : FrameChannelF
+        Right channel coefficients, same shape as chl_f.
+
+    Returns
+    -------
+    FrameF
+        - ESH: (128, 16), columns interleaved as [L0 R0 L1 R1 ... L7 R7]
+        - otherwise: (1024, 2), columns [L R]
+
+    Raises
+    ------
+    ValueError
+        If either channel array does not have the shape required by
+        frame_type.
+    """
+    if frame_type == "ESH":
+        if chl_f.shape != (128, 8) or chr_f.shape != (128, 8):
+            raise ValueError("ESH channel frame_F must have shape (128, 8).")
+
+        # Interleave: left sub-blocks in even columns, right in odd columns.
+        packed = np.empty((128, 16), dtype=np.float64)
+        packed[:, 0::2] = chl_f
+        packed[:, 1::2] = chr_f
+        return packed
+
+    # Long frames are stored with a trailing singleton axis per channel.
+    if chl_f.shape != (1024, 1) or chr_f.shape != (1024, 1):
+        raise ValueError("Non-ESH channel frame_F must have shape (1024, 1).")
+
+    stereo = np.empty((1024, 2), dtype=np.float64)
+    stereo[:, 0:1] = chl_f
+    stereo[:, 1:2] = chr_f
+    return stereo
+
+
+def aac_remove_padding(y_pad: StereoSignal, hop: int = 1024) -> StereoSignal:
+    """
+    Strip the boundary padding that the Level-1 encoder adds:
+    hop samples at the start and hop samples at the end.
+
+    Parameters
+    ----------
+    y_pad : StereoSignal (np.ndarray)
+        Reconstructed padded stream, shape (N_pad, 2).
+    hop : int
+        Number of samples to drop from each end (default 1024). A value of 0
+        returns the whole stream.
+
+    Returns
+    -------
+    StereoSignal (np.ndarray)
+        Unpadded reconstructed stream, shape (N_pad - 2*hop, 2).
+
+    Raises
+    ------
+    ValueError
+        If hop is negative, or if y_pad is too short to unpad.
+    """
+    if hop < 0:
+        raise ValueError("hop must be non-negative.")
+
+    n_total = y_pad.shape[0]
+    if n_total < 2 * hop:
+        raise ValueError("Decoded stream too short to unpad.")
+
+    # Use an explicit end index: the slice [hop:-hop] collapses to [0:0]
+    # (an empty result) when hop == 0.
+    return y_pad[hop:n_total - hop, :]
+
+
+# -----------------------------------------------------------------------------
+# Level 1 decoder (core)
+# -----------------------------------------------------------------------------
+
+def aac_decoder_1(aac_seq_1: AACSeq1, filename_out: Union[str, Path]) -> StereoSignal:
+    """
+    Decode a Level-1 AAC frame sequence (inverse of aac_coder_1()).
+
+    Each frame is re-packed into the stereo layout, synthesized with
+    aac_i_filter_bank() and overlap-added at multiples of 1024 samples. The
+    hop-sized padding at both ends is then removed, the result is written to
+    filename_out at 48 kHz, and the samples are returned.
+
+    Parameters
+    ----------
+    aac_seq_1 : AACSeq1
+        Encoded sequence as produced by aac_coder_1().
+    filename_out : Union[str, Path]
+        Output WAV filename (written as 48 kHz stereo).
+
+    Returns
+    -------
+    StereoSignal
+        Decoded time-domain samples, shape (N, 2), dtype float64.
+
+    Raises
+    ------
+    ValueError
+        Propagated from the re-packing, synthesis or unpadding helpers; an
+        empty sequence fails in aac_remove_padding().
+    """
+    out_path = Path(filename_out)
+
+    hop = 1024
+    win = 2048
+    n_frames = len(aac_seq_1)
+
+    # The last frame starts at (n_frames - 1) * hop and spans win samples,
+    # which fixes the length of the padded reconstruction buffer.
+    padded_len = (n_frames - 1) * hop + win
+    y_pad: StereoSignal = np.zeros((padded_len, 2), dtype=np.float64)
+
+    for index, frame in enumerate(aac_seq_1):
+        frame_type: FrameType = frame["frame_type"]
+        win_type: WinType = frame["win_type"]
+
+        left_f = np.asarray(frame["chl"]["frame_F"], dtype=np.float64)
+        right_f = np.asarray(frame["chr"]["frame_F"], dtype=np.float64)
+
+        spectrum: FrameF = aac_unpack_seq_channels_to_frame_f(frame_type, left_f, right_f)
+        synthesized: FrameT = aac_i_filter_bank(spectrum, frame_type, win_type)  # (2048, 2)
+
+        # Overlap-add: consecutive frames share half of their samples.
+        offset = index * hop
+        y_pad[offset:offset + win, :] += synthesized
+
+    decoded: StereoSignal = aac_remove_padding(y_pad, hop=hop)
+
+    # Level 1 assumption: the output is always written at 48 kHz.
+    sf.write(str(out_path), decoded, 48000)
+
+    return decoded
--- a/source/core/aac_filterbank.py
+++ b/source/core/aac_filterbank.py
@ -0,0 +1,454 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank module
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Filterbank stage (MDCT/IMDCT), windowing, ESH packing/unpacking
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from core.aac_types import *
+
+from scipy.signal.windows import kaiser
+
+# Private helpers for Filterbank
+# ------------------------------------------------------------
+
+def _sin_window(N: int) -> Window:
+    """
+    Create the sinusoidal (SIN) window of length N.
+
+    Definition:
+        w[n] = sin(pi/N * (n + 0.5)),  for 0 <= n < N
+
+    Parameters
+    ----------
+    N : int
+        Window length in samples.
+
+    Returns
+    -------
+    Window
+        1-D float64 array of shape (N,).
+    """
+    step = np.pi / N
+    sample_idx = np.arange(N, dtype=np.float64)
+    return np.sin(step * (sample_idx + 0.5))
+
+
+def _kbd_window(N: int, alpha: float) -> Window:
+    """
+    Build a Kaiser-Bessel-Derived (KBD) window of length N.
+
+    Construction:
+      1) Build a Kaiser kernel of length (N/2 + 1) with beta = pi * alpha.
+      2) Form the rising half as sqrt(cumulative sum / total sum) over the
+         first N/2 kernel samples.
+      3) Mirror the rising half to obtain the falling half.
+
+    Notes
+    -----
+    - AAC uses N=2048 for long and N=256 for short windows.
+    - The assignment specifies alpha=6 for long windows and alpha=4 for
+      short windows.
+
+    Parameters
+    ----------
+    N : int
+        Window length in samples; must be a positive even number.
+    alpha : float
+        KBD alpha parameter.
+
+    Returns
+    -------
+    Window
+        1-D float64 array of shape (N,).
+
+    Raises
+    ------
+    ValueError
+        If N is not a positive even integer. Without this check an odd N
+        would silently yield a window of length N - 1.
+    """
+    if N <= 0 or N % 2 != 0:
+        raise ValueError(f"KBD window length must be a positive even integer, got {N!r}.")
+
+    half = N // 2
+
+    # Kaiser kernel over samples 0 .. half (half + 1 values).
+    kernel = kaiser(half + 1, beta=np.pi * alpha).astype(np.float64)
+
+    running = np.cumsum(kernel)
+    total = running[-1]
+
+    # The last cumulative value (== total) is excluded so that the rising
+    # half has exactly `half` samples, n = 0 .. half-1.
+    rising = np.sqrt(running[:-1] / total)
+
+    return np.concatenate([rising, rising[::-1]])
+
+
+def _long_window(win_type: WinType) -> Window:
+    """
+    Build the 2048-sample long window of the requested family.
+
+    Parameters
+    ----------
+    win_type : WinType
+        "SIN" for the sinusoidal window, "KBD" for the Kaiser-Bessel-Derived
+        window (built with alpha = 6).
+
+    Returns
+    -------
+    Window
+        1-D float64 array of shape (2048,).
+
+    Raises
+    ------
+    ValueError
+        If win_type is neither "SIN" nor "KBD".
+    """
+    if win_type not in ("SIN", "KBD"):
+        raise ValueError(f"Invalid win_type: {win_type!r}")
+
+    # alpha = 6 is the assignment-specific value for long KBD windows.
+    return _sin_window(2048) if win_type == "SIN" else _kbd_window(2048, alpha=6.0)
+
+
+def _short_window(win_type: WinType) -> Window:
+    """
+    Build the 256-sample short window of the requested family.
+
+    Parameters
+    ----------
+    win_type : WinType
+        "SIN" for the sinusoidal window, "KBD" for the Kaiser-Bessel-Derived
+        window (built with alpha = 4).
+
+    Returns
+    -------
+    Window
+        1-D float64 array of shape (256,).
+
+    Raises
+    ------
+    ValueError
+        If win_type is neither "SIN" nor "KBD".
+    """
+    if win_type not in ("SIN", "KBD"):
+        raise ValueError(f"Invalid win_type: {win_type!r}")
+
+    # alpha = 4 is the assignment-specific value for short KBD windows.
+    return _sin_window(256) if win_type == "SIN" else _kbd_window(256, alpha=4.0)
+
+
+def _window_sequence(frame_type: FrameType, win_type: WinType) -> Window:
+    """
+    Assemble the 2048-sample window used for the long frame types.
+
+    A single window family is used for both the long and the short parts
+    (no mixed KBD/SIN halves).
+
+    Layout per frame type:
+    - "OLS": the plain long window Wl.
+    - "LSS": [Wl[0:1024], ones(448), Ws[128:256], zeros(448)]
+    - "LPS": [zeros(448), Ws[0:128], ones(448), Wl[1024:2048]]
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        One of "OLS", "LSS", "LPS".
+    win_type : WinType
+        Either "SIN" or "KBD".
+
+    Returns
+    -------
+    Window
+        1-D float64 array of shape (2048,).
+
+    Raises
+    ------
+    ValueError
+        If win_type is invalid (reported first), or if frame_type is not one
+        of the three long frame types.
+    """
+    long_win = _long_window(win_type)    # (2048,)
+    short_win = _short_window(win_type)  # (256,)
+
+    if frame_type == "OLS":
+        return long_win
+
+    # Flat and silent stretches that surround the 128-sample short slope.
+    flat = np.ones(448, dtype=np.float64)
+    silent = np.zeros(448, dtype=np.float64)
+
+    if frame_type == "LSS":
+        # Long rise, plateau, short fall, then zeros.
+        return np.concatenate([long_win[0:1024], flat, short_win[128:256], silent])
+
+    if frame_type == "LPS":
+        # Zeros, short rise, plateau, then long fall.
+        return np.concatenate([silent, short_win[0:128], flat, long_win[1024:2048]])
+
+    raise ValueError(f"Invalid frame_type for long window sequence: {frame_type!r}")
+
+
+def _mdct(s: TimeSignal) -> MdctCoeffs:
+    """
+    Direct-form MDCT of one windowed block.
+
+    Definition
+    ----------
+    X[k] = 2 * sum_{n=0..N-1} s[n] * cos((2*pi/N) * (n + n0) * (k + 1/2)),
+    where n0 = (N/2 + 1)/2 and k = 0 .. N/2 - 1.
+
+    Parameters
+    ----------
+    s : TimeSignal
+        Windowed time samples; flattened to 1-D, length N must be 2048 or 256.
+
+    Returns
+    -------
+    MdctCoeffs
+        1-D float64 array with the N/2 MDCT coefficients.
+
+    Raises
+    ------
+    ValueError
+        If the flattened input length is neither 2048 nor 256.
+    """
+    samples = np.asarray(s, dtype=np.float64).reshape(-1)
+    length = int(samples.shape[0])
+    if length not in (2048, 256):
+        raise ValueError("MDCT input length must be 2048 or 256.")
+
+    shift = (length / 2.0 + 1.0) / 2.0
+    time_idx = np.arange(length, dtype=np.float64) + shift    # n + n0
+    freq_idx = np.arange(length // 2, dtype=np.float64) + 0.5  # k + 1/2
+
+    # Cosine basis of shape (N, N/2); row n, column k.
+    phase = time_idx[:, np.newaxis] * freq_idx[np.newaxis, :]
+    basis = np.cos((2.0 * np.pi / length) * phase)
+    return 2.0 * np.dot(samples, basis)
+
+
+def _imdct(X: MdctCoeffs) -> TimeSignal:
+    """
+    Direct-form IMDCT of one coefficient block.
+
+    Definition
+    ----------
+    s[n] = (2/N) * sum_{k=0..N/2-1} X[k] * cos((2*pi/N) * (n + n0) * (k + 1/2)),
+    where N = 2K, n0 = (N/2 + 1)/2 and n = 0 .. N - 1.
+
+    Parameters
+    ----------
+    X : MdctCoeffs
+        MDCT coefficients; flattened to 1-D, length K must be 1024 or 128.
+
+    Returns
+    -------
+    TimeSignal
+        1-D float64 array with N = 2K time samples (not yet windowed).
+
+    Raises
+    ------
+    ValueError
+        If the flattened input length is neither 1024 nor 128.
+    """
+    coeffs = np.asarray(X, dtype=np.float64).reshape(-1)
+    n_coeffs = int(coeffs.shape[0])
+    if n_coeffs not in (1024, 128):
+        raise ValueError("IMDCT input length must be 1024 or 128.")
+
+    length = 2 * n_coeffs
+    shift = (length / 2.0 + 1.0) / 2.0
+
+    time_idx = np.arange(length, dtype=np.float64) + shift  # n + n0
+    freq_idx = np.arange(n_coeffs, dtype=np.float64) + 0.5  # k + 1/2
+
+    # Cosine basis of shape (N, K); row n, column k.
+    phase = time_idx[:, np.newaxis] * freq_idx[np.newaxis, :]
+    basis = np.cos((2.0 * np.pi / length) * phase)
+    return (2.0 / length) * np.dot(basis, coeffs)
+
+
+def _filter_bank_esh_channel(x_ch: FrameChannelT, win_type: WinType) -> FrameChannelF:
+    """
+    Analyse one channel of an ESH frame with eight short MDCTs.
+
+    The eight 256-sample sub-blocks start at 448 + 128*j (j = 0..7), i.e.
+    they overlap by half and cover samples 448..1599 of the frame.
+
+    Parameters
+    ----------
+    x_ch : FrameChannelT
+        Time-domain channel frame (expected shape: (2048,)).
+    win_type : WinType
+        Window family ("KBD" or "SIN").
+
+    Returns
+    -------
+    FrameChannelF
+        Array of shape (128, 8); column j holds the 128 MDCT coefficients of
+        the j-th short block.
+    """
+    short_win = _short_window(win_type)  # (256,)
+
+    block_starts = range(448, 448 + 8 * 128, 128)
+    spectra = [_mdct(x_ch[start:start + 256] * short_win) for start in block_starts]
+
+    # One column per short block.
+    return np.stack(spectra, axis=1)
+
+
+def _unpack_esh(frame_F: FrameF) -> tuple[FrameChannelF, FrameChannelF]:
+    """
+    Separate a packed ESH spectrum (128, 16) into two (128, 8) channel arrays.
+
+    This undoes the interleaving applied by aac_filter_bank():
+      packed[:, 2*j]   = left[:, j]
+      packed[:, 2*j+1] = right[:, j]
+
+    Parameters
+    ----------
+    frame_F : FrameF
+        Packed ESH spectrum (expected shape: (128, 16)).
+
+    Returns
+    -------
+    left : FrameChannelF
+        Left channel spectrum, shape (128, 8).
+    right : FrameChannelF
+        Right channel spectrum, shape (128, 8).
+
+    Raises
+    ------
+    ValueError
+        If frame_F does not have shape (128, 16).
+    """
+    if frame_F.shape != (128, 16):
+        raise ValueError("ESH frame_F must have shape (128, 16).")
+
+    # Even columns carry the left channel, odd columns the right channel.
+    left = np.empty((128, 8), dtype=np.float64)
+    right = np.empty((128, 8), dtype=np.float64)
+    left[:, :] = frame_F[:, 0::2]
+    right[:, :] = frame_F[:, 1::2]
+    return left, right
+
+
+def _i_filter_bank_esh_channel(X_esh: FrameChannelF, win_type: WinType) -> FrameChannelT:
+    """
+    Synthesize one channel of an ESH frame from its eight short spectra.
+
+    Each column is inverse-transformed to 256 samples, multiplied by the
+    short window and accumulated at offset 448 + 128*j, so neighbouring
+    short blocks overlap by half inside the frame.
+
+    Parameters
+    ----------
+    X_esh : FrameChannelF
+        MDCT coefficients of the 8 short blocks, shape (128, 8).
+    win_type : WinType
+        Window family ("KBD" or "SIN").
+
+    Returns
+    -------
+    FrameChannelT
+        Time-domain contribution of the frame, shape (2048,). Samples outside
+        448..1599 stay zero.
+
+    Raises
+    ------
+    ValueError
+        If X_esh does not have shape (128, 8).
+    """
+    if X_esh.shape != (128, 8):
+        raise ValueError("X_esh must have shape (128, 8).")
+
+    short_win = _short_window(win_type)  # (256,)
+    frame = np.zeros(2048, dtype=np.float64)
+
+    # Offsets 448, 576, ..., 1344: one per short block.
+    for col, offset in enumerate(range(448, 1472, 128)):
+        frame[offset:offset + 256] += _imdct(X_esh[:, col]) * short_win
+
+    return frame
+
+
+# -----------------------------------------------------------------------------
+# Public Function prototypes (Level 1)
+# -----------------------------------------------------------------------------
+
+def aac_filter_bank(frame_T: FrameT, frame_type: FrameType, win_type: WinType) -> FrameF:
+    """
+    Filterbank analysis: window the stereo frame and take its MDCT.
+
+    Parameters
+    ----------
+    frame_T : FrameT
+        Time-domain stereo frame, shape (2048, 2).
+    frame_type : FrameType
+        Type of the frame being encoded ("OLS"|"LSS"|"ESH"|"LPS").
+    win_type : WinType
+        Window family ("KBD" or "SIN") used for this frame.
+
+    Returns
+    -------
+    frame_F : FrameF
+        MDCT coefficients:
+        - "OLS"/"LSS"/"LPS": shape (1024, 2), one column per channel.
+        - "ESH": shape (128, 16); the eight short blocks are stored as
+          interleaved column pairs [L0 R0 L1 R1 ... L7 R7].
+
+    Raises
+    ------
+    ValueError
+        If frame_T has the wrong shape (checked first) or frame_type is not
+        one of the four known codes.
+    """
+    if frame_T.shape != (2048, 2):
+        raise ValueError("frame_T must have shape (2048, 2).")
+
+    left_t: FrameChannelT = frame_T[:, 0].astype(np.float64, copy=False)
+    right_t: FrameChannelT = frame_T[:, 1].astype(np.float64, copy=False)
+
+    if frame_type == "ESH":
+        # Eight short transforms per channel, interleaved column-wise.
+        packed = np.empty((128, 16), dtype=np.float64)
+        packed[:, 0::2] = _filter_bank_esh_channel(left_t, win_type)   # (128, 8)
+        packed[:, 1::2] = _filter_bank_esh_channel(right_t, win_type)  # (128, 8)
+        return packed
+
+    if frame_type in ("OLS", "LSS", "LPS"):
+        # One long transform per channel with the frame-type-specific window.
+        window = _window_sequence(frame_type, win_type)  # length 2048
+        left_f = _mdct(left_t * window)    # length 1024
+        right_f = _mdct(right_t * window)  # length 1024
+        return np.stack([left_f, right_f], axis=1)
+
+    raise ValueError(f"Invalid frame_type: {frame_type!r}")
+
+
+def aac_i_filter_bank(frame_F: FrameF, frame_type: FrameType, win_type: WinType) -> FrameT:
+    """
+    Inverse filterbank: IMDCT synthesis followed by windowing.
+
+    Parameters
+    ----------
+    frame_F : FrameF
+        MDCT coefficients in the layout produced by aac_filter_bank():
+        (1024, 2) for "OLS"/"LSS"/"LPS", (128, 16) for "ESH".
+    frame_type : FrameType
+        Frame type ("OLS"|"LSS"|"ESH"|"LPS").
+    win_type : WinType
+        Window family ("KBD" or "SIN").
+
+    Returns
+    -------
+    frame_T : FrameT
+        Windowed time-domain frame, stereo, shape (2048, 2). The caller is
+        expected to overlap-add consecutive frames.
+
+    Raises
+    ------
+    ValueError
+        If frame_type is unknown or frame_F does not match its layout.
+    """
+    if frame_type == "ESH":
+        if frame_F.shape != (128, 16):
+            raise ValueError("For ESH, frame_F must have shape (128, 16).")
+
+        left_f, right_f = _unpack_esh(frame_F)
+        left_t = _i_filter_bank_esh_channel(left_f, win_type)
+        right_t = _i_filter_bank_esh_channel(right_f, win_type)
+        return np.stack([left_t, right_t], axis=1)
+
+    if frame_type in ("OLS", "LSS", "LPS"):
+        if frame_F.shape != (1024, 2):
+            raise ValueError("For OLS/LSS/LPS, frame_F must have shape (1024, 2).")
+
+        # The same window shape is applied at synthesis as at analysis.
+        window = _window_sequence(frame_type, win_type)
+        left_t = _imdct(frame_F[:, 0]) * window
+        right_t = _imdct(frame_F[:, 1]) * window
+        return np.stack([left_t, right_t], axis=1)
+
+    raise ValueError(f"Invalid frame_type: {frame_type!r}")
--- a/source/core/aac_ssc.py
+++ b/source/core/aac_ssc.py
@ -0,0 +1,217 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Sequence Segmentation Control module
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Sequence Segmentation Control module (SSC).
+#   Selects and returns the frame type based on input parameters.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import Dict, Tuple
+from core.aac_types import FrameType, FrameT, FrameChannelT
+
+import numpy as np
+
+# -----------------------------------------------------------------------------
+# Private helpers for SSC
+# -----------------------------------------------------------------------------
+
+# See Table 1 in mm-2025-hw-v0.1.pdf
+# Lookup used by _stereo_merge(): the key is the pair
+# (left-channel decision, right-channel decision) and the value is the single
+# frame type applied to both channels. All 16 combinations are listed, and
+# the table is symmetric in its two arguments.
+STEREO_MERGE_TABLE: Dict[Tuple[FrameType, FrameType], FrameType] = {
+    ("OLS", "OLS"): "OLS",
+    ("OLS", "LSS"): "LSS",
+    ("OLS", "ESH"): "ESH",
+    ("OLS", "LPS"): "LPS",
+    ("LSS", "OLS"): "LSS",
+    ("LSS", "LSS"): "LSS",
+    ("LSS", "ESH"): "ESH",
+    ("LSS", "LPS"): "ESH",
+    ("ESH", "OLS"): "ESH",
+    ("ESH", "LSS"): "ESH",
+    ("ESH", "ESH"): "ESH",
+    ("ESH", "LPS"): "ESH",
+    ("LPS", "OLS"): "LPS",
+    ("LPS", "LSS"): "ESH",
+    ("LPS", "ESH"): "ESH",
+    ("LPS", "LPS"): "LPS",
+}
+
+
+def _detect_attack(next_frame_channel: FrameChannelT) -> bool:
+    """
+    Decide whether the *next* frame (single channel) contains an attack,
+    i.e. whether it is predicted to be an ESH frame.
+
+    Parameters
+    ----------
+    next_frame_channel : FrameChannelT
+        One channel of next_frame_T (expected shape: (2048,)). Any numeric
+        dtype is accepted; the samples are converted to float64 first.
+
+    Returns
+    -------
+    bool
+        True if an attack is detected (=> next frame predicted ESH), else False.
+
+    Notes
+    -----
+    1) The channel is high-pass filtered with
+           H(z) = (1 - z^-1) / (1 - 0.5 z^-1)
+       i.e. in the time domain:
+           y[n] = x[n] - x[n-1] + 0.5*y[n-1]
+
+    2) The filtered signal is cut into 128-sample segments and the energy
+       s[l] of segments l = 0..7 is computed.
+
+    3) The energy ratio to the preceding segment is
+           ds[l] = s[l] / s[l-1]
+       (the denominator is clamped to 1e-12 to avoid division by zero).
+
+    4) An attack exists if for some l in {1..7}:
+           s[l] > 1e-3  and  ds[l] > 10
+
+    NOTE(review): only segments 0..7 (samples 0..1023 of the next frame)
+    take part in the decision, which is what the previous implementation
+    tested as well; confirm against the assignment spec that this is the
+    intended region and ratio definition.
+    """
+    # Work in float64 regardless of the input dtype; otherwise an integer
+    # input would truncate the filtered samples and distort the energies.
+    x = np.asarray(next_frame_channel, dtype=np.float64).reshape(-1)
+
+    # High-pass filter, reference scalar recurrence.
+    y = np.empty_like(x)
+    prev_x = 0.0
+    prev_y = 0.0
+    for n, xn in enumerate(x.tolist()):
+        prev_y = (xn - prev_x) + 0.5 * prev_y
+        y[n] = prev_y
+        prev_x = xn
+
+    # Energies of the eight 128-sample segments that take part in the test.
+    energies = []
+    for l in range(8):
+        seg = y[l * 128:(l + 1) * 128]
+        energies.append(float(np.sum(seg * seg)))
+
+    eps = 1e-12  # Avoid division by zero without materially changing the logic.
+    for l in range(1, 8):
+        ratio = energies[l] / max(energies[l - 1], eps)
+        if energies[l] > 1e-3 and ratio > 10.0:
+            return True
+
+    return False
+
+
+def _decide_frame_type(prev_frame_type: FrameType, attack: bool) -> FrameType:
+    """
+    Pick the current frame type of one channel from the previous frame type
+    and the attack prediction for the next frame.
+
+    Transition rules (spec):
+
+    - prev "LSS" => "ESH"  (regardless of attack)
+    - prev "LPS" => "OLS"  (regardless of attack)
+    - prev "OLS" => "LSS" on attack, otherwise "OLS"
+    - prev "ESH" => "ESH" on attack, otherwise "LPS"
+
+    Parameters
+    ----------
+    prev_frame_type : FrameType
+        Previous frame type (one of "OLS", "LSS", "ESH", "LPS").
+    attack : bool
+        True if the next frame is predicted ESH for this channel.
+
+    Returns
+    -------
+    FrameType
+        The per-channel decision for the current frame.
+
+    Raises
+    ------
+    ValueError
+        If prev_frame_type is not one of the four known codes.
+    """
+    # Start/stop frames force the following frame type.
+    if prev_frame_type in ("LSS", "LPS"):
+        return "ESH" if prev_frame_type == "LSS" else "OLS"
+
+    if prev_frame_type not in ("OLS", "ESH"):
+        raise ValueError(f"Invalid prev_frame_type: {prev_frame_type!r}")
+
+    was_long = prev_frame_type == "OLS"
+    if attack:
+        return "LSS" if was_long else "ESH"
+    return "OLS" if was_long else "LPS"
+
+
+def _stereo_merge(ft_l: FrameType, ft_r: FrameType) -> FrameType:
+    """
+    Reduce the two per-channel frame type decisions to the single frame
+    type shared by both channels, via STEREO_MERGE_TABLE.
+
+    Parameters
+    ----------
+    ft_l : FrameType
+        Frame type decision for the left channel.
+    ft_r : FrameType
+        Frame type decision for the right channel.
+
+    Returns
+    -------
+    FrameType
+        The merged common frame type.
+
+    Raises
+    ------
+    ValueError
+        If the (left, right) pair is not present in the table; the original
+        KeyError is attached as the cause.
+    """
+    pair = (ft_l, ft_r)
+    try:
+        merged = STEREO_MERGE_TABLE[pair]
+    except KeyError as err:
+        raise ValueError(f"Invalid stereo merge pair: {pair}") from err
+    return merged
+
+
+# -----------------------------------------------------------------------------
+# Public Function prototypes (Level 1)
+# -----------------------------------------------------------------------------
+
+def aac_SSC(frame_T: FrameT, next_frame_T: FrameT, prev_frame_type: FrameType) -> FrameType:
+    """
+    Sequence Segmentation Control (SSC).
+
+    Chooses the frame type of the current frame (i). Attack detection runs
+    per channel on the next frame; each channel then gets its own decision
+    from the shared previous frame type, and the two decisions are merged
+    into one common type. The current frame itself is only shape-checked.
+
+    Parameters
+    ----------
+    frame_T : FrameT
+        Current time-domain frame i, shape (2048, 2).
+    next_frame_T : FrameT
+        Next time-domain frame (i+1), shape (2048, 2), used for attack
+        detection.
+    prev_frame_type : FrameType
+        Frame type chosen for the previous frame (i-1).
+
+    Returns
+    -------
+    FrameType
+        One of: "OLS", "LSS", "ESH", "LPS".
+
+    Raises
+    ------
+    ValueError
+        If either frame has the wrong shape or prev_frame_type is invalid.
+    """
+    if frame_T.shape != (2048, 2):
+        raise ValueError("frame_T must have shape (2048, 2).")
+    if next_frame_T.shape != (2048, 2):
+        raise ValueError("next_frame_T must have shape (2048, 2).")
+
+    # Column 0 is the left channel, column 1 the right channel.
+    attacks = [_detect_attack(next_frame_T[:, ch]) for ch in (0, 1)]
+
+    # Both channels share the same previous frame type.
+    left_type, right_type = (
+        _decide_frame_type(prev_frame_type, has_attack) for has_attack in attacks
+    )
+
+    # Merge the per-channel decisions as per the spec table.
+    return _stereo_merge(left_type, right_type)
--- a/source/core/aac_types.py
+++ b/source/core/aac_types.py
@ -0,0 +1,193 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Public Type Aliases
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   This module defines the public type aliases.
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import List, Literal, TypeAlias, TypedDict
+import numpy as np
+from numpy.typing import NDArray
+
+# -----------------------------------------------------------------------------
+# Code enums (for readability; not intended to enforce shapes/lengths)
+# -----------------------------------------------------------------------------
+
+# Frame-type code; this is the value type returned by aac_SSC().
+FrameType: TypeAlias = Literal["OLS", "LSS", "ESH", "LPS"]
+"""
+Frame type codes (AAC):
+- "OLS": ONLY_LONG_SEQUENCE
+- "LSS": LONG_START_SEQUENCE
+- "ESH": EIGHT_SHORT_SEQUENCE
+- "LPS": LONG_STOP_SEQUENCE
+"""
+
+# Window-type code; stored under the "win_type" key of AACSeq1Frame.
+WinType: TypeAlias = Literal["KBD", "SIN"]
+"""
+Window type codes (AAC):
+- "KBD": Kaiser-Bessel-Derived
+- "SIN": sinusoid
+"""
+
+# The two literals match the per-channel field names of AACSeq1Frame.
+ChannelKey: TypeAlias = Literal["chl", "chr"]
+"""Channel dictionary keys used in Level 1 payloads."""
+
+
+# -----------------------------------------------------------------------------
+# Array “semantic” aliases
+#
+# Goal: communicate meaning (time/frequency/window, stereo/channel) without
+# forcing strict shapes in the type system.
+# -----------------------------------------------------------------------------
+
+# Base alias. Window, TimeSignal, StereoSignal and MdctCoeffs are all bound to
+# this same type, so the separate names document intent only; a static type
+# checker will not tell them apart.
+FloatArray: TypeAlias = NDArray[np.float64]
+"""
+Generic float64 NumPy array.
+
+Note:
+- We standardize internal numeric computations to float64 for stability and
+  reproducibility. External I/O can still be float32, but we convert at the
+  boundaries.
+"""
+
+# Same type as FloatArray; the name marks a 1-D weighting sequence.
+Window: TypeAlias = FloatArray
+"""
+Time-domain window (weighting sequence), 1-D.
+
+Typical lengths in this assignment:
+- Long: 2048
+- Short: 256
+- Window sequences for LSS/LPS are also 2048
+
+Expected shape: (N,)
+dtype: float64
+"""
+
+# Same type as FloatArray; the name marks time-domain samples.
+TimeSignal: TypeAlias = FloatArray
+"""
+Time-domain signal samples, typically 1-D.
+
+Examples:
+- Windowed MDCT input: shape (N,)
+- IMDCT output: shape (N,)
+
+dtype: float64
+"""
+
+# Same type as FloatArray; the name marks a two-channel sample stream.
+StereoSignal: TypeAlias = FloatArray
+"""
+Time-domain stereo signal stream.
+
+Expected (typical) shape: (N, 2)
+- axis 0: time samples
+- axis 1: channels [L, R]
+
+dtype: float64
+"""
+
+# Same type as FloatArray; the name marks a vector of MDCT coefficients.
+MdctCoeffs: TypeAlias = FloatArray
+"""
+MDCT coefficient vector, typically 1-D.
+
+Examples:
+- Long: shape (1024,)
+- Short: shape (128,)
+
+dtype: float64
+"""
+
+
+# Same runtime type as FloatArray; the name marks a stereo time-domain frame.
+FrameT: TypeAlias = FloatArray
+"""
+Time-domain frame (stereo), as used by the filterbank input/output.
+
+Expected (typical) shape for stereo: (2048, 2)
+- axis 0: time samples
+- axis 1: channels [L, R]
+
+dtype: float64
+"""
+
+# Same runtime type as FloatArray; the name marks one channel of a FrameT.
+FrameChannelT: TypeAlias = FloatArray
+"""
+Time-domain single-channel frame.
+
+Expected (typical) shape: (2048,)
+
+dtype: float64
+"""
+
+# Same runtime type as FloatArray; the name marks a stereo MDCT frame.
+FrameF: TypeAlias = FloatArray
+"""
+Frequency-domain frame (MDCT coefficients), stereo container.
+
+Typical shapes (Level 1):
+- If frame_type in {"OLS","LSS","LPS"}: (1024, 2)
+- If frame_type == "ESH": (128, 16)
+
+Rationale for ESH (128, 16):
+- 8 short subframes per channel => 8 * 2 = 16 columns total
+- Each short subframe per stereo is (128, 2), flattened into columns
+  in subframe order: [sf0_L, sf0_R, sf1_L, sf1_R, ..., sf7_L, sf7_R]
+
+dtype: float64
+"""
+
+# Same runtime type as FloatArray; the name marks one channel's MDCT frame.
+# NOTE(review): the header of core/aac_coder.py documents the stored non-ESH
+# shape as (1024, 1), while the docstring below says (1024,) — confirm which
+# one the coder actually produces.
+FrameChannelF: TypeAlias = FloatArray
+"""
+Frequency-domain single-channel frame (MDCT coefficients).
+
+Typical shapes (Level 1):
+- If frame_type in {"OLS","LSS","LPS"}: (1024,)
+- If frame_type == "ESH": (128, 8)  (8 short subframes for one channel)
+
+dtype: float64
+"""
+
+
+# -----------------------------------------------------------------------------
+# Level 1 AAC sequence payload types
+# -----------------------------------------------------------------------------
+
+class AACChannelFrameF(TypedDict):
+    """
+    Per-channel payload for aac_seq_1[i]["chl"] or ["chr"] (Level 1).
+
+    Keys
+    ----
+    frame_F:
+        The MDCT coefficients for ONE channel.
+        Typical shapes:
+        - ESH: (128, 8)   (8 short subframes)
+        - else: (1024, )
+    """
+    # NOTE(review): the header of core/aac_coder.py documents the non-ESH shape
+    # as (1024, 1), while this docstring says (1024, ) — confirm which one the
+    # coder actually stores.
+    frame_F: FrameChannelF
+
+
+class AACSeq1Frame(TypedDict):
+    """
+    One frame dictionary element of aac_seq_1 (Level 1).
+
+    Keys
+    ----
+    frame_type:
+        Frame type code of this frame ("OLS", "LSS", "ESH" or "LPS").
+    win_type:
+        Window type code of this frame ("KBD" or "SIN").
+    chl:
+        Per-channel payload holding "frame_F" (presumably the left channel).
+    chr:
+        Per-channel payload holding "frame_F" (presumably the right channel).
+    """
+    frame_type: FrameType
+    win_type: WinType
+    chl: AACChannelFrameF
+    chr: AACChannelFrameF
+
+
+# Ordered list of per-frame dictionaries; the tests annotate the result of
+# aac_coder_1() with this alias.
+AACSeq1: TypeAlias = List[AACSeq1Frame]
+"""
+AAC sequence for Level 1:
+List of length K (K = number of frames).
+
+Each element is a dict with keys:
+- "frame_type", "win_type", "chl", "chr"
+"""
--- a/source/core/tests/test_SSC.py
+++ b/source/core/tests/test_SSC.py
@ -0,0 +1,234 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Sequence Segmentation Control Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Sequence Segmentation Control module (SSC).
+# ------------------------------------------------------------
+
+from __future__ import annotations
+
+import numpy as np
+
+from core.aac_ssc import aac_SSC
+from core.aac_types import FrameT
+
+# -----------------------------------------------------------------------------
+# Helper fixtures for SSC
+# -----------------------------------------------------------------------------
+
+def _next_frame_no_attack() -> FrameT:
+    """
+    Return an all-zero next_frame_T of shape (2048, 2).
+
+    With exact zeros every segment energy is zero, so the condition
+    s[l] > 1e-3 cannot hold for any l and ESH detection must NOT trigger.
+    """
+    silent_frame: FrameT = np.zeros(shape=(2048, 2), dtype=np.float64)
+    return silent_frame
+
+
+def _next_frame_strong_attack(
+    *,
+    attack_left: bool,
+    attack_right: bool,
+    segment_l: int = 4,
+    baseline: float = 1e-6,
+    burst_amp: float = 1.0,
+) -> FrameT:
+    """
+    Build a (2048, 2) next_frame_T meant to trigger ESH detection on the selected channels.
+
+    Attack criterion (spec):
+      An attack exists if, for some l in {1..7},
+        s[l] > 1e-3  and  ds[l] > 10,
+      where s[l] is the energy of the 128-sample segment l after high-pass
+      filtering and ds[l] = s[l] / s[l-1].
+
+    Construction:
+    - Every sample starts at 'baseline', so ds does not depend on the epsilon
+      guard and stays stable/reproducible.
+    - 'burst_amp' is added over the whole of segment 'segment_l' on each
+      channel whose flag is set.
+
+    Raises
+    ------
+    ValueError
+        If segment_l is outside [1, 7].
+    """
+    segment_ok = 1 <= segment_l <= 7
+    if not segment_ok:
+        raise ValueError(f"segment_l must be in [1, 7], got {segment_l}.")
+
+    frame = np.full((2048, 2), baseline, dtype=np.float64)
+
+    # Sample range covered by the chosen 128-sample segment.
+    burst = slice(segment_l * 128, (segment_l + 1) * 128)
+
+    for channel, enabled in enumerate((attack_left, attack_right)):
+        if enabled:
+            frame[burst, channel] += burst_amp
+
+    return frame
+
+
+def _next_frame_below_s_threshold(
+    *,
+    left: bool,
+    right: bool,
+    segment_l: int = 4,
+    impulse_amp: float = 0.01,
+) -> FrameT:
+    """
+    Build a next_frame_T whose s[l] stays below 1e-3, so ESH must NOT be
+    triggered even if the ratio ds[l] could be large.
+
+    The frame is all zeros except for one sample of amplitude 'impulse_amp',
+    placed 10 samples into segment 'segment_l' on each selected channel.
+    Approx. segment energy: s[l] ~= impulse_amp^2.
+
+    Example:
+      impulse_amp = 0.01 => s[l] ~= 1e-4 < 1e-3
+
+    Raises
+    ------
+    ValueError
+        If segment_l is outside [1, 7].
+    """
+    segment_ok = 1 <= segment_l <= 7
+    if not segment_ok:
+        raise ValueError(f"segment_l must be in [1, 7], got {segment_l}.")
+
+    frame = np.zeros((2048, 2), dtype=np.float64)
+    impulse_pos = segment_l * 128 + 10  # inside segment l
+
+    for channel, enabled in enumerate((left, right)):
+        if enabled:
+            frame[impulse_pos, channel] = impulse_amp
+
+    return frame
+
+
+# -----------------------------------------------------------------------------
+# 1) Fixed/mandatory cases (prev frame type forces current type)
+# -----------------------------------------------------------------------------
+
+def test_ssc_fixed_cases_prev_lss_and_lps() -> None:
+    """
+    Spec:
+      - If prev was LSS => current MUST be ESH
+      - If prev was LPS => current MUST be OLS
+    independent of attack detection on (i+1).
+
+    Both an attacking and a silent next frame are checked, so the
+    "independent of attack detection" part of the claim is actually exercised.
+    """
+    frame_t: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    next_frames = (
+        _next_frame_strong_attack(attack_left=True, attack_right=True),
+        _next_frame_no_attack(),
+    )
+
+    for next_t in next_frames:
+        assert aac_SSC(frame_t, next_t, "LSS") == "ESH"
+        assert aac_SSC(frame_t, next_t, "LPS") == "OLS"
+
+
+# -----------------------------------------------------------------------------
+# 2) Cases requiring next-frame ESH prediction (attack computation)
+# -----------------------------------------------------------------------------
+
+def test_prev_ols_next_not_esh_returns_ols() -> None:
+    """
+    With prev=OLS the current frame is LSS iff frame (i+1) is predicted ESH,
+    otherwise it stays OLS.
+
+    The next frame here carries no attack, so OLS is expected.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    decision = aac_SSC(current, _next_frame_no_attack(), "OLS")
+
+    assert decision == "OLS"
+
+
+def test_prev_ols_next_esh_both_channels_returns_lss() -> None:
+    """
+    prev=OLS with an attack on both channels of the next frame:
+      per-channel decisions: LSS, LSS
+      merged decision: LSS
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    attacked = _next_frame_strong_attack(attack_left=True, attack_right=True)
+
+    decision = aac_SSC(current, attacked, "OLS")
+
+    assert decision == "LSS"
+
+
+def test_prev_ols_next_esh_one_channel_returns_lss() -> None:
+    """
+    prev=OLS with an attack on exactly one channel of the next frame:
+      - the attacked channel decides LSS
+      - the other channel decides OLS
+    Merge table: OLS + LSS => LSS, whichever side carries the attack.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    # Left-only attack first, then right-only attack.
+    for on_left, on_right in ((True, False), (False, True)):
+        attacked = _next_frame_strong_attack(attack_left=on_left, attack_right=on_right)
+        decision = aac_SSC(current, attacked, "OLS")
+        assert decision == "LSS"
+
+
+def test_prev_esh_next_esh_both_channels_returns_esh() -> None:
+    """
+    prev=ESH with an attack on both channels of the next frame:
+      per-channel decisions: ESH, ESH
+      merged decision: ESH
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    attacked = _next_frame_strong_attack(attack_left=True, attack_right=True)
+
+    decision = aac_SSC(current, attacked, "ESH")
+
+    assert decision == "ESH"
+
+
+def test_prev_esh_next_not_esh_both_channels_returns_lps() -> None:
+    """
+    prev=ESH with no attack on either channel of the next frame:
+      per-channel decisions: LPS, LPS
+      merged decision: LPS
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    decision = aac_SSC(current, _next_frame_no_attack(), "ESH")
+
+    assert decision == "LPS"
+
+
+def test_prev_esh_next_esh_one_channel_merged_is_esh() -> None:
+    """
+    prev=ESH with an attack on exactly one channel of the next frame:
+      - the attacked channel decides ESH
+      - the other channel decides LPS
+    Merge table: ESH + LPS => ESH, whichever side carries the attack.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    # Left-only attack first, then right-only attack.
+    for on_left, on_right in ((True, False), (False, True)):
+        attacked = _next_frame_strong_attack(attack_left=on_left, attack_right=on_right)
+        decision = aac_SSC(current, attacked, "ESH")
+        assert decision == "ESH"
+
+
+def test_threshold_s_must_exceed_1e_3() -> None:
+    """
+    Spec: the next frame is predicted ESH only if, for some l in 1..7,
+      s[l] > 1e-3  AND  ds[l] > 10.
+
+    This test covers the necessity of the s[l] threshold:
+      - the next frame holds a single impulse of amplitude 0.01 per channel,
+        so s[l] ~= 1e-4 < 1e-3;
+      - it must not be classified as ESH, hence prev=OLS yields OLS.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    weak_impulse = _next_frame_below_s_threshold(left=True, right=True, impulse_amp=0.01)
+
+    decision = aac_SSC(current, weak_impulse, "OLS")
+
+    assert decision == "OLS"
--- a/source/core/tests/test_aac_coder_decoder.py
+++ b/source/core/tests/test_aac_coder_decoder.py
@ -1,3 +1,16 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - AAC Coder/Decoder Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for AAC Coder/Decoder module.
+# ------------------------------------------------------------
 from __future__ import annotations

 from pathlib import Path
@ -6,18 +19,36 @@ import numpy as np
 import pytest
 import soundfile as sf

-from level_1.level_1 import aac_coder_1, i_aac_coder_1
+from core.aac_coder import aac_coder_1
+from core.aac_decoder import aac_decoder_1
+from core.aac_types import *
+

 # Helper "fixtures" for aac_coder_1 / aac_decoder_1
 # -----------------------------------------------------------------------------

-def _snr_db(x_ref: np.ndarray, x_hat: np.ndarray) -> float:
+def _snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
    """
    Compute overall SNR (dB) over all samples and channels after aligning lengths.
+
+    Parameters
+    ----------
+    x_ref : StereoSignal
+        Reference signal, shape (N, 2) typical.
+    x_hat : StereoSignal
+        Reconstructed signal, shape (M, 2) typical.
+
+    Returns
+    -------
+    float
+        SNR in dB.
+        - Returns +inf if noise power is zero.
+        - Returns -inf if signal power is zero.
    """
    x_ref = np.asarray(x_ref, dtype=np.float64)
    x_hat = np.asarray(x_hat, dtype=np.float64)

+    # Be conservative: align lengths and common channels.
    if x_ref.ndim == 1:
        x_ref = x_ref.reshape(-1, 1)
    if x_hat.ndim == 1:
@ -36,7 +67,7 @@ def _snr_db(x_ref: np.ndarray, x_hat: np.ndarray) -> float:
    if pn <= 0.0:
        return float("inf")
    if ps <= 0.0:
-        return -float("inf")
+        return float("-inf")

    return float(10.0 * np.log10(ps / pn))

@ -49,9 +80,9 @@ def tmp_stereo_wav(tmp_path: Path) -> Path:
    rng = np.random.default_rng(123)
    fs = 48000

-    # ~1 second of audio, keep small for test speed
+    # ~1 second of audio (kept small for test speed).
    n = fs
-    x = rng.normal(size=(n, 2)).astype(np.float64)
+    x: StereoSignal = rng.normal(size=(n, 2)).astype(np.float64)

    wav_path = tmp_path / "in.wav"
    sf.write(str(wav_path), x, fs)
@ -63,7 +94,7 @@ def test_aac_coder_seq_schema_and_shapes(tmp_stereo_wav: Path) -> None:
    Module-level contract test:
    Ensure aac_seq_1 follows the expected schema and per-frame shapes.
    """
-    aac_seq = aac_coder_1(tmp_stereo_wav)
+    aac_seq: AACSeq1 = aac_coder_1(tmp_stereo_wav)

    assert isinstance(aac_seq, list)
    assert len(aac_seq) > 0
@ -88,8 +119,8 @@ def test_aac_coder_seq_schema_and_shapes(tmp_stereo_wav: Path) -> None:
        assert "frame_F" in fr["chl"]
        assert "frame_F" in fr["chr"]

-        chl_f = np.asarray(fr["chl"]["frame_F"])
-        chr_f = np.asarray(fr["chr"]["frame_F"])
+        chl_f = np.asarray(fr["chl"]["frame_F"], dtype=np.float64)
+        chr_f = np.asarray(fr["chr"]["frame_F"], dtype=np.float64)

        if frame_type == "ESH":
            assert chl_f.shape == (128, 8)
@ -101,23 +132,25 @@ def test_aac_coder_seq_schema_and_shapes(tmp_stereo_wav: Path) -> None:

 def test_end_to_end_aac_coder_decoder_high_snr(tmp_stereo_wav: Path, tmp_path: Path) -> None:
    """
-    End-to-end module test:
+    End-to-end test:
    Encode + decode and check SNR is very high (numerical-noise only).
-    Threshold is intentionally loose to avoid fragility.
+
+    The threshold is intentionally loose to avoid fragility across platforms/BLAS.
    """
    x_ref, fs = sf.read(str(tmp_stereo_wav), always_2d=True)
-    assert fs == 48000
+    x_ref = np.asarray(x_ref, dtype=np.float64)
+    assert int(fs) == 48000

    out_wav = tmp_path / "out.wav"

    aac_seq = aac_coder_1(tmp_stereo_wav)
-    x_hat = i_aac_coder_1(aac_seq, out_wav)
+    x_hat: StereoSignal = aac_decoder_1(aac_seq, out_wav)

    # Basic sanity: output file exists and is readable
    assert out_wav.exists()
    x_hat_file, fs_hat = sf.read(str(out_wav), always_2d=True)
-    assert fs_hat == 48000
+    assert int(fs_hat) == 48000

-    # SNR computed against the array returned by i_aac_coder_1 (should match file, but not required)
+    # SNR against returned array (file should match closely, but we do not require it here).
    snr = _snr_db(x_ref, x_hat)
    assert snr > 80.0
--- a/source/core/tests/test_filterbank.py
+++ b/source/core/tests/test_filterbank.py
@ -0,0 +1,269 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Filterbank module.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import Sequence
+import pytest
+
+from core.aac_filterbank import aac_filter_bank, aac_i_filter_bank
+from core.aac_types import *
+
+# Helper fixtures for filterbank
+# -----------------------------------------------------------------------------
+
+def _ola_reconstruct(x: StereoSignal, frame_types: Sequence[FrameType], win_type: WinType) -> StereoSignal:
+    """
+    Run analysis followed by synthesis on every frame and overlap-add the results.
+
+    Frame i covers samples [i*1024, i*1024 + 2048) of x, i.e. consecutive
+    frames overlap by half a frame.
+
+    Parameters
+    ----------
+    x : StereoSignal
+        Input stereo stream, expected shape (N, 2).
+    frame_types : Sequence[FrameType]
+        One frame type per frame; its length K sets the number of frames.
+    win_type : WinType
+        Window type ("SIN" or "KBD"), used for both analysis and synthesis.
+
+    Returns
+    -------
+    StereoSignal
+        Overlap-added reconstruction, same shape as x.
+    """
+    hop, frame_len = 1024, 2048
+
+    out: StereoSignal = np.zeros_like(x, dtype=np.float64)
+
+    for idx, ftype in enumerate(frame_types):
+        span = slice(idx * hop, idx * hop + frame_len)
+        spectrum: FrameF = aac_filter_bank(x[span, :], ftype, win_type)
+        out[span, :] += aac_i_filter_bank(spectrum, ftype, win_type)
+
+    return out
+
+
+def _snr_db(x: StereoSignal, y: StereoSignal) -> float:
+    """
+    Signal-to-noise ratio in dB, accumulated over all samples and channels.
+
+    x is the reference and (x - y) the noise. Returns +inf when the noise
+    power is zero and -inf when the signal power is zero; the zero-noise
+    check comes first.
+    """
+    residual = x - y
+    signal_power = float(np.sum(x * x))
+    noise_power = float(np.sum(residual * residual))
+
+    if noise_power <= 0.0:
+        return float("inf")
+    if signal_power <= 0.0:
+        return float("-inf")
+
+    ratio_db = 10.0 * float(np.log10(signal_power / noise_power))
+    return ratio_db
+
+
+# -----------------------------------------------------------------------------
+# Forward filterbank tests
+# -----------------------------------------------------------------------------
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+@pytest.mark.parametrize("frame_type", ["OLS", "LSS", "LPS"])
+def test_filterbank_shapes_long_sequences(frame_type: FrameType, win_type: WinType) -> None:
+    """
+    Contract test: aac_filter_bank returns a (1024, 2) array for each long
+    frame type (OLS/LSS/LPS).
+    """
+    silence: FrameT = np.zeros(shape=(2048, 2), dtype=np.float64)
+
+    spectrum = aac_filter_bank(silence, frame_type, win_type)
+
+    assert spectrum.shape == (1024, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_shapes_esh(win_type: WinType) -> None:
+    """
+    Contract test: aac_filter_bank returns a (128, 16) array for ESH.
+    """
+    silence: FrameT = np.zeros(shape=(2048, 2), dtype=np.float64)
+
+    spectrum = aac_filter_bank(silence, "ESH", win_type)
+
+    assert spectrum.shape == (128, 16)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_channel_isolation_long_sequences(win_type: WinType) -> None:
+    """
+    Behavior test: the channels do not leak into each other for OLS (used as
+    the representative long sequence).
+
+    Left channel is random noise, right channel is silence; the right-channel
+    spectrum must therefore be (numerically) zero.
+    """
+    rng = np.random.default_rng(0)
+    stereo: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    stereo[:, 0] = rng.normal(size=2048)
+
+    spectrum = aac_filter_bank(stereo, "OLS", win_type)
+
+    right_peak = np.max(np.abs(spectrum[:, 1]))
+    assert right_peak < 1e-9
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_channel_isolation_esh(win_type: WinType) -> None:
+    """
+    Behavior test: the channels do not leak into each other for ESH.
+
+    Left channel is random noise, right channel is silence; every odd column
+    of the output (the right-channel subframes) must be (numerically) zero.
+    """
+    rng = np.random.default_rng(1)
+    stereo: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    stereo[:, 0] = rng.normal(size=2048)
+
+    spectrum = aac_filter_bank(stereo, "ESH", win_type)
+
+    # Columns 1, 3, 5, ..., 15.
+    right_peak = np.max(np.abs(spectrum[:, 1::2]))
+    assert right_peak < 1e-9
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_esh_ignores_outer_regions(win_type: WinType) -> None:
+    """
+    Spec-driven behavior test:
+    ESH only reads the central region [448, 1600), which it splits into 8
+    windows of length 256 with 50% overlap.
+
+    Two frames that share the central region but differ outside it must
+    therefore give the same output.
+    """
+    rng = np.random.default_rng(2)
+
+    # Draw order matters for reproducibility: centre, then head, then tail.
+    centre = rng.normal(size=(1152, 2))
+    head = rng.normal(size=(448, 2))
+    tail = rng.normal(size=(448, 2))
+
+    quiet_edges: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    quiet_edges[448:1600, :] = centre
+
+    noisy_edges: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    noisy_edges[0:448, :] = head
+    noisy_edges[448:1600, :] = centre
+    noisy_edges[1600:2048, :] = tail
+
+    spec_quiet = aac_filter_bank(quiet_edges, "ESH", win_type)
+    spec_noisy = aac_filter_bank(noisy_edges, "ESH", win_type)
+
+    # A tiny absolute tolerance keeps the test robust to floating-point minutiae.
+    np.testing.assert_allclose(spec_quiet, spec_noisy, rtol=0.0, atol=1e-12)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_output_is_finite(win_type: WinType) -> None:
+    """
+    Sanity test: for a random input frame, the analysis output contains no
+    NaN or inf for any of the four frame types.
+    """
+    rng = np.random.default_rng(3)
+    noise: FrameT = rng.normal(size=(2048, 2)).astype(np.float64)
+
+    all_types = ("OLS", "LSS", "ESH", "LPS")
+    for ftype in all_types:
+        spectrum = aac_filter_bank(noise, ftype, win_type)
+        assert np.isfinite(spectrum).all()
+
+
+# -----------------------------------------------------------------------------
+# Reverse i_filterbank tests
+# -----------------------------------------------------------------------------
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ifilterbank_shapes_long_sequences(win_type: WinType) -> None:
+    """
+    Contract test: aac_i_filter_bank returns a (2048, 2) array for each long
+    frame type (OLS/LSS/LPS).
+    """
+    zero_spectrum: FrameF = np.zeros((1024, 2), dtype=np.float64)
+
+    long_types = ("OLS", "LSS", "LPS")
+    for ftype in long_types:
+        rebuilt = aac_i_filter_bank(zero_spectrum, ftype, win_type)
+        assert rebuilt.shape == (2048, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ifilterbank_shapes_esh(win_type: WinType) -> None:
+    """
+    Contract test: aac_i_filter_bank maps a (128, 16) ESH spectrum to a
+    (2048, 2) time-domain frame.
+    """
+    zero_spectrum: FrameF = np.zeros((128, 16), dtype=np.float64)
+
+    rebuilt = aac_i_filter_bank(zero_spectrum, "ESH", win_type)
+
+    assert rebuilt.shape == (2048, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_roundtrip_per_frame_is_finite(win_type: WinType) -> None:
+    """
+    Sanity test: analysis followed by synthesis of a single random frame
+    yields only finite values, for every frame type.
+    """
+    rng = np.random.default_rng(0)
+    noise: FrameT = rng.normal(size=(2048, 2)).astype(np.float64)
+
+    all_types = ("OLS", "LSS", "ESH", "LPS")
+    for ftype in all_types:
+        spectrum = aac_filter_bank(noise, ftype, win_type)
+        rebuilt = aac_i_filter_bank(spectrum, ftype, win_type)
+        assert np.isfinite(rebuilt).all()
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_ols_high_snr(win_type: WinType) -> None:
+    """
+    Module-level test:
+    a stream of OLS frames, analysed and synthesised with hop=1024, must be
+    reconstructed with high SNR once the first and last 1024 samples (which
+    lack an overlapping neighbour) are excluded.
+    """
+    rng = np.random.default_rng(1)
+
+    n_frames = 6
+    n_samples = 1024 * (n_frames + 1)
+    signal: StereoSignal = rng.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, ["OLS"] * n_frames, win_type)
+
+    steady = slice(1024, n_samples - 1024)
+    snr = _snr_db(signal[steady, :], rebuilt[steady, :])
+    assert snr > 50.0
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_esh_high_snr(win_type: WinType) -> None:
+    """
+    Module-level test:
+    a stream of ESH frames, analysed and synthesised with hop=1024, must be
+    reconstructed with high SNR once the first and last 1024 samples (which
+    lack an overlapping neighbour) are excluded.
+    """
+    rng = np.random.default_rng(2)
+
+    n_frames = 6
+    n_samples = 1024 * (n_frames + 1)
+    signal: StereoSignal = rng.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, ["ESH"] * n_frames, win_type)
+
+    steady = slice(1024, n_samples - 1024)
+    snr = _snr_db(signal[steady, :], rebuilt[steady, :])
+    assert snr > 45.0
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_transition_sequence(win_type: WinType) -> None:
+    """
+    Reconstruction test over a sequence that walks through every transition
+    of the windowing logic:
+      OLS -> LSS -> ESH -> LPS -> OLS -> OLS
+
+    The first and last 1024 samples are excluded from the SNR measurement.
+    """
+    rng = np.random.default_rng(3)
+
+    sequence: list[FrameType] = ["OLS", "LSS", "ESH", "LPS", "OLS", "OLS"]
+    n_samples = 1024 * (len(sequence) + 1)
+    signal: StereoSignal = rng.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, sequence, win_type)
+
+    steady = slice(1024, n_samples - 1024)
+    snr = _snr_db(signal[steady, :], rebuilt[steady, :])
+    assert snr > 40.0
--- a/source/level_1/tests/test_filterbank_internal.py
+++ b/source/level_1/tests/test_filterbank_internal.py
@ -1,16 +1,33 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank internal (mdct) Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Filterbank internal MDCT/IMDCT functionality.
+# ------------------------------------------------------------
+from __future__ import annotations
+
 import numpy as np
 import pytest

-from level_1.level_1 import _imdct, _mdct
+from core.aac_filterbank import _imdct, _mdct
+from core.aac_types import FloatArray, TimeSignal, MdctCoeffs

-# Helper "fixtures" for filterbank internals (MDCT/IMDCT)
-# -----------------------------------------------------------------------------

-def _assert_allclose(a: np.ndarray, b: np.ndarray, *, rtol: float, atol: float) -> None:
-    # Helper for consistent tolerances across tests.
+def _assert_allclose(a: FloatArray, b: FloatArray, *, rtol: float, atol: float) -> None:
+    """
+    Helper for consistent tolerances across tests.
+    """
    np.testing.assert_allclose(a, b, rtol=rtol, atol=atol)

-def _estimate_gain(y: np.ndarray, x: np.ndarray) -> float:
+
+def _estimate_gain(y: MdctCoeffs, x: MdctCoeffs) -> float:
    """
    Estimate scalar gain g such that y ~= g*x in least-squares sense.
    """
@ -28,18 +45,18 @@ def test_mdct_imdct_mdct_identity_up_to_gain(N: int) -> None:
    Consistency test in coefficient domain:
      mdct(imdct(X)) ~= g * X

-    For our chosen (non-orthonormal) scaling, g is expected to be close to 2.
+    For the chosen (non-orthonormal) scaling, g is expected to be close to 2.
    """
    rng = np.random.default_rng(0)
    K = N // 2

-    X = rng.normal(size=K).astype(np.float64)
-    x = _imdct(X)
-    X_hat = _mdct(x)
+    X: MdctCoeffs = rng.normal(size=K).astype(np.float64)
+    x: TimeSignal = _imdct(X)
+    X_hat: MdctCoeffs = _mdct(x)

    g = _estimate_gain(X_hat, X)
    _assert_allclose(X_hat, g * X, rtol=tolerance, atol=tolerance)
-    _assert_allclose(np.array([g]), np.array([2.0]), rtol=tolerance, atol=tolerance)
+    _assert_allclose(np.array([g], dtype=np.float64), np.array([2.0], dtype=np.float64), rtol=tolerance, atol=tolerance)


@pytest.mark.parametrize("N", [256, 2048])
@ -47,18 +64,16 @@ def test_mdct_linearity(N: int) -> None:
    """
    Linearity test:
      mdct(a*x + b*y) == a*mdct(x) + b*mdct(y)
-
-    This should hold up to numerical error.
    """
    rng = np.random.default_rng(1)
-    x = rng.normal(size=N).astype(np.float64)
-    y = rng.normal(size=N).astype(np.float64)
+    x: TimeSignal = rng.normal(size=N).astype(np.float64)
+    y: TimeSignal = rng.normal(size=N).astype(np.float64)

    a = 0.37
    b = -1.12

-    left = _mdct(a * x + b * y)
-    right = a * _mdct(x) + b * _mdct(y)
+    left: MdctCoeffs = _mdct(a * x + b * y)
+    right: MdctCoeffs = a * _mdct(x) + b * _mdct(y)

    _assert_allclose(left, right, rtol=tolerance, atol=tolerance)

@ -72,14 +87,14 @@ def test_imdct_linearity(N: int) -> None:
    rng = np.random.default_rng(2)
    K = N // 2

-    X = rng.normal(size=K).astype(np.float64)
-    Y = rng.normal(size=K).astype(np.float64)
+    X: MdctCoeffs = rng.normal(size=K).astype(np.float64)
+    Y: MdctCoeffs = rng.normal(size=K).astype(np.float64)

    a = -0.5
    b = 2.0

-    left = _imdct(a * X + b * Y)
-    right = a * _imdct(X) + b * _imdct(Y)
+    left: TimeSignal = _imdct(a * X + b * Y)
+    right: TimeSignal = a * _imdct(X) + b * _imdct(Y)

    _assert_allclose(left, right, rtol=tolerance, atol=tolerance)

@ -92,8 +107,8 @@ def test_mdct_imdct_outputs_are_finite(N: int) -> None:
    rng = np.random.default_rng(3)
    K = N // 2

-    x = rng.normal(size=N).astype(np.float64)
-    X = rng.normal(size=K).astype(np.float64)
+    x: TimeSignal = rng.normal(size=N).astype(np.float64)
+    X: MdctCoeffs = rng.normal(size=K).astype(np.float64)

    X1 = _mdct(x)
    x1 = _imdct(X)
--- a/source/level_1/core/aac_coder.py
+++ b/source/level_1/core/aac_coder.py
@ -0,0 +1,198 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - AAC Coder (Core)
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 1 AAC encoder orchestration.
+#   Keeps the same functional behavior as the original level_1 implementation:
+#   - Reads WAV via soundfile
+#   - Validates stereo and 48 kHz
+#   - Frames into 2048 samples with hop=1024 and zero padding at both ends
+#   - SSC decision uses next-frame attack detection
+#   - Filterbank analysis (MDCT)
+#   - Stores per-channel spectra in AACSeq1 schema:
+#       * ESH: (128, 8)
+#       * else: (1024, 1)
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Union
+
+import soundfile as sf
+
+from core.aac_configuration import WIN_TYPE
+from core.aac_filterbank import aac_filter_bank
+from core.aac_ssc import aac_SSC
+from core.aac_types import *
+
+
+# -----------------------------------------------------------------------------
+# Public helpers (useful for level_x demo wrappers)
+# -----------------------------------------------------------------------------
+
+def aac_read_wav_stereo_48k(filename_in: Union[str, Path]) -> tuple[StereoSignal, int]:
+    """
+    Read a WAV file using soundfile and validate the Level-1 assumptions.
+
+    The file is read with ``always_2d=True`` so the result always has a channel
+    axis, which lets the stereo check below inspect ``x.shape[1]`` directly.
+
+    Parameters
+    ----------
+    filename_in : Union[str, Path]
+        Input WAV filename.
+
+    Returns
+    -------
+    x : StereoSignal (np.ndarray)
+        Stereo samples as float64, shape (N, 2).
+    fs : int
+        Sampling rate (Hz). Must be 48000.
+
+    Raises
+    ------
+    ValueError
+        If the input is not stereo or the sampling rate is not 48 kHz.
+    """
+    # Accept both str and Path; soundfile is handed a plain string below.
+    filename_in = Path(filename_in)
+
+    x, fs = sf.read(str(filename_in), always_2d=True)
+    # Convert whatever sample dtype was returned to float64 for later stages.
+    # NOTE(review): this module has no explicit numpy import; `np` presumably
+    # arrives through the star import of core.aac_types - confirm.
+    x = np.asarray(x, dtype=np.float64)
+
+    # Axis 1 is the channel axis; anything other than two channels is rejected.
+    if x.shape[1] != 2:
+        raise ValueError("Input must be stereo (2 channels).")
+    if int(fs) != 48000:
+        raise ValueError("Input sampling rate must be 48 kHz.")
+
+    return x, int(fs)
+
+
+def aac_pack_frame_f_to_seq_channels(frame_type: FrameType, frame_f: FrameF) -> tuple[FrameChannelF, FrameChannelF]:
+    """
+    Convert the stereo FrameF returned by aac_filter_bank() into per-channel arrays
+    as required by the Level-1 AACSeq1 schema.
+
+    Only "ESH" is tested explicitly; every other frame_type value takes the
+    long-frame path and is checked by shape alone.
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        "OLS" | "LSS" | "ESH" | "LPS".
+    frame_f : FrameF
+        Output of aac_filter_bank():
+        - If frame_type != "ESH": shape (1024, 2)
+        - If frame_type == "ESH": shape (128, 16) packed as [L0 R0 L1 R1 ... L7 R7]
+
+    Returns
+    -------
+    chl_f : FrameChannelF
+        Left channel coefficients:
+        - ESH: shape (128, 8)
+        - else: shape (1024, 1)
+    chr_f : FrameChannelF
+        Right channel coefficients:
+        - ESH: shape (128, 8)
+        - else: shape (1024, 1)
+
+    Raises
+    ------
+    ValueError
+        If frame_f does not have the shape expected for frame_type.
+    """
+    if frame_type == "ESH":
+        if frame_f.shape != (128, 16):
+            raise ValueError("For ESH, frame_f must have shape (128, 16).")
+
+        # De-interleave: even columns are the left channel, odd columns the
+        # right channel, one pair per short window j. Results are fresh arrays.
+        chl_f = np.empty((128, 8), dtype=np.float64)
+        chr_f = np.empty((128, 8), dtype=np.float64)
+        for j in range(8):
+            chl_f[:, j] = frame_f[:, 2 * j + 0]
+            chr_f[:, j] = frame_f[:, 2 * j + 1]
+        return chl_f, chr_f
+
+    # Non-ESH: store as (1024, 1) as required by the original Level-1 schema.
+    if frame_f.shape != (1024, 2):
+        raise ValueError("For OLS/LSS/LPS, frame_f must have shape (1024, 2).")
+
+    # The 0:1 / 1:2 slices keep the column axis, giving (1024, 1) instead of (1024,).
+    # NOTE(review): with copy=False and an input that is already float64 the
+    # results may share memory with frame_f, unlike the ESH branch - confirm
+    # callers do not mutate frame_f afterwards.
+    chl_f = frame_f[:, 0:1].astype(np.float64, copy=False)
+    chr_f = frame_f[:, 1:2].astype(np.float64, copy=False)
+    return chl_f, chr_f
+
+
+
+# -----------------------------------------------------------------------------
+# Level 1 encoder
+# -----------------------------------------------------------------------------
+
+def aac_coder_1(filename_in: Union[str, Path]) -> AACSeq1:
+    """
+    Level-1 AAC encoder.
+
+    This function preserves the behavior of the original level_1 implementation:
+    - Read stereo 48 kHz WAV
+    - Pad hop samples at start and hop samples at end
+    - Frame with win=2048, hop=1024
+    - Use SSC with next-frame lookahead
+    - Apply filterbank analysis
+    - Store per-channel coefficients using AACSeq1 schema
+
+    Parameters
+    ----------
+    filename_in : Union[str, Path]
+        Input WAV filename.
+        Assumption: stereo audio, sampling rate 48 kHz.
+
+    Returns
+    -------
+    AACSeq1
+        List of encoded frames (Level 1 schema). Each entry is a dict with the
+        keys "frame_type", "win_type", "chl" and "chr"; the two channel entries
+        are dicts holding the coefficients under "frame_F".
+
+    Raises
+    ------
+    ValueError
+        Propagated from aac_read_wav_stereo_48k() for non-stereo or non-48 kHz
+        input, or raised by the internal framing sanity checks.
+    """
+    x, fs = aac_read_wav_stereo_48k(filename_in)
+    _ = fs  # kept for clarity; The assignment assumes 48 kHz
+
+    hop = 1024
+    win = 2048
+
+    # Pad one hop of zeros at the beginning (first overlap region) and one hop
+    # of zeros at the end. The look-ahead frame of the last iteration still runs
+    # past x_pad and is zero-padded on the fly inside the loop.
+    pad_pre = np.zeros((hop, 2), dtype=np.float64)
+    pad_post = np.zeros((hop, 2), dtype=np.float64)
+    x_pad = np.vstack([pad_pre, x, pad_post])
+
+    # Number of frames such that current frame fits; next frame will be padded if needed.
+    # NOTE(review): for an input of N samples this gives K = N // hop + 1, and a
+    # decoder that overlap-adds K frames and strips one hop per side returns
+    # (N // hop) * hop samples, so a trailing partial hop is not reproduced -
+    # confirm this is the intended Level-1 behaviour.
+    K = int((x_pad.shape[0] - win) // hop + 1)
+    # x_pad always holds at least 2 * hop == win rows, so K >= 1 and this guard
+    # is purely defensive.
+    if K <= 0:
+        raise ValueError("Input too short for framing.")
+
+    aac_seq: AACSeq1 = []
+    # The frame before the first one is treated as "OLS".
+    prev_frame_type: FrameType = "OLS"
+
+    # One window family for the whole stream, taken from the configuration module.
+    win_type: WinType = WIN_TYPE
+
+    for i in range(K):
+        start = i * hop
+
+        frame_t: FrameT = x_pad[start:start + win, :]
+        if frame_t.shape != (win, 2):
+            # This should not happen due to K definition, but keep it explicit.
+            raise ValueError("Internal framing error: frame_t has wrong shape.")
+
+        # Look-ahead frame: the window starting one hop later.
+        next_t = x_pad[start + hop:start + hop + win, :]
+
+        # Ensure next_t is always (2048, 2) by zero-padding at the tail.
+        if next_t.shape[0] < win:
+            tail = np.zeros((win - next_t.shape[0], 2), dtype=np.float64)
+            next_t = np.vstack([next_t, tail])
+
+        # Frame type depends on the look-ahead frame and the previous decision.
+        frame_type = aac_SSC(frame_t, next_t, prev_frame_type)
+        frame_f = aac_filter_bank(frame_t, frame_type, win_type)
+
+        # Split the stereo spectrum into the per-channel layout of the schema.
+        chl_f, chr_f = aac_pack_frame_f_to_seq_channels(frame_type, frame_f)
+
+        aac_seq.append({
+            "frame_type": frame_type,
+            "win_type": win_type,
+            "chl": {"frame_F": chl_f},
+            "chr": {"frame_F": chr_f},
+        })
+
+        prev_frame_type = frame_type
+
+    return aac_seq
--- a/source/level_1/core/aac_configuration.py
+++ b/source/level_1/core/aac_configuration.py
@ -0,0 +1,22 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Configuration
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   This module contains the global configurations
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+# Imports
+from core.aac_types import WinType
+
+# Window type (window family) used by the Level-1 encoder: aac_coder_1() passes
+# this value to aac_filter_bank() and stores it in every frame's "win_type" field.
+# Options: "SIN" (sinusoid), "KBD" (Kaiser-Bessel-Derived)
+WIN_TYPE: WinType = "SIN"
--- a/source/level_1/core/aac_decoder.py
+++ b/source/level_1/core/aac_decoder.py
@ -0,0 +1,166 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Inverse AAC Coder (Core)
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 1 AAC decoder orchestration (inverse of aac_coder_1()).
+#   Keeps the same functional behavior as the original level_1 implementation:
+#   - Re-pack per-channel spectra into FrameF expected by aac_i_filter_bank()
+#   - IMDCT synthesis per frame
+#   - Overlap-add with hop=1024
+#   - Remove encoder boundary padding: hop at start and hop at end
+#
+#   Note:
+#   aac_decoder_1() returns the reconstructed samples and also writes them to
+#   filename_out as a 48 kHz WAV file.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Union
+
+import soundfile as sf
+
+from core.aac_filterbank import aac_i_filter_bank
+from core.aac_types import *
+
+
+# -----------------------------------------------------------------------------
+# Public helpers (useful for level_x demo wrappers)
+# -----------------------------------------------------------------------------
+
+def aac_unpack_seq_channels_to_frame_f(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
+    """
+    Re-pack per-channel spectra from the Level-1 AACSeq1 schema into the stereo
+    FrameF container expected by aac_i_filter_bank().
+
+    Only "ESH" is tested explicitly; every other frame_type value takes the
+    long-frame path and is checked by shape alone. The result is always a
+    newly allocated float64 array.
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        "OLS" | "LSS" | "ESH" | "LPS".
+    chl_f : FrameChannelF
+        Left channel coefficients:
+        - ESH: (128, 8)
+        - else: (1024, 1)
+    chr_f : FrameChannelF
+        Right channel coefficients:
+        - ESH: (128, 8)
+        - else: (1024, 1)
+
+    Returns
+    -------
+    FrameF
+        Stereo coefficients:
+        - ESH: (128, 16) packed as [L0 R0 L1 R1 ... L7 R7]
+        - else: (1024, 2)
+
+    Raises
+    ------
+    ValueError
+        If either channel array does not have the shape expected for frame_type.
+    """
+    if frame_type == "ESH":
+        if chl_f.shape != (128, 8) or chr_f.shape != (128, 8):
+            raise ValueError("ESH channel frame_F must have shape (128, 8).")
+
+        # Interleave: short window j goes to columns 2*j (left) and 2*j+1 (right).
+        frame_f = np.empty((128, 16), dtype=np.float64)
+        for j in range(8):
+            frame_f[:, 2 * j + 0] = chl_f[:, j]
+            frame_f[:, 2 * j + 1] = chr_f[:, j]
+        return frame_f
+
+    # Non-ESH: expected (1024, 1) per channel in Level-1 schema.
+    if chl_f.shape != (1024, 1) or chr_f.shape != (1024, 1):
+        raise ValueError("Non-ESH channel frame_F must have shape (1024, 1).")
+
+    # Column 0 carries the left channel, column 1 the right channel.
+    frame_f = np.empty((1024, 2), dtype=np.float64)
+    frame_f[:, 0] = chl_f[:, 0]
+    frame_f[:, 1] = chr_f[:, 0]
+    return frame_f
+
+
+def aac_remove_padding(y_pad: StereoSignal, hop: int = 1024) -> StereoSignal:
+    """
+    Remove the boundary padding that the Level-1 encoder adds:
+    hop samples at start and hop samples at end.
+
+    Parameters
+    ----------
+    y_pad : StereoSignal (np.ndarray)
+        Reconstructed padded stream, shape (N_pad, 2).
+    hop : int
+        Hop size in samples (default 1024). Must be non-negative; hop == 0
+        returns the whole stream.
+
+    Returns
+    -------
+    StereoSignal (np.ndarray)
+        Unpadded reconstructed stream, shape (N_pad - 2*hop, 2). This is a
+        slice of y_pad, not a copy.
+
+    Raises
+    ------
+    ValueError
+        If hop is negative or y_pad is too short to unpad.
+    """
+    if hop < 0:
+        raise ValueError("hop must be non-negative.")
+
+    n_pad = y_pad.shape[0]
+    if n_pad < 2 * hop:
+        raise ValueError("Decoded stream too short to unpad.")
+
+    # Use an explicit end index: y_pad[hop:-hop] collapses to an empty slice
+    # when hop == 0, because -0 == 0.
+    return y_pad[hop:n_pad - hop, :]
+
+
+# -----------------------------------------------------------------------------
+# Level 1 decoder (core)
+# -----------------------------------------------------------------------------
+
+def aac_decoder_1(aac_seq_1: AACSeq1, filename_out: Union[str, Path]) -> StereoSignal:
+    """
+    Level-1 AAC decoder (inverse of aac_coder_1()).
+
+    This function preserves the behavior of the original level_1 implementation:
+    - Reconstruct the full padded stream by overlap-adding K synthesized frames
+    - Remove hop padding at the beginning and hop padding at the end
+    - Write the reconstructed stereo WAV file (48 kHz)
+    - Return reconstructed stereo samples as float64
+
+    Parameters
+    ----------
+    aac_seq_1 : AACSeq1
+        Encoded sequence as produced by aac_coder_1().
+    filename_out : Union[str, Path]
+        Output WAV filename. Assumption: 48 kHz, stereo.
+
+    Returns
+    -------
+    StereoSignal
+        Decoded audio samples (time-domain), stereo, shape (N, 2), dtype float64.
+        With K input frames, N = (K - 1) * 1024.
+
+    Raises
+    ------
+    ValueError
+        From the helpers when a frame has an unexpected shape or frame type, or
+        from aac_remove_padding() when the sequence is empty (the buffer is then
+        shorter than two hops).
+    """
+    filename_out = Path(filename_out)
+
+    hop = 1024
+    win = 2048
+    K = len(aac_seq_1)
+
+    # Output includes the encoder padding region, so we reconstruct the full padded stream.
+    # For K frames: last frame starts at (K-1)*hop and spans win,
+    # so total length = (K-1)*hop + win.
+    n_pad = (K - 1) * hop + win
+    y_pad: StereoSignal = np.zeros((n_pad, 2), dtype=np.float64)
+
+    for i, fr in enumerate(aac_seq_1):
+        # The window family is read per frame from the sequence itself.
+        frame_type: FrameType = fr["frame_type"]
+        win_type: WinType = fr["win_type"]
+
+        # Coerce stored coefficients to float64 arrays before re-packing.
+        chl_f = np.asarray(fr["chl"]["frame_F"], dtype=np.float64)
+        chr_f = np.asarray(fr["chr"]["frame_F"], dtype=np.float64)
+
+        frame_f: FrameF = aac_unpack_seq_channels_to_frame_f(frame_type, chl_f, chr_f)
+        frame_t_hat: FrameT = aac_i_filter_bank(frame_f, frame_type, win_type)  # (2048, 2)
+
+        # Overlap-add: consecutive frames are offset by one hop, so each frame
+        # overlaps its neighbours by win - hop samples.
+        start = i * hop
+        y_pad[start:start + win, :] += frame_t_hat
+
+    # Drop the first and last hop of the padded stream.
+    y: StereoSignal = aac_remove_padding(y_pad, hop=hop)
+
+    # Level 1 assumption: 48 kHz output.
+    sf.write(str(filename_out), y, 48000)
+
+    return y
--- a/source/level_1/core/aac_filterbank.py
+++ b/source/level_1/core/aac_filterbank.py
@ -0,0 +1,454 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank module
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Filterbank stage (MDCT/IMDCT), windowing, ESH packing/unpacking
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from core.aac_types import *
+
+from scipy.signal.windows import kaiser
+
+# Private helpers for Filterbank
+# ------------------------------------------------------------
+
+def _sin_window(N: int) -> Window:
+    """
+    Build a sinusoidal (SIN) window of length N.
+
+    The AAC sinusoid window is:
+        w[n] = sin(pi/N * (n + 0.5)),  for 0 <= n < N
+
+    The half-sample offset makes the window symmetric: w[n] == w[N-1-n].
+
+    Parameters
+    ----------
+    N : int
+        Window length in samples.
+
+    Returns
+    -------
+    Window
+        1-D array of shape (N, ) with dtype float64.
+    """
+    # Sample indices 0 .. N-1 as float64 so the whole expression stays float64.
+    n = np.arange(N, dtype=np.float64)
+    return np.sin((np.pi / N) * (n + 0.5))
+
+
+def _kbd_window(N: int, alpha: float) -> Window:
+    """
+    Build a Kaiser-Bessel-Derived (KBD) window of length N.
+
+    This follows the standard KBD construction used in AAC:
+      1) Build a Kaiser kernel of length (N/2 + 1).
+      2) Form the left half by cumulative summation, normalization, and sqrt.
+      3) Mirror the left half to form the right half (symmetric full-length window).
+
+    Notes
+    -----
+    - N must be even (AAC uses N=2048 for long and N=256 for short).
+      This is not checked here; half is computed with integer division.
+    - The assignment specifies alpha=6 for long windows and alpha=4 for short windows.
+    - The Kaiser beta parameter is commonly taken as beta = pi * alpha for this context.
+
+    Parameters
+    ----------
+    N : int
+        Window length in samples (must be even).
+    alpha : float
+        KBD alpha parameter.
+
+    Returns
+    -------
+    Window
+        1-D array of shape (N,) with dtype float64.
+    """
+    half = N // 2
+
+    # Kaiser kernel length: half + 1 samples (0 .. half)
+    # beta = pi * alpha per the usual correspondence with the ISO definition
+    kernel = kaiser(half + 1, beta=np.pi * alpha).astype(np.float64)
+
+    # Running sum of the kernel; the last entry is the total used to normalise.
+    csum = np.cumsum(kernel)
+    denom = csum[-1]
+
+    # Dropping the last cumulative value keeps exactly `half` samples.
+    w_left = np.sqrt(csum[:-1] / denom)  # length half, n = 0 .. half-1
+    w_right = w_left[::-1]               # mirror for second half
+
+    return np.concatenate([w_left, w_right])
+
+
+def _long_window(win_type: WinType) -> Window:
+    """
+    Return the long AAC window (length 2048) for the selected window family.
+
+    The window is rebuilt on every call; nothing is cached.
+
+    Parameters
+    ----------
+    win_type : WinType
+        Either "SIN" or "KBD".
+
+    Returns
+    -------
+    Window
+        1-D array of shape (2048,) with dtype float64.
+
+    Raises
+    ------
+    ValueError
+        If win_type is neither "SIN" nor "KBD".
+    """
+    if win_type == "SIN":
+        return _sin_window(2048)
+    if win_type == "KBD":
+        # Assignment-specific alpha values (6 for the long window)
+        return _kbd_window(2048, alpha=6.0)
+    raise ValueError(f"Invalid win_type: {win_type!r}")
+
+
+def _short_window(win_type: WinType) -> Window:
+    """
+    Return the short AAC window (length 256) for the selected window family.
+
+    The window is rebuilt on every call; nothing is cached.
+
+    Parameters
+    ----------
+    win_type : WinType
+        Either "SIN" or "KBD".
+
+    Returns
+    -------
+    Window
+        1-D array of shape (256,) with dtype float64.
+
+    Raises
+    ------
+    ValueError
+        If win_type is neither "SIN" nor "KBD".
+    """
+    if win_type == "SIN":
+        return _sin_window(256)
+    if win_type == "KBD":
+        # Assignment-specific alpha values (4 for the short window)
+        return _kbd_window(256, alpha=4.0)
+    raise ValueError(f"Invalid win_type: {win_type!r}")
+
+
+def _window_sequence(frame_type: FrameType, win_type: WinType) -> Window:
+    """
+    Build the 2048-sample analysis/synthesis window for OLS/LSS/LPS.
+
+    In this assignment we assume a single window family is used globally
+    (no mixed KBD/SIN halves). Therefore, both the long and short windows
+    are drawn from the same family.
+
+    For frame_type:
+    - "OLS": return the long window Wl (2048).
+    - "LSS": construct [Wl_left(1024), ones(448), Ws_right(128), zeros(448)].
+    - "LPS": construct [zeros(448), Ws_left(128), ones(448), Wl_right(1024)].
+
+    Parameters
+    ----------
+    frame_type : FrameType
+        One of "OLS", "LSS", "LPS".
+    win_type : WinType
+        Either "SIN" or "KBD".
+
+    Returns
+    -------
+    Window
+        1-D array of shape (2048,) with dtype float64.
+
+    Raises
+    ------
+    ValueError
+        If frame_type is not "OLS", "LSS" or "LPS" (this includes "ESH"), or if
+        win_type is rejected by the window builders.
+    """
+    # Both windows are built up front, so an invalid win_type fails here
+    # regardless of frame_type; wS is unused in the "OLS" case.
+    wL = _long_window(win_type)   # length 2048
+    wS = _short_window(win_type)  # length 256
+
+    if frame_type == "OLS":
+        return wL
+
+    if frame_type == "LSS":
+        # 0..1023: left half of long window
+        # 1024..1471: ones (448 samples)
+        # 1472..1599: right half of short window (128 samples)
+        # 1600..2047: zeros (448 samples)
+        out = np.zeros(2048, dtype=np.float64)
+        out[0:1024] = wL[0:1024]
+        out[1024:1472] = 1.0
+        out[1472:1600] = wS[128:256]
+        # Already zero from np.zeros; written out to mirror the layout above.
+        out[1600:2048] = 0.0
+        return out
+
+    if frame_type == "LPS":
+        # 0..447: zeros (448)
+        # 448..575: left half of short window (128)
+        # 576..1023: ones (448)
+        # 1024..2047: right half of long window (1024)
+        out = np.zeros(2048, dtype=np.float64)
+        # Already zero from np.zeros; written out to mirror the layout above.
+        out[0:448] = 0.0
+        out[448:576] = wS[0:128]
+        out[576:1024] = 1.0
+        out[1024:2048] = wL[1024:2048]
+        return out
+
+    raise ValueError(f"Invalid frame_type for long window sequence: {frame_type!r}")
+
+
+def _mdct(s: TimeSignal) -> MdctCoeffs:
+    """
+    MDCT (direct form) as specified in the assignment.
+
+    Parameters
+    ----------
+    s : TimeSignal
+        Windowed time samples, 1-D array of length N (N = 2048 or 256).
+        The input is flattened, so any array with N elements is accepted.
+
+    Returns
+    -------
+    MdctCoeffs
+        MDCT coefficients, 1-D array of length N/2.
+
+    Raises
+    ------
+    ValueError
+        If the input does not contain exactly 2048 or 256 samples.
+
+    Definition
+    ----------
+    X[k] = 2 * sum_{n=0..N-1} s[n] * cos((2*pi/N) * (n + n0) * (k + 1/2)),
+    where n0 = (N/2 + 1)/2.
+    """
+    s = np.asarray(s, dtype=np.float64).reshape(-1)
+    N = int(s.shape[0])
+    if N not in (2048, 256):
+        raise ValueError("MDCT input length must be 2048 or 256.")
+
+    # Shifted time index (n + n0) and half-integer frequency index (k + 1/2).
+    n0 = (N / 2.0 + 1.0) / 2.0
+    n = np.arange(N, dtype=np.float64) + n0
+    k = np.arange(N // 2, dtype=np.float64) + 0.5
+
+    # NOTE(review): the full cosine matrix is rebuilt on every call
+    # (2048 x 1024 float64 = 16 MiB for long frames); caching it per N would
+    # avoid the repeated work - confirm before changing.
+    C = np.cos((2.0 * np.pi / N) * np.outer(n, k))  # (N, N/2)
+    X = 2.0 * (s @ C)  # (N/2,)
+    return X
+
+
+def _imdct(X: MdctCoeffs) -> TimeSignal:
+    """
+    IMDCT (direct form) as specified in the assignment.
+
+    Parameters
+    ----------
+    X : MdctCoeffs
+        MDCT coefficients, 1-D array of length K (K = 1024 or 128).
+        The input is flattened, so any array with K elements is accepted.
+
+    Returns
+    -------
+    TimeSignal
+        Reconstructed time samples, 1-D array of length N = 2K.
+        No window is applied here; callers multiply by the synthesis window.
+
+    Raises
+    ------
+    ValueError
+        If the input does not contain exactly 1024 or 128 coefficients.
+
+    Definition
+    ----------
+    s[n] = (2/N) * sum_{k=0..N/2-1} X[k] * cos((2*pi/N) * (n + n0) * (k + 1/2)),
+    where n0 = (N/2 + 1)/2.
+    """
+    X = np.asarray(X, dtype=np.float64).reshape(-1)
+    K = int(X.shape[0])
+    if K not in (1024, 128):
+        raise ValueError("IMDCT input length must be 1024 or 128.")
+
+    N = 2 * K
+    n0 = (N / 2.0 + 1.0) / 2.0
+
+    # Same shifted time index and half-integer frequency index as the forward MDCT.
+    n = np.arange(N, dtype=np.float64) + n0
+    k = np.arange(K, dtype=np.float64) + 0.5
+
+    # NOTE(review): this cosine matrix is also rebuilt on every call.
+    C = np.cos((2.0 * np.pi / N) * np.outer(n, k))  # (N, K)
+    s = (2.0 / N) * (C @ X)  # (N,)
+    return s
+
+
+def _filter_bank_esh_channel(x_ch: FrameChannelT, win_type: WinType) -> FrameChannelF:
+    """
+    ESH analysis for one channel.
+
+    Eight 256-sample segments are cut from x_ch, each multiplied by the short
+    window and transformed with the MDCT. Samples 0..447 and 1600..2047 of the
+    frame are not read.
+
+    Parameters
+    ----------
+    x_ch : FrameChannelT
+        Time-domain channel frame (expected shape: (2048,)).
+    win_type : WinType
+        Window family ("KBD" or "SIN").
+
+    Returns
+    -------
+    FrameChannelF
+        Array of shape (128, 8). Column j contains the 128 MDCT coefficients
+        of the j-th short window.
+    """
+    wS = _short_window(win_type)  # (256,)
+    X_esh = np.empty((128, 8), dtype=np.float64)
+
+    # ESH subwindows are taken from the central region:
+    # start positions: 448 + 128*j, j = 0..7
+    # Consecutive segments overlap by 128 samples; the last one ends at 1600.
+    for j in range(8):
+        start = 448 + 128 * j
+        seg = x_ch[start:start + 256] * wS  # (256,)
+        X_esh[:, j] = _mdct(seg)           # (128,)
+
+    return X_esh
+
+
+def _unpack_esh(frame_F: FrameF) -> tuple[FrameChannelF, FrameChannelF]:
+    """
+    Unpack ESH spectrum from shape (128, 16) into per-channel arrays (128, 8).
+
+    Parameters
+    ----------
+    frame_F : FrameF
+        Packed ESH spectrum (expected shape: (128, 16)).
+
+    Returns
+    -------
+    left : FrameChannelF
+        Left channel spectrum, shape (128, 8).
+    right : FrameChannelF
+        Right channel spectrum, shape (128, 8).
+
+    Raises
+    ------
+    ValueError
+        If frame_F does not have shape (128, 16).
+
+    Notes
+    -----
+    Inverse mapping of the packing used in aac_filter_bank():
+      packed[:, 2*j]   = left[:, j]
+      packed[:, 2*j+1] = right[:, j]
+    """
+    if frame_F.shape != (128, 16):
+        raise ValueError("ESH frame_F must have shape (128, 16).")
+
+    # Copy into freshly allocated arrays: even columns -> left, odd -> right.
+    left = np.empty((128, 8), dtype=np.float64)
+    right = np.empty((128, 8), dtype=np.float64)
+    for j in range(8):
+        left[:, j] = frame_F[:, 2 * j + 0]
+        right[:, j] = frame_F[:, 2 * j + 1]
+    return left, right
+
+
+def _i_filter_bank_esh_channel(X_esh: FrameChannelF, win_type: WinType) -> FrameChannelT:
+    """
+    ESH synthesis for one channel.
+
+    Parameters
+    ----------
+    X_esh : FrameChannelF
+        MDCT coefficients for 8 short windows (expected shape: (128, 8)).
+    win_type : WinType
+        Window family ("KBD" or "SIN").
+
+    Returns
+    -------
+    FrameChannelT
+        Time-domain channel contribution, shape (2048,).
+        This is already overlap-added internally for the 8 short blocks and
+        ready for OLA at the caller level. Samples 0..447 and 1600..2047
+        stay zero.
+
+    Raises
+    ------
+    ValueError
+        If X_esh does not have shape (128, 8).
+    """
+    if X_esh.shape != (128, 8):
+        raise ValueError("X_esh must have shape (128, 8).")
+
+    wS = _short_window(win_type)  # (256,)
+    out = np.zeros(2048, dtype=np.float64)
+
+    # Each short IMDCT returns 256 samples. Place them at:
+    # start = 448 + 128*j, j=0..7 (50% overlap)
+    # These are the same positions the analysis stage cuts its segments from.
+    for j in range(8):
+        seg = _imdct(X_esh[:, j]) * wS  # (256,)
+        start = 448 + 128 * j
+        out[start:start + 256] += seg
+
+    return out
+
+
+# -----------------------------------------------------------------------------
+# Public Function prototypes (Level 1)
+# -----------------------------------------------------------------------------
+
+def aac_filter_bank(frame_T: FrameT, frame_type: FrameType, win_type: WinType) -> FrameF:
+    """
+    Filterbank stage (MDCT analysis).
+
+    Parameters
+    ----------
+    frame_T : FrameT
+        Time-domain frame, stereo, shape (2048, 2).
+    frame_type : FrameType
+        Type of the frame under encoding ("OLS"|"LSS"|"ESH"|"LPS").
+    win_type : WinType
+        Window type ("KBD" or "SIN") used for the current frame.
+
+    Returns
+    -------
+    frame_F : FrameF
+        Frequency-domain MDCT coefficients:
+        - If frame_type in {"OLS","LSS","LPS"}: array shape (1024, 2)
+          containing MDCT coefficients for both channels.
+        - If frame_type == "ESH": contains 8 subframes, each subframe has shape (128,2),
+          placed in columns according to subframe order, i.e. overall shape (128, 16).
+
+    Raises
+    ------
+    ValueError
+        If frame_T does not have shape (2048, 2), or if frame_type or win_type
+        is not one of the supported codes.
+    """
+    if frame_T.shape != (2048, 2):
+        raise ValueError("frame_T must have shape (2048, 2).")
+
+    # Column 0 is the left channel, column 1 the right channel.
+    xL :FrameChannelT = frame_T[:, 0].astype(np.float64, copy=False)
+    xR :FrameChannelT = frame_T[:, 1].astype(np.float64, copy=False)
+
+    if frame_type in ("OLS", "LSS", "LPS"):
+        # One 2048-sample window shared by both channels, then one MDCT each.
+        w = _window_sequence(frame_type, win_type)  # length 2048
+        XL = _mdct(xL * w)  # length 1024
+        XR = _mdct(xR * w)  # length 1024
+        out = np.empty((1024, 2), dtype=np.float64)
+        out[:, 0] = XL
+        out[:, 1] = XR
+        return out
+
+    if frame_type == "ESH":
+        Xl = _filter_bank_esh_channel(xL, win_type)  # (128, 8)
+        Xr = _filter_bank_esh_channel(xR, win_type)  # (128, 8)
+
+        # Pack into (128, 16): each subframe as (128,2) placed in columns
+        # Column order is [L0 R0 L1 R1 ... L7 R7].
+        out = np.empty((128, 16), dtype=np.float64)
+        for j in range(8):
+            out[:, 2 * j + 0] = Xl[:, j]
+            out[:, 2 * j + 1] = Xr[:, j]
+        return out
+
+    raise ValueError(f"Invalid frame_type: {frame_type!r}")
+
+
+def aac_i_filter_bank(frame_F: FrameF, frame_type: FrameType, win_type: WinType) -> FrameT:
+    """
+    Inverse filterbank (IMDCT synthesis).
+
+    The returned frame is windowed but not yet combined with its neighbours;
+    overlap-add across frames is left to the caller.
+
+    Parameters
+    ----------
+    frame_F : FrameF
+        Frequency-domain MDCT coefficients as produced by aac_filter_bank().
+    frame_type : FrameType
+        Frame type ("OLS"|"LSS"|"ESH"|"LPS").
+    win_type : WinType
+        Window type ("KBD" or "SIN").
+
+    Returns
+    -------
+    frame_T : FrameT
+        Reconstructed time-domain frame, stereo, shape (2048, 2).
+
+    Raises
+    ------
+    ValueError
+        If frame_F does not have the shape expected for frame_type, or if
+        frame_type or win_type is not one of the supported codes.
+    """
+    if frame_type in ("OLS", "LSS", "LPS"):
+        if frame_F.shape != (1024, 2):
+            raise ValueError("For OLS/LSS/LPS, frame_F must have shape (1024, 2).")
+
+        # The synthesis window is built exactly like the analysis window.
+        w = _window_sequence(frame_type, win_type)
+
+        xL = _imdct(frame_F[:, 0]) * w
+        xR = _imdct(frame_F[:, 1]) * w
+
+        out = np.empty((2048, 2), dtype=np.float64)
+        out[:, 0] = xL
+        out[:, 1] = xR
+        return out
+
+    if frame_type == "ESH":
+        if frame_F.shape != (128, 16):
+            raise ValueError("For ESH, frame_F must have shape (128, 16).")
+
+        # De-interleave the channels, then synthesise the 8 short blocks each.
+        Xl, Xr = _unpack_esh(frame_F)
+        xL = _i_filter_bank_esh_channel(Xl, win_type)
+        xR = _i_filter_bank_esh_channel(Xr, win_type)
+
+        out = np.empty((2048, 2), dtype=np.float64)
+        out[:, 0] = xL
+        out[:, 1] = xR
+        return out
+
+    raise ValueError(f"Invalid frame_type: {frame_type!r}")
--- a/source/level_1/core/aac_ssc.py
+++ b/source/level_1/core/aac_ssc.py
@ -0,0 +1,217 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Sequence Segmentation Control module
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Sequence Segmentation Control module (SSC).
+#   Selects and returns the frame type based on input parameters.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import Dict, Tuple
+from core.aac_types import FrameType, FrameT, FrameChannelT
+
+import numpy as np
+
+# -----------------------------------------------------------------------------
+# Private helpers for SSC
+# -----------------------------------------------------------------------------
+
+# See Table 1 in mm-2025-hw-v0.1.pdf
+# Keys are (left-channel decision, right-channel decision); the value is the
+# common frame type used for both channels. All 16 combinations are listed and
+# the table is symmetric in its two arguments. Any pair containing "ESH", and
+# the mixed pair LSS/LPS, resolves to "ESH".
+STEREO_MERGE_TABLE: Dict[Tuple[FrameType, FrameType], FrameType] = {
+    ("OLS", "OLS"): "OLS",
+    ("OLS", "LSS"): "LSS",
+    ("OLS", "ESH"): "ESH",
+    ("OLS", "LPS"): "LPS",
+    ("LSS", "OLS"): "LSS",
+    ("LSS", "LSS"): "LSS",
+    ("LSS", "ESH"): "ESH",
+    ("LSS", "LPS"): "ESH",
+    ("ESH", "OLS"): "ESH",
+    ("ESH", "LSS"): "ESH",
+    ("ESH", "ESH"): "ESH",
+    ("ESH", "LPS"): "ESH",
+    ("LPS", "OLS"): "LPS",
+    ("LPS", "LSS"): "ESH",
+    ("LPS", "ESH"): "ESH",
+    ("LPS", "LPS"): "LPS",
+}
+
+
+def _detect_attack(next_frame_channel: FrameChannelT) -> bool:
+    """
+    Detect whether the *next* frame (single channel) implies an attack, i.e. ESH
+    according to the assignment's criterion.
+
+    Parameters
+    ----------
+    next_frame_channel : FrameChannelT
+        One channel of next_frame_T (expected shape: (2048,)). Integer sample
+        data is accepted; all filtering is carried out in float64.
+
+    Returns
+    -------
+    bool
+        True if an attack is detected (=> next frame predicted ESH), else False.
+
+    Notes
+    -----
+    The criterion is implemented as described in the spec:
+
+    1) Apply the high-pass filter:
+           H(z) = (1 - z^-1) / (1 - 0.5 z^-1)
+       implemented in the time domain as:
+           y[n] = x[n] - x[n-1] + 0.5*y[n-1]
+
+    2) Split y into 16 segments of length 128 and compute segment energies s[l].
+
+    3) Compute the ratio:
+           ds[l] = s[l] / s[l-1]
+
+    4) An attack exists if there exists l in {1..7} such that:
+           s[l] > 1e-3  and  ds[l] > 10
+    """
+    # Local alias; expected to be a 1-D array of length 2048.
+    x = np.asarray(next_frame_channel)
+
+    # High-pass filter reference implementation (scalar recurrence).
+    # The output buffer is forced to float64: inheriting an integer dtype from
+    # the input would truncate every filtered sample when it is stored and
+    # distort the segment energies computed below.
+    y = np.zeros_like(x, dtype=np.float64)
+    prev_x = 0.0
+    prev_y = 0.0
+    for n in range(x.shape[0]):
+        xn = float(x[n])
+        yn = (xn - prev_x) + 0.5 * prev_y
+        y[n] = yn
+        prev_x = xn
+        prev_y = yn
+
+    # Segment energies over 16 blocks of 128 samples.
+    s = np.empty(16, dtype=np.float64)
+    for l in range(16):
+        a = l * 128
+        b = (l + 1) * 128
+        seg = y[a:b]
+        s[l] = float(np.sum(seg * seg))
+
+    # ds[l] for l>=1. For l=0 not defined, keep 0.
+    ds = np.zeros(16, dtype=np.float64)
+    eps = 1e-12  # Avoid division by zero without materially changing the logic.
+    for l in range(1, 16):
+        ds[l] = s[l] / max(s[l - 1], eps)
+
+    # Spec: check l in {1..7}.
+    # NOTE(review): energies and ratios for segments 8..15 are computed above
+    # but never examined - confirm against the spec that only 1..7 is intended.
+    for l in range(1, 8):
+        if (s[l] > 1e-3) and (ds[l] > 10.0):
+            return True
+
+    return False
+
+
+def _decide_frame_type(prev_frame_type: FrameType, attack: bool) -> FrameType:
+    """
+    Decide the current frame type for a single channel based on the previous
+    frame type and whether the next frame is predicted to be ESH.
+
+    Rules (spec):
+
+    - If prev is "LSS" => current is "ESH"
+    - If prev is "LPS" => current is "OLS"
+    - If prev is "OLS" => current is "LSS" if attack else "OLS"
+    - If prev is "ESH" => current is "ESH" if attack else "LPS"
+
+    The attack flag only matters after "OLS" and "ESH"; after "LSS" and "LPS"
+    the next type is forced.
+
+    Parameters
+    ----------
+    prev_frame_type : FrameType
+        Previous frame type (one of "OLS", "LSS", "ESH", "LPS").
+    attack : bool
+        True if the next frame is predicted ESH for this channel.
+
+    Returns
+    -------
+    FrameType
+        The per-channel decision for the current frame.
+
+    Raises
+    ------
+    ValueError
+        If prev_frame_type is not one of the four frame type codes.
+
+    """
+    # Forced transitions: the attack flag is ignored for these two.
+    if prev_frame_type == "LSS":
+        return "ESH"
+    if prev_frame_type == "LPS":
+        return "OLS"
+    # Attack-dependent transitions.
+    if prev_frame_type == "OLS":
+        return "LSS" if attack else "OLS"
+    if prev_frame_type == "ESH":
+        return "ESH" if attack else "LPS"
+
+    raise ValueError(f"Invalid prev_frame_type: {prev_frame_type!r}")
+
+
+def _stereo_merge(ft_l: FrameType, ft_r: FrameType) -> FrameType:
+    """
+    Merge per-channel frame type decisions into one common frame type using
+    the stereo merge table from the spec.
+
+    Parameters
+    ----------
+    ft_l : FrameType
+        Frame type decision for the left channel.
+    ft_r : FrameType
+        Frame type decision for the right channel.
+
+    Returns
+    -------
+    FrameType
+        The merged common frame type.
+
+    Raises
+    ------
+    ValueError
+        If the pair (ft_l, ft_r) is not a key of STEREO_MERGE_TABLE.
+    """
+    # Convert the lookup failure into the ValueError used elsewhere in this
+    # module, keeping the original KeyError as the cause.
+    try:
+        return STEREO_MERGE_TABLE[(ft_l, ft_r)]
+    except KeyError as e:
+        raise ValueError(f"Invalid stereo merge pair: {(ft_l, ft_r)}") from e
+
+
+# -----------------------------------------------------------------------------
+# Public Function prototypes (Level 1)
+# -----------------------------------------------------------------------------
+
+def aac_SSC(frame_T: FrameT, next_frame_T: FrameT, prev_frame_type: FrameType) -> FrameType:
+    """
+    Sequence Segmentation Control (SSC).
+
+    Select and return the frame type for the current frame (i) based on:
+    - the current time-domain frame (stereo),
+    - the next time-domain frame (stereo), used for attack detection,
+    - the previous frame type.
+
+    In this implementation the current frame is only shape-checked; the
+    decision itself depends on next_frame_T and prev_frame_type.
+
+    Parameters
+    ----------
+    frame_T : FrameT
+        Current time-domain frame i (expected shape: (2048, 2)).
+    next_frame_T : FrameT
+        Next time-domain frame (i+1), used to decide transitions to/from ESH
+        (expected shape: (2048, 2)).
+    prev_frame_type : FrameType
+        Frame type chosen for the previous frame (i-1).
+
+    Returns
+    -------
+    FrameType
+        One of: "OLS", "LSS", "ESH", "LPS".
+
+    Raises
+    ------
+    ValueError
+        If either frame does not have shape (2048, 2), or if prev_frame_type
+        is not a valid frame type code.
+    """
+    if frame_T.shape != (2048, 2):
+        raise ValueError("frame_T must have shape (2048, 2).")
+    if next_frame_T.shape != (2048, 2):
+        raise ValueError("next_frame_T must have shape (2048, 2).")
+
+    # Detect attack independently per channel on the next frame.
+    attack_l = _detect_attack(next_frame_T[:, 0])
+    attack_r = _detect_attack(next_frame_T[:, 1])
+
+    # Decide per-channel type based on shared prev_frame_type.
+    # Because the previous type is shared, the two decisions can differ only
+    # after "OLS" (OLS vs LSS) or after "ESH" (ESH vs LPS).
+    ft_l = _decide_frame_type(prev_frame_type, attack_l)
+    ft_r = _decide_frame_type(prev_frame_type, attack_r)
+
+    # Stereo merge as per the spec table.
+    return _stereo_merge(ft_l, ft_r)
--- a/source/level_1/core/aac_types.py
+++ b/source/level_1/core/aac_types.py
@ -0,0 +1,193 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Public Type Aliases
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   This module implements Public Type aliases
+#
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import List, Literal, TypeAlias, TypedDict
+import numpy as np
+from numpy.typing import NDArray
+
+# -----------------------------------------------------------------------------
+# Code enums (for readability; not intended to enforce shapes/lengths)
+# -----------------------------------------------------------------------------
+
+# The aliases in this group are typing.Literal types: they name the allowed
+# string codes for type checkers and readers of the code.
+FrameType: TypeAlias = Literal["OLS", "LSS", "ESH", "LPS"]
+"""
+Frame type codes (AAC):
+- "OLS": ONLY_LONG_SEQUENCE
+- "LSS": LONG_START_SEQUENCE
+- "ESH": EIGHT_SHORT_SEQUENCE
+- "LPS": LONG_STOP_SEQUENCE
+"""
+
+WinType: TypeAlias = Literal["KBD", "SIN"]
+"""
+Window type codes (AAC):
+- "KBD": Kaiser-Bessel-Derived
+- "SIN": sinusoid
+"""
+
+# Same strings as the "chl" / "chr" fields declared on AACSeq1Frame in this module.
+ChannelKey: TypeAlias = Literal["chl", "chr"]
+"""Channel dictionary keys used in Level 1 payloads."""
+
+
+# -----------------------------------------------------------------------------
+# Array “semantic” aliases
+#
+# Goal: communicate meaning (time/frequency/window, stereo/channel) without
+# forcing strict shapes in the type system.
+# -----------------------------------------------------------------------------
+
+# Base alias: every "semantic" array alias below is defined as FloatArray, so
+# they are all the same type to a checker; the names only document intent.
+FloatArray: TypeAlias = NDArray[np.float64]
+"""
+Generic float64 NumPy array.
+
+Note:
+- We standardize internal numeric computations to float64 for stability and
+  reproducibility. External I/O can still be float32, but we convert at the
+  boundaries.
+"""
+
+Window: TypeAlias = FloatArray
+"""
+Time-domain window (weighting sequence), 1-D.
+
+Typical lengths in this assignment:
+- Long: 2048
+- Short: 256
+- Window sequences for LSS/LPS are also 2048
+
+Expected shape: (N,)
+dtype: float64
+"""
+
+TimeSignal: TypeAlias = FloatArray
+"""
+Time-domain signal samples, typically 1-D.
+
+Examples:
+- Windowed MDCT input: shape (N,)
+- IMDCT output: shape (N,)
+
+dtype: float64
+"""
+
+StereoSignal: TypeAlias = FloatArray
+"""
+Time-domain stereo signal stream.
+
+Expected (typical) shape: (N, 2)
+- axis 0: time samples
+- axis 1: channels [L, R]
+
+dtype: float64
+"""
+
+MdctCoeffs: TypeAlias = FloatArray
+"""
+MDCT coefficient vector, typically 1-D.
+
+Examples:
+- Long: shape (1024,)
+- Short: shape (128,)
+
+dtype: float64
+"""
+
+
+# Frame-level aliases (one analysis frame rather than a whole stream).
+FrameT: TypeAlias = FloatArray
+"""
+Time-domain frame (stereo), as used by the filterbank input/output.
+
+Expected (typical) shape for stereo: (2048, 2)
+- axis 0: time samples
+- axis 1: channels [L, R]
+
+dtype: float64
+"""
+
+FrameChannelT: TypeAlias = FloatArray
+"""
+Time-domain single-channel frame.
+
+Expected (typical) shape: (2048,)
+
+dtype: float64
+"""
+
+# Stereo spectrum container; the shapes listed below are documentation only
+# and are not enforced by the alias.
+FrameF: TypeAlias = FloatArray
+"""
+Frequency-domain frame (MDCT coefficients), stereo container.
+
+Typical shapes (Level 1):
+- If frame_type in {"OLS","LSS","LPS"}: (1024, 2)
+- If frame_type == "ESH": (128, 16)
+
+Rationale for ESH (128, 16):
+- 8 short subframes per channel => 8 * 2 = 16 columns total
+- Each short subframe per stereo is (128, 2), flattened into columns
+  in subframe order: [sf0_L, sf0_R, sf1_L, sf1_R, ..., sf7_L, sf7_R]
+
+dtype: float64
+"""
+
+# NOTE(review): the text below gives (1024,) for long frames, while the coder
+# contract test in this change set asserts (1024, 1) for the per-channel
+# "frame_F" payload -- confirm which shape is intended and align the two.
+FrameChannelF: TypeAlias = FloatArray
+"""
+Frequency-domain single-channel frame (MDCT coefficients).
+
+Typical shapes (Level 1):
+- If frame_type in {"OLS","LSS","LPS"}: (1024,)
+- If frame_type == "ESH": (128, 8)  (8 short subframes for one channel)
+
+dtype: float64
+"""
+
+
+# -----------------------------------------------------------------------------
+# Level 1 AAC sequence payload types
+# -----------------------------------------------------------------------------
+
+class AACChannelFrameF(TypedDict):
+    """
+    Per-channel payload for aac_seq_1[i]["chl"] or ["chr"] (Level 1).
+
+    Keys
+    ----
+    frame_F:
+        The MDCT coefficients for ONE channel.
+        Typical shapes:
+        - ESH: (128, 8)   (8 short subframes)
+        - else: (1024, 1) (single column; this is the shape the coder
+          contract test asserts for non-ESH frames)
+    """
+    # Single required key; the array shape depends on the frame type (see above).
+    frame_F: FrameChannelF
+
+
+class AACSeq1Frame(TypedDict):
+    """
+    One frame dictionary element of aac_seq_1 (Level 1).
+
+    Keys
+    ----
+    frame_type:
+        Frame type code of this frame ("OLS", "LSS", "ESH" or "LPS").
+    win_type:
+        Window type code of this frame ("KBD" or "SIN").
+    chl:
+        Payload dictionary under the "chl" key (see AACChannelFrameF).
+    chr:
+        Payload dictionary under the "chr" key (see AACChannelFrameF).
+    """
+    frame_type: FrameType
+    win_type: WinType
+    # Both channels share frame_type / win_type; only the spectra are per channel.
+    chl: AACChannelFrameF
+    chr: AACChannelFrameF
+
+
+# Plain list alias; the number of frames K is not encoded in the type.
+AACSeq1: TypeAlias = List[AACSeq1Frame]
+"""
+AAC sequence for Level 1:
+List of length K (K = number of frames).
+
+Each element is a dict with keys:
+- "frame_type", "win_type", "chl", "chr"
+"""
--- a/source/level_1/core/tests/test_SSC.py
+++ b/source/level_1/core/tests/test_SSC.py
@ -0,0 +1,234 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Sequence Segmentation Control Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Sequence Segmentation Control module (SSC).
+# ------------------------------------------------------------
+
+from __future__ import annotations
+
+import numpy as np
+
+from core.aac_ssc import aac_SSC
+from core.aac_types import FrameT
+
+# -----------------------------------------------------------------------------
+# Helper fixtures for SSC
+# -----------------------------------------------------------------------------
+
+def _next_frame_no_attack() -> FrameT:
+    """
+    Return an all-zero stereo frame of shape (2048, 2), dtype float64.
+
+    Intended as a next_frame_T that must NOT be classified as an attack:
+    with exact zeros there is no energy anywhere in the frame, so an energy
+    threshold such as s[l] > 1e-3 cannot be met.
+    """
+    silent_frame = np.zeros(shape=(2048, 2), dtype=np.float64)
+    return silent_frame
+
+
+def _next_frame_strong_attack(
+    *,
+    attack_left: bool,
+    attack_right: bool,
+    segment_l: int = 4,
+    baseline: float = 1e-6,
+    burst_amp: float = 1.0,
+) -> FrameT:
+    """
+    Build a (2048, 2) next_frame_T with a strong burst on the selected channels.
+
+    Attack criterion targeted by this fixture (spec):
+      an attack exists if, for some l in {1..7},
+        s[l] > 1e-3  and  ds[l] > 10,
+      with s[l] the energy of the 128-sample segment l after high-pass
+      filtering and ds[l] = s[l] / s[l-1].
+
+    Parameters
+    ----------
+    attack_left, attack_right : bool
+        Whether to add the burst to column 0 (L) / column 1 (R).
+    segment_l : int
+        Index of the 128-sample segment that receives the burst; must be in 1..7.
+    baseline : float
+        Constant value written to every sample before the burst is added, so
+        the frame is never exactly zero.
+    burst_amp : float
+        Amount added on top of the baseline inside the chosen segment.
+
+    Returns
+    -------
+    FrameT
+        The constructed frame (float64).
+
+    Raises
+    ------
+    ValueError
+        If segment_l is outside [1, 7].
+    """
+    in_range = 1 <= segment_l <= 7
+    if not in_range:
+        raise ValueError(f"segment_l must be in [1, 7], got {segment_l}.")
+
+    frame = np.full((2048, 2), baseline, dtype=np.float64)
+
+    # Sample span of segment l: [l*128, (l+1)*128).
+    burst_span = slice(segment_l * 128, (segment_l + 1) * 128)
+
+    for column, enabled in enumerate((attack_left, attack_right)):
+        if enabled:
+            frame[burst_span, column] += burst_amp
+
+    return frame
+
+
+def _next_frame_below_s_threshold(
+    *,
+    left: bool,
+    right: bool,
+    segment_l: int = 4,
+    impulse_amp: float = 0.01,
+) -> FrameT:
+    """
+    Build a (2048, 2) next_frame_T holding one small impulse per selected channel.
+
+    The frame is zero everywhere except for a single sample (offset 10 inside
+    segment l) set to 'impulse_amp'. The intent is a segment energy of roughly
+    impulse_amp^2 (e.g. 0.01 -> ~1e-4), i.e. below the 1e-3 threshold, so ESH
+    must NOT be triggered even though the ratio ds[l] could be large.
+
+    Parameters
+    ----------
+    left, right : bool
+        Whether to place the impulse in column 0 (L) / column 1 (R).
+    segment_l : int
+        Index of the 128-sample segment that holds the impulse; must be in 1..7.
+    impulse_amp : float
+        Value written at the impulse position.
+
+    Returns
+    -------
+    FrameT
+        The constructed frame (float64).
+
+    Raises
+    ------
+    ValueError
+        If segment_l is outside [1, 7].
+    """
+    in_range = 1 <= segment_l <= 7
+    if not in_range:
+        raise ValueError(f"segment_l must be in [1, 7], got {segment_l}.")
+
+    frame = np.zeros((2048, 2), dtype=np.float64)
+    impulse_at = segment_l * 128 + 10  # tenth sample past the start of segment l
+
+    for column, enabled in enumerate((left, right)):
+        if enabled:
+            frame[impulse_at, column] = impulse_amp
+
+    return frame
+
+
+# -----------------------------------------------------------------------------
+# 1) Fixed/mandatory cases (prev frame type forces current type)
+# -----------------------------------------------------------------------------
+
+def test_ssc_fixed_cases_prev_lss_and_lps() -> None:
+    """
+    Forced transitions (spec):
+      - prev = LSS  => current MUST be ESH
+      - prev = LPS  => current MUST be OLS
+    The next frame carries an attack on both channels to show that the
+    outcome does not depend on attack detection on (i+1).
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_strong_attack(attack_left=True, attack_right=True)
+
+    forced_transitions = (("LSS", "ESH"), ("LPS", "OLS"))
+    for prev_type, expected in forced_transitions:
+        assert aac_SSC(current, upcoming, prev_type) == expected
+
+
+# -----------------------------------------------------------------------------
+# 2) Cases requiring next-frame ESH prediction (attack computation)
+# -----------------------------------------------------------------------------
+
+def test_prev_ols_next_not_esh_returns_ols() -> None:
+    """
+    prev=OLS rule:
+      - current is LSS iff frame (i+1) is predicted ESH
+      - otherwise current stays OLS
+    The next frame here is silent (no attack), so OLS is expected.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_no_attack()
+
+    assert aac_SSC(current, upcoming, "OLS") == "OLS"
+
+
+def test_prev_ols_next_esh_both_channels_returns_lss() -> None:
+    """
+    prev=OLS with an attack on both channels of frame (i+1):
+      per-channel decisions: LSS, LSS
+      merged result:         LSS
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_strong_attack(attack_left=True, attack_right=True)
+
+    assert aac_SSC(current, upcoming, "OLS") == "LSS"
+
+
+def test_prev_ols_next_esh_one_channel_returns_lss() -> None:
+    """
+    prev=OLS with an attack on exactly one channel of frame (i+1):
+      - attacked channel  => LSS
+      - quiet channel     => OLS
+    Merge table: OLS + LSS => LSS, whichever side carries the attack.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    # Left-only attack first, then right-only.
+    for on_left, on_right in ((True, False), (False, True)):
+        upcoming = _next_frame_strong_attack(attack_left=on_left, attack_right=on_right)
+        assert aac_SSC(current, upcoming, "OLS") == "LSS"
+
+
+def test_prev_esh_next_esh_both_channels_returns_esh() -> None:
+    """
+    prev=ESH with an attack on both channels of frame (i+1):
+      per-channel decisions: ESH, ESH
+      merged result:         ESH
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_strong_attack(attack_left=True, attack_right=True)
+
+    assert aac_SSC(current, upcoming, "ESH") == "ESH"
+
+
+def test_prev_esh_next_not_esh_both_channels_returns_lps() -> None:
+    """
+    prev=ESH with a silent frame (i+1), i.e. no attack on either channel:
+      per-channel decisions: LPS, LPS
+      merged result:         LPS
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_no_attack()
+
+    assert aac_SSC(current, upcoming, "ESH") == "LPS"
+
+
+def test_prev_esh_next_esh_one_channel_merged_is_esh() -> None:
+    """
+    prev=ESH with an attack on exactly one channel of frame (i+1):
+      - attacked channel  => ESH
+      - quiet channel     => LPS
+    Merge table: ESH + LPS => ESH, whichever side carries the attack.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+
+    # Left-only attack first, then right-only.
+    for on_left, on_right in ((True, False), (False, True)):
+        upcoming = _next_frame_strong_attack(attack_left=on_left, attack_right=on_right)
+        assert aac_SSC(current, upcoming, "ESH") == "ESH"
+
+
+def test_threshold_s_must_exceed_1e_3() -> None:
+    """
+    Spec: frame (i+1) is predicted ESH only if, for some l in 1..7,
+      s[l] > 1e-3  AND  ds[l] > 10.
+
+    This test covers the energy half of the condition:
+      - the next frame holds a single impulse of amplitude 0.01 per channel,
+        intended to give s[l] ~= 1e-4 < 1e-3;
+      - it must therefore not count as an attack, so prev=OLS yields OLS.
+    """
+    current: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    upcoming = _next_frame_below_s_threshold(left=True, right=True, impulse_amp=0.01)
+
+    assert aac_SSC(current, upcoming, "OLS") == "OLS"
--- a/source/level_1/core/tests/test_aac_coder_decoder.py
+++ b/source/level_1/core/tests/test_aac_coder_decoder.py
@ -0,0 +1,156 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - AAC Coder/Decoder Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for AAC Coder/Decoder module.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+import soundfile as sf
+
+from core.aac_coder import aac_coder_1
+from core.aac_decoder import aac_decoder_1
+from core.aac_types import *
+
+
+# Helper "fixtures" for aac_coder_1 / aac_decoder_1
+# -----------------------------------------------------------------------------
+
+def _snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
+    """
+    Overall SNR (dB) of x_hat against x_ref, over all samples and channels.
+
+    Both inputs are converted to float64; 1-D inputs are treated as a single
+    column. The comparison uses only the leading rows and columns that both
+    arrays have in common.
+
+    Parameters
+    ----------
+    x_ref : StereoSignal
+        Reference signal, shape (N, 2) typical.
+    x_hat : StereoSignal
+        Reconstructed signal, shape (M, 2) typical.
+
+    Returns
+    -------
+    float
+        10*log10(signal power / noise power).
+        - +inf if the noise power is zero (checked first).
+        - -inf if the signal power is zero.
+    """
+    ref = np.asarray(x_ref, dtype=np.float64)
+    est = np.asarray(x_hat, dtype=np.float64)
+
+    # Promote vectors to single-column matrices so both are 2-D.
+    ref = ref.reshape(-1, 1) if ref.ndim == 1 else ref
+    est = est.reshape(-1, 1) if est.ndim == 1 else est
+
+    # Crop both to their common extent (rows and channels).
+    rows = min(ref.shape[0], est.shape[0])
+    cols = min(ref.shape[1], est.shape[1])
+    ref, est = ref[:rows, :cols], est[:rows, :cols]
+
+    noise = ref - est
+    signal_power = float(np.sum(ref * ref))
+    noise_power = float(np.sum(noise * noise))
+
+    if noise_power <= 0.0:
+        return float("inf")
+    if signal_power <= 0.0:
+        return float("-inf")
+
+    return float(10.0 * np.log10(signal_power / noise_power))
+
+
+@pytest.fixture()
+def tmp_stereo_wav(tmp_path: Path) -> Path:
+    """
+    Write a temporary 48 kHz stereo WAV of seeded Gaussian noise and return its path.
+
+    The file is "in.wav" inside pytest's per-test tmp_path and holds 48000
+    frames (about one second, kept short for test speed).
+    """
+    sample_rate = 48000
+    generator = np.random.default_rng(123)
+
+    # One second of samples: frame count equals the sampling rate.
+    noise: StereoSignal = generator.normal(size=(sample_rate, 2)).astype(np.float64)
+
+    destination = tmp_path / "in.wav"
+    sf.write(str(destination), noise, sample_rate)
+    return destination
+
+
+def test_aac_coder_seq_schema_and_shapes(tmp_stereo_wav: Path) -> None:
+    """
+    Module-level contract test for aac_coder_1.
+
+    The result must be a non-empty list of dicts, each with the keys
+    "frame_type", "win_type", "chl", "chr". Both channel entries must be
+    dicts holding "frame_F", whose shape is (128, 8) for ESH frames and
+    (1024, 1) for every other frame type.
+    """
+    encoded: AACSeq1 = aac_coder_1(tmp_stereo_wav)
+
+    assert isinstance(encoded, list)
+    assert len(encoded) > 0
+
+    for frame in encoded:
+        assert isinstance(frame, dict)
+
+        # Required top-level keys.
+        for key in ("frame_type", "win_type", "chl", "chr"):
+            assert key in frame
+
+        assert frame["frame_type"] in ("OLS", "LSS", "ESH", "LPS")
+        assert frame["win_type"] in ("SIN", "KBD")
+
+        # Per-channel payload containers.
+        for channel in ("chl", "chr"):
+            assert isinstance(frame[channel], dict)
+        for channel in ("chl", "chr"):
+            assert "frame_F" in frame[channel]
+
+        expected_shape = (128, 8) if frame["frame_type"] == "ESH" else (1024, 1)
+        for channel in ("chl", "chr"):
+            spectrum = np.asarray(frame[channel]["frame_F"], dtype=np.float64)
+            assert spectrum.shape == expected_shape
+
+
+def test_end_to_end_aac_coder_decoder_high_snr(tmp_stereo_wav: Path, tmp_path: Path) -> None:
+    """
+    End-to-end test: encode the fixture WAV, decode it, and require a very
+    high SNR (numerical noise only) between the input samples and the array
+    returned by the decoder.
+
+    The 80 dB threshold is intentionally loose to avoid fragility across
+    platforms/BLAS. The decoded file is only checked for existence and
+    sampling rate; its samples are not compared here.
+    """
+    reference, fs = sf.read(str(tmp_stereo_wav), always_2d=True)
+    reference = np.asarray(reference, dtype=np.float64)
+    assert int(fs) == 48000
+
+    decoded_path = tmp_path / "out.wav"
+    decoded: StereoSignal = aac_decoder_1(aac_coder_1(tmp_stereo_wav), decoded_path)
+
+    # Basic sanity: the output file exists, is readable, and is 48 kHz.
+    assert decoded_path.exists()
+    _, fs_out = sf.read(str(decoded_path), always_2d=True)
+    assert int(fs_out) == 48000
+
+    # SNR is measured against the returned array, not the file contents.
+    assert _snr_db(reference, decoded) > 80.0
--- a/source/level_1/core/tests/test_filterbank.py
+++ b/source/level_1/core/tests/test_filterbank.py
@ -0,0 +1,269 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Filterbank module.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+from typing import Sequence
+import pytest
+
+from core.aac_filterbank import aac_filter_bank, aac_i_filter_bank
+from core.aac_types import *
+
+# Helper fixtures for filterbank
+# -----------------------------------------------------------------------------
+
+def _ola_reconstruct(x: StereoSignal, frame_types: Sequence[FrameType], win_type: WinType) -> StereoSignal:
+    """
+    Run analysis + synthesis on every frame and overlap-add the results.
+
+    Frame i covers samples [i*1024, i*1024 + 2048) of x; consecutive frames
+    therefore overlap by 1024 samples.
+
+    Parameters
+    ----------
+    x : StereoSignal
+        Input stereo stream, expected shape (N, 2).
+    frame_types : Sequence[FrameType]
+        One frame type per frame, in order; its length sets the frame count.
+    win_type : WinType
+        Window type ("SIN" or "KBD") used for every frame.
+
+    Returns
+    -------
+    StereoSignal
+        Overlap-added reconstruction, same shape as x, float64.
+    """
+    hop, frame_len = 1024, 2048
+
+    rebuilt: StereoSignal = np.zeros_like(x, dtype=np.float64)
+
+    for index, frame_type in enumerate(frame_types):
+        span = slice(index * hop, index * hop + frame_len)
+        spectrum: FrameF = aac_filter_bank(x[span, :], frame_type, win_type)
+        synthesized: FrameT = aac_i_filter_bank(spectrum, frame_type, win_type)
+        rebuilt[span, :] += synthesized
+
+    return rebuilt
+
+
+def _snr_db(x: StereoSignal, y: StereoSignal) -> float:
+    """
+    SNR in dB of y against the reference x, over all samples/channels.
+
+    Returns +inf when the error power is zero (checked first) and -inf when
+    the reference power is zero.
+    """
+    residual = x - y
+    signal_power = float(np.sum(x * x))
+    noise_power = float(np.sum(residual * residual))
+
+    if noise_power <= 0.0:
+        return float("inf")
+    if signal_power <= 0.0:
+        return float("-inf")
+
+    ratio_db = float(np.log10(signal_power / noise_power))
+    return 10.0 * ratio_db
+
+
+# -----------------------------------------------------------------------------
+# Forward filterbank tests
+# -----------------------------------------------------------------------------
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+@pytest.mark.parametrize("frame_type", ["OLS", "LSS", "LPS"])
+def test_filterbank_shapes_long_sequences(frame_type: FrameType, win_type: WinType) -> None:
+    """
+    Contract test: a (2048, 2) input with a long frame type (OLS/LSS/LPS)
+    must yield an aac_filter_bank output of shape (1024, 2).
+    """
+    silence: FrameT = np.zeros(shape=(2048, 2), dtype=np.float64)
+    spectrum = aac_filter_bank(silence, frame_type, win_type)
+    assert spectrum.shape == (1024, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_shapes_esh(win_type: WinType) -> None:
+    """
+    Contract test: a (2048, 2) input with frame type ESH must yield an
+    aac_filter_bank output of shape (128, 16).
+    """
+    silence: FrameT = np.zeros(shape=(2048, 2), dtype=np.float64)
+    spectrum = aac_filter_bank(silence, "ESH", win_type)
+    assert spectrum.shape == (128, 16)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_channel_isolation_long_sequences(win_type: WinType) -> None:
+    """
+    Behavior test (OLS as the representative long sequence): channels are
+    processed independently. With random data on the left channel and zeros
+    on the right, the right-channel spectrum must stay below 1e-9 in magnitude.
+    """
+    generator = np.random.default_rng(0)
+
+    left_only: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    left_only[:, 0] = generator.normal(size=2048)
+
+    spectrum = aac_filter_bank(left_only, "OLS", win_type)
+
+    right_peak = np.max(np.abs(spectrum[:, 1]))
+    assert right_peak < 1e-9
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_channel_isolation_esh(win_type: WinType) -> None:
+    """
+    Behavior test for ESH: channels are processed independently. With random
+    data on the left channel and zeros on the right, every odd output column
+    (the right-channel columns 1, 3, ..., 15) must stay below 1e-9 in magnitude.
+    """
+    generator = np.random.default_rng(1)
+
+    left_only: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    left_only[:, 0] = generator.normal(size=2048)
+
+    spectrum = aac_filter_bank(left_only, "ESH", win_type)
+
+    right_peak = np.max(np.abs(spectrum[:, 1::2]))
+    assert right_peak < 1e-9
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_esh_ignores_outer_regions(win_type: WinType) -> None:
+    """
+    Spec-driven behavior test:
+    ESH uses only the central region [448, 1600), split into 8 overlapping
+    windows of length 256 with 50% overlap.
+
+    Two frames that share the same central samples but differ outside
+    [448, 1600) must therefore produce the same ESH output.
+    """
+    generator = np.random.default_rng(2)
+    inner = slice(448, 1600)
+
+    # Draw order matters for reproducibility: center, then head, then tail.
+    shared_center = generator.normal(size=(1152, 2))
+
+    quiet_edges: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    quiet_edges[inner, :] = shared_center
+
+    noisy_edges: FrameT = np.zeros((2048, 2), dtype=np.float64)
+    noisy_edges[inner, :] = shared_center
+    noisy_edges[0:448, :] = generator.normal(size=(448, 2))
+    noisy_edges[1600:2048, :] = generator.normal(size=(448, 2))
+
+    spectrum_quiet = aac_filter_bank(quiet_edges, "ESH", win_type)
+    spectrum_noisy = aac_filter_bank(noisy_edges, "ESH", win_type)
+
+    # A tiny absolute tolerance avoids flaky failures from floating-point minutiae.
+    np.testing.assert_allclose(spectrum_quiet, spectrum_noisy, rtol=0.0, atol=1e-12)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_filterbank_output_is_finite(win_type: WinType) -> None:
+    """
+    Sanity test: for a random (2048, 2) frame, the analysis output must be
+    free of NaN/inf for each of the four frame types.
+    """
+    generator = np.random.default_rng(3)
+    noise: FrameT = generator.normal(size=(2048, 2)).astype(np.float64)
+
+    all_types = ("OLS", "LSS", "ESH", "LPS")
+    for frame_type in all_types:
+        spectrum = aac_filter_bank(noise, frame_type, win_type)
+        assert np.isfinite(spectrum).all()
+
+
+# -----------------------------------------------------------------------------
+# Reverse i_filterbank tests
+# -----------------------------------------------------------------------------
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ifilterbank_shapes_long_sequences(win_type: WinType) -> None:
+    """
+    Contract test: a (1024, 2) spectrum with a long frame type (OLS/LSS/LPS)
+    must yield an aac_i_filter_bank output of shape (2048, 2).
+    """
+    zero_spectrum: FrameF = np.zeros((1024, 2), dtype=np.float64)
+
+    long_types = ("OLS", "LSS", "LPS")
+    for frame_type in long_types:
+        synthesized = aac_i_filter_bank(zero_spectrum, frame_type, win_type)
+        assert synthesized.shape == (2048, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ifilterbank_shapes_esh(win_type: WinType) -> None:
+    """
+    Contract test: a (128, 16) spectrum with frame type ESH must yield an
+    aac_i_filter_bank output of shape (2048, 2).
+    """
+    zero_spectrum: FrameF = np.zeros((128, 16), dtype=np.float64)
+    synthesized = aac_i_filter_bank(zero_spectrum, "ESH", win_type)
+    assert synthesized.shape == (2048, 2)
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_roundtrip_per_frame_is_finite(win_type: WinType) -> None:
+    """
+    Sanity test: analysis followed by synthesis of one random frame must give
+    finite samples (no NaN/inf) for each of the four frame types.
+    """
+    generator = np.random.default_rng(0)
+    noise: FrameT = generator.normal(size=(2048, 2)).astype(np.float64)
+
+    all_types = ("OLS", "LSS", "ESH", "LPS")
+    for frame_type in all_types:
+        spectrum = aac_filter_bank(noise, frame_type, win_type)
+        roundtrip = aac_i_filter_bank(spectrum, frame_type, win_type)
+        assert np.isfinite(roundtrip).all()
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_ols_high_snr(win_type: WinType) -> None:
+    """
+    Module-level test:
+    six OLS frames, analyzed and synthesized with hop=1024 and overlap-added,
+    must reconstruct the input with SNR > 50 dB in the steady-state region
+    (everything except the first and last 1024 samples).
+    """
+    generator = np.random.default_rng(1)
+
+    n_frames = 6
+    n_samples = 1024 * (n_frames + 1)
+    signal: StereoSignal = generator.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, ["OLS"] * n_frames, win_type)
+
+    # The first and last hop are covered by a single frame only; skip them.
+    steady = slice(1024, n_samples - 1024)
+    assert _snr_db(signal[steady, :], rebuilt[steady, :]) > 50.0
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_esh_high_snr(win_type: WinType) -> None:
+    """
+    Module-level test:
+    six ESH frames, analyzed and synthesized with hop=1024 and overlap-added,
+    must reconstruct the input with SNR > 45 dB in the steady-state region
+    (everything except the first and last 1024 samples).
+    """
+    generator = np.random.default_rng(2)
+
+    n_frames = 6
+    n_samples = 1024 * (n_frames + 1)
+    signal: StereoSignal = generator.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, ["ESH"] * n_frames, win_type)
+
+    # The first and last hop are covered by a single frame only; skip them.
+    steady = slice(1024, n_samples - 1024)
+    assert _snr_db(signal[steady, :], rebuilt[steady, :]) > 45.0
+
+
+@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
+def test_ola_reconstruction_transition_sequence(win_type: WinType) -> None:
+    """
+    Transition sequence test matching the windowing logic:
+      OLS -> LSS -> ESH -> LPS -> OLS -> OLS
+    Overlap-add reconstruction must reach SNR > 40 dB in the steady-state
+    region (everything except the first and last 1024 samples).
+    """
+    generator = np.random.default_rng(3)
+
+    sequence: list[FrameType] = ["OLS", "LSS", "ESH", "LPS", "OLS", "OLS"]
+    n_samples = 1024 * (len(sequence) + 1)
+    signal: StereoSignal = generator.normal(size=(n_samples, 2)).astype(np.float64)
+
+    rebuilt = _ola_reconstruct(signal, sequence, win_type)
+
+    # The first and last hop are covered by a single frame only; skip them.
+    steady = slice(1024, n_samples - 1024)
+    assert _snr_db(signal[steady, :], rebuilt[steady, :]) > 40.0
--- a/source/level_1/core/tests/test_filterbank_internal.py
+++ b/source/level_1/core/tests/test_filterbank_internal.py
@ -0,0 +1,117 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Filterbank internal (mdct) Tests
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Tests for Filterbank internal MDCT/IMDCT functionality.
+# ------------------------------------------------------------
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from core.aac_filterbank import _imdct, _mdct
+from core.aac_types import FloatArray, TimeSignal, MdctCoeffs
+
+
+def _assert_allclose(a: FloatArray, b: FloatArray, *, rtol: float, atol: float) -> None:
+    """
+    Thin wrapper over np.testing.assert_allclose with mandatory, keyword-only
+    tolerances, so every comparison in this module states them explicitly.
+
+    Raises AssertionError (from NumPy) when a and b differ beyond rtol/atol.
+    """
+    tolerances = {"rtol": rtol, "atol": atol}
+    np.testing.assert_allclose(a, b, **tolerances)
+
+
+def _estimate_gain(y: MdctCoeffs, x: MdctCoeffs) -> float:
+    """
+    Least-squares estimate of the scalar g for which y ~= g*x.
+
+    Computed as <y, x> / <x, x>; returns 0.0 when <x, x> is exactly zero.
+    """
+    energy = float(np.dot(x, x))
+    return float(np.dot(y, x) / energy) if energy != 0.0 else 0.0
+
+
+# Shared tolerance, passed as both rtol and atol by the tests in this module.
+tolerance = 1e-10
+
+@pytest.mark.parametrize("N", [256, 2048])
+def test_mdct_imdct_mdct_identity_up_to_gain(N: int) -> None:
+    """
+    Consistency test in the coefficient domain:
+      mdct(imdct(X)) ~= g * X
+
+    X holds N/2 random coefficients. With the chosen (non-orthonormal)
+    scaling the estimated gain g is required to match 2 within 'tolerance'.
+    """
+    generator = np.random.default_rng(0)
+
+    coeffs: MdctCoeffs = generator.normal(size=N // 2).astype(np.float64)
+    roundtrip: MdctCoeffs = _mdct(_imdct(coeffs))
+
+    gain = _estimate_gain(roundtrip, coeffs)
+
+    # 1) The round trip is a pure scaling of the input coefficients.
+    _assert_allclose(roundtrip, gain * coeffs, rtol=tolerance, atol=tolerance)
+
+    # 2) The scale factor itself is 2.
+    measured = np.array([gain], dtype=np.float64)
+    expected = np.array([2.0], dtype=np.float64)
+    _assert_allclose(measured, expected, rtol=tolerance, atol=tolerance)
+
+
+@pytest.mark.parametrize("N", [256, 2048])
+def test_mdct_linearity(N: int) -> None:
+    """
+    Linearity test for the MDCT:
+      mdct(a*x + b*y) == a*mdct(x) + b*mdct(y)
+    checked on two random length-N signals, within 'tolerance'.
+    """
+    generator = np.random.default_rng(1)
+    # Draw order (first, then second) is part of the reproducible setup.
+    first: TimeSignal = generator.normal(size=N).astype(np.float64)
+    second: TimeSignal = generator.normal(size=N).astype(np.float64)
+
+    scale_first, scale_second = 0.37, -1.12
+
+    transform_of_mix: MdctCoeffs = _mdct(scale_first * first + scale_second * second)
+    mix_of_transforms: MdctCoeffs = scale_first * _mdct(first) + scale_second * _mdct(second)
+
+    _assert_allclose(transform_of_mix, mix_of_transforms, rtol=tolerance, atol=tolerance)
+
+
+@pytest.mark.parametrize("N", [256, 2048])
+def test_imdct_linearity(N: int) -> None:
+    """
+    Linearity test for the IMDCT:
+      imdct(a*X + b*Y) == a*imdct(X) + b*imdct(Y)
+    checked on two random coefficient vectors of length N/2, within 'tolerance'.
+    """
+    generator = np.random.default_rng(2)
+    n_coeffs = N // 2
+
+    # Draw order (first, then second) is part of the reproducible setup.
+    first: MdctCoeffs = generator.normal(size=n_coeffs).astype(np.float64)
+    second: MdctCoeffs = generator.normal(size=n_coeffs).astype(np.float64)
+
+    scale_first, scale_second = -0.5, 2.0
+
+    inverse_of_mix: TimeSignal = _imdct(scale_first * first + scale_second * second)
+    mix_of_inverses: TimeSignal = scale_first * _imdct(first) + scale_second * _imdct(second)
+
+    _assert_allclose(inverse_of_mix, mix_of_inverses, rtol=tolerance, atol=tolerance)
+
+
+@pytest.mark.parametrize("N", [256, 2048])
+def test_mdct_imdct_outputs_are_finite(N: int) -> None:
+    """
+    Sanity test: _mdct of a random length-N signal and _imdct of a random
+    length-N/2 coefficient vector must both be free of NaN/inf.
+    """
+    generator = np.random.default_rng(3)
+
+    # Draw order (time signal, then coefficients) is part of the reproducible setup.
+    samples: TimeSignal = generator.normal(size=N).astype(np.float64)
+    coeffs: MdctCoeffs = generator.normal(size=N // 2).astype(np.float64)
+
+    forward = _mdct(samples)
+    inverse = _imdct(coeffs)
+
+    assert np.isfinite(forward).all()
+    assert np.isfinite(inverse).all()
--- a/source/level_1/level_1.py
+++ b/source/level_1/level_1.py
@ -1,843 +1,186 @@
-#! /usr/bin/env python
-
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Level 1 Wrappers + Demo
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 1 wrapper module.
+#
+#   This file provides:
+#   - Thin wrappers for Level 1 API functions (encode/decode) that delegate
+#     to the corresponding core implementations.
+#   - A demo function that runs end-to-end and computes SNR.
+#   - A small CLI entrypoint for convenience.
+# ------------------------------------------------------------
 from __future__ import annotations

 from pathlib import Path
-from typing import Dict, Tuple, List, Literal, TypedDict, Union
+from typing import Union

 import numpy as np
 import soundfile as sf
-from scipy.signal.windows import kaiser

-# --------------------------------
-# Public Type aliases (Level 1)
-# --------------------------------
+from core.aac_types   import AACSeq1, StereoSignal
+from core.aac_coder   import aac_coder_1 as core_aac_coder_1
+from core.aac_coder   import aac_read_wav_stereo_48k
+from core.aac_decoder import aac_decoder_1 as core_aac_decoder_1

-FrameType = Literal["OLS", "LSS", "ESH", "LPS"]
-"""
-Frame type codes:
- "OLS": ONLY_LONG_SEQUENCE
- "LSS": LONG_START_SEQUENCE
- "ESH": EIGHT_SHORT_SEQUENCE
- "LPS": LONG_STOP_SEQUENCE
-"""
-
-WinType = Literal["KBD", "SIN"]
-"""
-Window type codes:
- "KBD": Kaiser-Bessel-Derived
- "SIN": sinusoid
-"""
-
-FrameT = np.ndarray
-"""
-Time-domain frame.
-Expected shape: (2048, 2) for stereo (two channels).
-dtype: float (e.g., float32/float64).
-"""
-
-FrameChannelT = np.ndarray
-"""
-Time-domain single channel frame.
-Expected shape: (2048,).
-dtype: float (e.g., float32/float64).
-"""
-
-
-FrameF = np.ndarray
-"""
-Frequency-domain frame (MDCT coefficients).
-As per spec (Level 1):
- If frame_type in {"OLS","LSS","LPS"}: shape (1024, 2)
- If frame_type == "ESH": shape (128, 16) where 8 subframes x 2 channels
-  are placed in columns according to the subframe order (i.e., each subframe is (128,2)).
-"""
-
-ChannelKey = Literal["chl", "chr"]
-
-
-class AACChannelFrameF(TypedDict):
-    """Channel payload for aac_seq_1[i]["chl"] or ["chr"] (Level 1)."""
-    frame_F: np.ndarray
-    # frame_F for one channel:
-    # - ESH: shape (128, 8)
-    # - else: shape (1024, 1)
-
-
-class AACSeq1Frame(TypedDict):
-    """One frame dictionary of aac_seq_1 (Level 1)."""
-    frame_type: FrameType
-    win_type: WinType
-    chl: AACChannelFrameF
-    chr: AACChannelFrameF
-
-
-AACSeq1 = List[AACSeq1Frame]
-"""AAC sequence for Level 1:
-List of length K (K = number of frames).
-Each element is a dict with keys:
- "frame_type", "win_type", "chl", "chr"
-"""
-
-# Global Options
-# -----------------------------------------------------------------------------
-
-# Window type
-# Options: "SIN", "KBD"
-WIN_TYPE: WinType = "SIN"
-
-
-# Private helpers for SSC
-# -----------------------------------------------------------------------------
-
-# See Table 1 in mm-2025-hw-v0.1.pdf
-STEREO_MERGE_TABLE: Dict[Tuple[FrameType, FrameType], FrameType] = {
-    ("OLS", "OLS"): "OLS",
-    ("OLS", "LSS"): "LSS",
-    ("OLS", "ESH"): "ESH",
-    ("OLS", "LPS"): "LPS",
-    ("LSS", "OLS"): "LSS",
-    ("LSS", "LSS"): "LSS",
-    ("LSS", "ESH"): "ESH",
-    ("LSS", "LPS"): "ESH",
-    ("ESH", "OLS"): "ESH",
-    ("ESH", "LSS"): "ESH",
-    ("ESH", "ESH"): "ESH",
-    ("ESH", "LPS"): "ESH",
-    ("LPS", "OLS"): "LPS",
-    ("LPS", "LSS"): "ESH",
-    ("LPS", "ESH"): "ESH",
-    ("LPS", "LPS"): "LPS",
-}
-
-def _detect_attack(next_frame_channel: FrameChannelT) -> bool:
-    """
-    Detect if next frame (single channel) implies ESH according to the spec's attack criterion.
-
-    Parameters
-    ----------
-    next_frame_channel : FrameChannelT
-        One channel of next_frame_T (shape: (2048,), dtype float).
-
-    Returns
-    -------
-    attack : bool
-        True if an attack is detected (=> next frame predicted ESH), else False.
-
-    Notes
-    -----
-    The spec describes:
-
-    - High-pass filter applied to next_frame_channel
-    - Split into 16 segments of length 128
-    - Compute segment energies s(l)
-    - Compute ds(l) = s(l) / s(l-1)
-    - Attack exists if there exists l in {1..7} such that:
-        s(l) > 1e-3 and ds(l) > 10
-    """
-    x = next_frame_channel # local alias, x assumed to be a 1-D array of length 2048
-
-    # High-pass filter H(z) = (1 - z^-1) / (1 - 0.5 z^-1)
-    # Implemented as: y[n] = x[n] - x[n-1] + 0.5*y[n-1]
-    y = np.zeros_like(x)
-    prev_x = 0.0
-    prev_y = 0.0
-    for n in range(x.shape[0]):
-        xn = float(x[n])
-        yn = (xn - prev_x) + 0.5 * prev_y
-        y[n] = yn
-        prev_x = xn
-        prev_y = yn
-
-    # Segment energies over 16 blocks of 128 samples.
-    s = np.empty(16, dtype=np.float64)
-    for l in range(16):
-        a = l * 128
-        b = (l + 1) * 128
-        seg = y[a:b]
-        s[l] = float(np.sum(seg * seg))
-
-    # ds(l) for l>=1. For l=0 not defined, keep 0.
-    ds = np.zeros(16, dtype=np.float64)
-    eps = 1e-12  # avoid division by zero without changing logic materially
-    for l in range(1, 16):
-        ds[l] = s[l] / max(s[l - 1], eps)
-
-    # Spec: check l in {1..7}
-    for l in range(1, 8):
-        if (s[l] > 1e-3) and (ds[l] > 10.0):
-            return True
-
-    return False
-
-
-def _decide_frame_type(prev_frame_type: FrameType, attack: bool) -> FrameType:
-    """
-    Decide current frame type for a single channel based on prev_frame_type and next-frame attack.
-
-    Parameters
-    ----------
-    prev_frame_type : FrameType
-        Previous frame type (one of "OLS","LSS","ESH","LPS").
-    attack : bool
-        Whether next frame is predicted ESH for this channel.
-
-    Returns
-    -------
-    frame_type : FrameType
-        The per-channel decision for the current frame.
-
-    Rules (spec)
-    ------------
-    - If prev is "LSS" => current is "ESH" (fixed)
-    - If prev is "LPS" => current is "OLS" (fixed)
-    - If prev is "OLS" => current is "LSS" if attack else "OLS"
-    - If prev is "ESH" => current is "ESH" if attack else "LPS"
-    """
-    if prev_frame_type == "LSS":
-        return "ESH"
-    if prev_frame_type == "LPS":
-        return "OLS"
-    if prev_frame_type == "OLS":
-        return "LSS" if attack else "OLS"
-    if prev_frame_type == "ESH":
-        return "ESH" if attack else "LPS"
-
-    raise ValueError(f"Invalid prev_frame_type: {prev_frame_type!r}")
-
-
-def _stereo_merge(ft_l: FrameType, ft_r: FrameType) -> FrameType:
-    """
-    Merge per-channel frame types into one common frame type using the spec table.
-
-    Parameters
-    ----------
-    ft_l : FrameType
-        Frame type decision for channel 0 (left).
-    ft_r : FrameType
-        Frame type decision for channel 1 (right).
-
-    Returns
-    -------
-    common : FrameType
-        The common final frame type.
-    """
-    try:
-        return STEREO_MERGE_TABLE[(ft_l, ft_r)]
-    except KeyError as e:
-        raise ValueError(f"Invalid stereo merge pair: {(ft_l, ft_r)}") from e
-
-
-
-# Private helpers for Filterbank
-# -----------------------------------------------------------------------------
-
-def _sin_window(N: int) -> np.ndarray:
-    """
-    Sine window (full length N).
-    w[n] = sin(pi/N * (n + 0.5)), 0 <= n < N
-    """
-    n = np.arange(N, dtype=np.float64)
-    return np.sin((np.pi / N) * (n + 0.5))
-
-
-def _kbd_window(N: int, alpha: float) -> np.ndarray:
-    """
-    Kaiser-Bessel-Derived (KBD) window (full length N).
-
-    This follows the standard KBD construction:
-    - Build Kaiser kernel of length N/2 + 1
-    - Use cumulative sum and sqrt normalization to form left and right halves
-    """
-    half = N // 2
-
-    # Kaiser kernel length: half + 1 samples (0 .. half)
-    # beta = pi * alpha per the usual correspondence with the ISO definition
-    kernel = kaiser(half + 1, beta=np.pi * alpha).astype(np.float64)
-
-    csum = np.cumsum(kernel)
-    denom = csum[-1]
-
-    w_left = np.sqrt(csum[:-1] / denom)  # length half, n = 0 .. half-1
-    w_right = w_left[::-1]               # mirror for second half
-
-    return np.concatenate([w_left, w_right])
-
-
-def _long_window(win_type: WinType) -> np.ndarray:
-    """
-    Long window (length 2048) for the selected win_type.
-    """
-    if win_type == "SIN":
-        return _sin_window(2048)
-    if win_type == "KBD":
-        # Assignment-specific alpha values
-        return _kbd_window(2048, alpha=6.0)
-    raise ValueError(f"Invalid win_type: {win_type!r}")
-
-
-def _short_window(win_type: WinType) -> np.ndarray:
-    """
-    Short window (length 256) for the selected win_type.
-    """
-    if win_type == "SIN":
-        return _sin_window(256)
-    if win_type == "KBD":
-        # Assignment-specific alpha values
-        return _kbd_window(256, alpha=4.0)
-    raise ValueError(f"Invalid win_type: {win_type!r}")
-
-
-def _window_sequence(frame_type: FrameType, win_type: WinType) -> np.ndarray:
-    """
-    Build the 2048-sample window sequence for OLS/LSS/LPS.
-
-    We follow the simplified assumption:
-    - The same window shape (KBD or SIN) is used globally (no mixed halves).
-    - Therefore, the left and right halves are drawn from the same family.
-    """
-    wL = _long_window(win_type)   # length 2048
-    wS = _short_window(win_type)  # length 256
-
-    if frame_type == "OLS":
-        return wL
-
-    if frame_type == "LSS":
-        # 0..1023: left half of long window
-        # 1024..1471: ones (448 samples)
-        # 1472..1599: right half of short window (128 samples)
-        # 1600..2047: zeros (448 samples)
-        out = np.zeros(2048, dtype=np.float64)
-        out[0:1024] = wL[0:1024]
-        out[1024:1472] = 1.0
-        out[1472:1600] = wS[128:256]
-        out[1600:2048] = 0.0
-        return out
-
-    if frame_type == "LPS":
-        # 0..447: zeros (448)
-        # 448..575: left half of short window (128)
-        # 576..1023: ones (448)
-        # 1024..2047: right half of long window (1024)
-        out = np.zeros(2048, dtype=np.float64)
-        out[0:448] = 0.0
-        out[448:576] = wS[0:128]
-        out[576:1024] = 1.0
-        out[1024:2048] = wL[1024:2048]
-        return out
-
-    raise ValueError(f"Invalid frame_type for long window sequence: {frame_type!r}")
-
-
-def _mdct(s: np.ndarray) -> np.ndarray:
-    """
-    MDCT (direct form) as given in the assignment.
-
-    Input:
-      s: windowed time samples of length N (N = 2048 or 256)
-
-    Output:
-      X: MDCT coefficients of length N/2
-
-    Definition:
-      X[k] = 2 * sum_{n=0 .. N-1} s[n] * cos(2*pi/N * (n + n0) * (k + 1/2))
-      where n0 = (N/2 + 1)/2
-    """
-    s = np.asarray(s, dtype=np.float64)
-    N = int(s.shape[0])
-    if N not in (2048, 256):
-        raise ValueError("MDCT input length must be 2048 or 256.")
-
-    n0 = (N / 2.0 + 1.0) / 2.0
-
-    n = np.arange(N, dtype=np.float64) + n0
-    k = np.arange(N // 2, dtype=np.float64) + 0.5
-
-    # Cosine matrix: shape (N, N/2)
-    C = np.cos((2.0 * np.pi / N) * np.outer(n, k))
-    X = 2.0 * (s @ C)
-
-    return X
-
-def _imdct(X: np.ndarray) -> np.ndarray:
-    """
-    IMDCT (direct form) as given in the assignment.
-
-    Input:
-      X: MDCT coefficients of length N/2 (N = 2048 or 256)
-
-    Output:
-      s: time samples of length N
-
-    Definition:
-      s[n] = (2/N) * sum_{k=0 .. N/2-1} X[k] * cos(2*pi/N * (n + n0) * (k + 1/2))
-      where n0 = (N/2 + 1)/2
-    """
-    X = np.asarray(X, dtype=np.float64).reshape(-1)
-    K = int(X.shape[0])
-    if K not in (1024, 128):
-        raise ValueError("IMDCT input length must be 1024 or 128.")
-
-    N = 2 * K
-    n0 = (N / 2.0 + 1.0) / 2.0
-
-    n = np.arange(N, dtype=np.float64) + n0
-    k = np.arange(K, dtype=np.float64) + 0.5
-
-    C = np.cos((2.0 * np.pi / N) * np.outer(n, k))  # (N, K)
-    s = (2.0 / N) * (C @ X)
-
-    return s
-
-
-def _filter_bank_esh_channel(x_ch: np.ndarray, win_type: WinType) -> np.ndarray:
-    """
-    ESH analysis for one channel.
-
-    Returns:
-      X_esh: shape (128, 8), where each column is the 128 MDCT coeffs of one short window.
-    """
-    wS = _short_window(win_type)
-    X_esh = np.empty((128, 8), dtype=np.float64)
-
-    # ESH subwindows are taken from the central region:
-    # start positions: 448 + 128*j, j = 0..7
-    for j in range(8):
-        start = 448 + 128 * j
-        seg = x_ch[start:start + 256] * wS
-        X_esh[:, j] = _mdct(seg)
-
-    return X_esh
-
-
-
-
-def _unpack_esh(frame_F: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
-    """
-    Unpack ESH spectrum from shape (128, 16) into per-channel arrays (128, 8).
-
-    Mapping is the inverse of the packing used in filter_bank():
-      out[:, 2*j]   = left[:, j]
-      out[:, 2*j+1] = right[:, j]
-    """
-    if frame_F.shape != (128, 16):
-        raise ValueError("ESH frame_F must have shape (128, 16).")
-
-    left = np.empty((128, 8), dtype=np.float64)
-    right = np.empty((128, 8), dtype=np.float64)
-    for j in range(8):
-        left[:, j] = frame_F[:, 2 * j + 0]
-        right[:, j] = frame_F[:, 2 * j + 1]
-    return left, right
-
-
-def _i_filter_bank_esh_channel(X_esh: np.ndarray, win_type: WinType) -> np.ndarray:
-    """
-    ESH synthesis for one channel.
-
-    Input:
-      X_esh: (128, 8) MDCT coeffs for 8 short windows
-
-    Output:
-      x_ch: (2048, ) time-domain frame contribution (windowed),
-            ready for OLA at the caller level.
-    """
-    if X_esh.shape != (128, 8):
-        raise ValueError("X_esh must have shape (128, 8).")
-
-    wS = _short_window(win_type)
-    out = np.zeros(2048, dtype=np.float64)
-
-    # Each short IMDCT returns 256 samples. Place them at:
-    # start = 448 + 128*j, j=0..7 (50% overlap)
-    for j in range(8):
-        seg = _imdct(X_esh[:, j]) * wS  # (256,)
-        start = 448 + 128 * j
-        out[start:start + 256] += seg
-
-    return out

 # -----------------------------------------------------------------------------
-# Public Function prototypes (Level 1)
+# Public Level 1 API (wrappers)
 # -----------------------------------------------------------------------------

-def SSC(frame_T: FrameT, next_frame_T: FrameT, prev_frame_type: FrameType) -> FrameType:
-    """
-    Sequence Segmentation Control (SSC).
-    Selects and returns the frame type for the current frame (i) based on input parameters.
-
-    Parameters
-    -------
-    frame_T: FrameT
-        current time-domain frame i, stereo, shape (2048, 2)
-    next_frame_T: FrameT
-        next time-domain frame (i+1), stereo, shape (2048, 2)
-        (used to decide transitions to/from ESH)
-    prev_frame_type: FrameType
-        frame type chosen for the previous frame (i-1)
-
-    Returns
-    -------
-    frame_type : FrameType
-        - "OLS" (ONLY_LONG_SEQUENCE)
-        - "LSS" (LONG_START_SEQUENCE)
-        - "ESH" (EIGHT_SHORT_SEQUENCE)
-        - "LPS" (LONG_STOP_SEQUENCE)
-    """
-    if frame_T.shape != (2048, 2):
-        raise ValueError("frame_T must have shape (2048, 2).")
-    if next_frame_T.shape != (2048, 2):
-        raise ValueError("next_frame_T must have shape (2048, 2).")
-
-    # Detect attack independently per channel on next frame.
-    attack_l = _detect_attack(next_frame_T[:, 0])
-    attack_r = _detect_attack(next_frame_T[:, 1])
-
-    # Decide per-channel type based on shared prev_frame_type.
-    ft_l = _decide_frame_type(prev_frame_type, attack_l)
-    ft_r = _decide_frame_type(prev_frame_type, attack_r)
-
-    # Stereo merge as per Table 1.
-    return _stereo_merge(ft_l, ft_r)
-
-
-def filter_bank(frame_T: FrameT, frame_type: FrameType, win_type: WinType) -> FrameF:
-    """
-    Filterbank stage (MDCT analysis).
-
-    Parameters
-    ----------
-    frame_T : FrameT
-        Time-domain frame, stereo, shape (2048, 2).
-    frame_type : FrameType
-        Type of the frame under encoding ("OLS"|"LSS"|"ESH"|"LPS").
-    win_type : WinType
-        Window type ("KBD" or "SIN") used for the current frame.
-
-    Returns
-    -------
-    frame_F : FrameF
-        Frequency-domain MDCT coefficients:
-        - If frame_type in {"OLS","LSS","LPS"}: array shape (1024, 2)
-          containing MDCT coefficients for both channels.
-        - If frame_type == "ESH": contains 8 subframes, each subframe has shape (128,2),
-          placed in columns according to subframe order, i.e. overall shape (128, 16).
-    """
-    if frame_T.shape != (2048, 2):
-        raise ValueError("frame_T must have shape (2048, 2).")
-
-    xL = frame_T[:, 0].astype(np.float64, copy=False)
-    xR = frame_T[:, 1].astype(np.float64, copy=False)
-
-    if frame_type in ("OLS", "LSS", "LPS"):
-        w = _window_sequence(frame_type, win_type)  # length 2048
-        XL = _mdct(xL * w)  # length 1024
-        XR = _mdct(xR * w)  # length 1024
-        out = np.empty((1024, 2), dtype=np.float64)
-        out[:, 0] = XL
-        out[:, 1] = XR
-        return out
-
-    if frame_type == "ESH":
-        Xl = _filter_bank_esh_channel(xL, win_type)  # (128, 8)
-        Xr = _filter_bank_esh_channel(xR, win_type)  # (128, 8)
-
-        # Pack into (128, 16): each subframe as (128,2) placed in columns
-        out = np.empty((128, 16), dtype=np.float64)
-        for j in range(8):
-            out[:, 2 * j + 0] = Xl[:, j]
-            out[:, 2 * j + 1] = Xr[:, j]
-        return out
-
-    raise ValueError(f"Invalid frame_type: {frame_type!r}")
-
-
-def i_filter_bank(frame_F: FrameF, frame_type: FrameType, win_type: WinType) -> FrameT:
-    """
-    Inverse filterbank (IMDCT synthesis).
-
-    Parameters
-    ----------
-    frame_F : FrameF
-        Frequency-domain MDCT coefficients as produced by filter_bank().
-    frame_type : FrameType
-        Frame type ("OLS"|"LSS"|"ESH"|"LPS").
-    win_type : WinType
-        Window type ("KBD" or "SIN").
-
-    Returns
-    -------
-    frame_T : FrameT
-        Reconstructed time-domain frame, stereo, shape (2048, 2).
-    """
-    if frame_type in ("OLS", "LSS", "LPS"):
-        if frame_F.shape != (1024, 2):
-            raise ValueError("For OLS/LSS/LPS, frame_F must have shape (1024, 2).")
-
-        w = _window_sequence(frame_type, win_type)
-
-        xL = _imdct(frame_F[:, 0]) * w
-        xR = _imdct(frame_F[:, 1]) * w
-
-        out = np.empty((2048, 2), dtype=np.float64)
-        out[:, 0] = xL
-        out[:, 1] = xR
-        return out
-
-    if frame_type == "ESH":
-        if frame_F.shape != (128, 16):
-            raise ValueError("For ESH, frame_F must have shape (128, 16).")
-
-        Xl, Xr = _unpack_esh(frame_F)
-        xL = _i_filter_bank_esh_channel(Xl, win_type)
-        xR = _i_filter_bank_esh_channel(Xr, win_type)
-
-        out = np.empty((2048, 2), dtype=np.float64)
-        out[:, 0] = xL
-        out[:, 1] = xR
-        return out
-
-    raise ValueError(f"Invalid frame_type: {frame_type!r}")
-
-
 def aac_coder_1(filename_in: Union[str, Path]) -> AACSeq1:
    """
-    Level-1 AAC encoder.
+    Level-1 AAC encoder (wrapper).
+
+    Delegates to core implementation.
+
+    This wrapper performs no processing of its own: filename_in is passed
+    through unchanged to core.aac_coder.aac_coder_1 (imported in this module
+    as core_aac_coder_1) and its result is returned as-is.

    Parameters
    ----------
-    filename_in : str | Path
+    filename_in : Union[str, Path]
        Input WAV filename.
        Assumption: stereo audio, sampling rate 48 kHz.

    Returns
    -------
-    aac_seq_1 : AACSeq1
-        List of K encoded frames.
-        For each i:
-
-        - aac_seq_1[i]["frame_type"]: FrameType
-        - aac_seq_1[i]["win_type"]: WinType
-        - aac_seq_1[i]["chl"]["frame_F"]:
-            - ESH: shape (128, 8)
-            - else: shape (1024, 1)
-        - aac_seq_1[i]["chr"]["frame_F"]:
-            - ESH: shape (128, 8)
-            - else: shape (1024, 1)
+    AACSeq1
+        List of encoded frames (Level 1 schema).
    """
-    filename_in = Path(filename_in)
-
-    x, fs = sf.read(str(filename_in), always_2d=True)
-    x = np.asarray(x, dtype=np.float64)
-
-    if x.shape[1] != 2:
-        raise ValueError("Input must be stereo (2 channels).")
-    if fs != 48000:
-        raise ValueError("Input sampling rate must be 48 kHz.")
-
-    hop = 1024
-    win = 2048
-
-    # Pad at the beginning to support the first overlap region.
-    # Tail padding is kept minimal; next-frame is padded on-the-fly when needed.
-    pad_pre = np.zeros((hop, 2), dtype=np.float64)
-    pad_post = np.zeros((hop, 2), dtype=np.float64)
-    x_pad = np.vstack([pad_pre, x, pad_post])
-
-    # Number of frames such that current frame fits; next frame will be padded if needed.
-    K = int((x_pad.shape[0] - win) // hop + 1)
-    if K <= 0:
-        raise ValueError("Input too short for framing.")
-
-    aac_seq: AACSeq1 = []
-    prev_frame_type: FrameType = "OLS"
-
-    for i in range(K):
-        start = i * hop
-
-        frame_t: FrameT = x_pad[start:start + win, :]
-        if frame_t.shape != (win, 2):
-            # This should not happen due to K definition, but we keep it explicit.
-            raise ValueError("Internal framing error: frame_t has wrong shape.")
-
-        next_t = x_pad[start + hop:start + hop + win, :]
-
-        # Ensure next_t is always (2048,2) by zero-padding at the tail.
-        if next_t.shape[0] < win:
-            tail = np.zeros((win - next_t.shape[0], 2), dtype=np.float64)
-            next_t = np.vstack([next_t, tail])
-
-        frame_type = SSC(frame_t, next_t, prev_frame_type)
-        frame_f = filter_bank(frame_t, frame_type, WIN_TYPE)
-
-        # Store per-channel as required by AACSeq1 schema
-        if frame_type == "ESH":
-            # frame_f: (128, 16) packed as [L0 R0 L1 R1 ... L7 R7]
-            chl_f = np.empty((128, 8), dtype=np.float64)
-            chr_f = np.empty((128, 8), dtype=np.float64)
-            for j in range(8):
-                chl_f[:, j] = frame_f[:, 2 * j + 0]
-                chr_f[:, j] = frame_f[:, 2 * j + 1]
-        else:
-            # frame_f: (1024, 2)
-            chl_f = frame_f[:, 0:1].astype(np.float64, copy=False)
-            chr_f = frame_f[:, 1:2].astype(np.float64, copy=False)
-
-        aac_seq.append({
-            "frame_type": frame_type,
-            "win_type": WIN_TYPE,
-            "chl": {"frame_F": chl_f},
-            "chr": {"frame_F": chr_f},
-        })
-        prev_frame_type = frame_type
-    return aac_seq
+    # Pure pass-through; NOTE(review): input validation and framing are presumably
+    # handled inside the core encoder — confirm against core/aac_coder.py.
+    return core_aac_coder_1(filename_in)


-def i_aac_coder_1(aac_seq_1: AACSeq1, filename_out: Union[str, Path]) -> np.ndarray:
+def aac_decoder_1(aac_seq_1: AACSeq1, filename_out: Union[str, Path]) -> StereoSignal:
    """
-    Level-1 AAC decoder (inverse of aac_coder_1()).
+    Level-1 AAC decoder (wrapper).
+
+    Delegates to core implementation.
+
+    This wrapper adds no logic of its own: both arguments are forwarded
+    unchanged to core.aac_decoder.aac_decoder_1 (imported in this module as
+    core_aac_decoder_1) and its result is returned as-is.

    Parameters
    ----------
    aac_seq_1 : AACSeq1
        Encoded sequence as produced by aac_coder_1().
-    filename_out : str | Path
-        Output WAV filename.
-        Assumption: stereo audio, sampling rate 48 kHz.
+    filename_out : Union[str, Path]
+        Output WAV filename. Assumption: 48 kHz, stereo.

    Returns
    -------
-    x : np.ndarray
-        Decoded audio samples (time-domain).
-        Expected shape: (N, 2) for stereo (N depends on input length).
+    StereoSignal
+        Decoded audio samples (time-domain), stereo, shape (N, 2), dtype float64.
    """
-    filename_out = Path(filename_out)
+    # NOTE(review): writing filename_out is presumably done by the core decoder;
+    # nothing is written to disk in this wrapper itself — confirm.
+    return core_aac_decoder_1(aac_seq_1, filename_out)

-    hop = 1024
-    win = 2048
-    K = len(aac_seq_1)

-    # Output includes the encoder padding region, so we reconstruct
-    # full padded stream. For K frames: last frame starts at (K-1)*hop and spans win,
-    # so total length = (K-1)*hop + win
-    n_pad = (K - 1) * hop + win
-    y_pad = np.zeros((n_pad, 2), dtype=np.float64)
+# -----------------------------------------------------------------------------
+# Demo (Level 1)
+# -----------------------------------------------------------------------------

-    for i, fr in enumerate(aac_seq_1):
-        frame_type = fr["frame_type"]
-        win_type = fr["win_type"]
+def _snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
+    """
+    Compute overall SNR (dB) over all samples and channels after aligning lengths.

-        chl_f = np.asarray(fr["chl"]["frame_F"], dtype=np.float64)
-        chr_f = np.asarray(fr["chr"]["frame_F"], dtype=np.float64)
+    Parameters
+    ----------
+    x_ref : StereoSignal
+        Reference stereo stream.
+    x_hat : StereoSignal
+        Reconstructed stereo stream.

-        # Re-pack into the format expected by i_filter_bank()
-        if frame_type == "ESH":
-            if chl_f.shape != (128, 8) or chr_f.shape != (128, 8):
-                raise ValueError("ESH channel frame_F must have shape (128, 8).")
+    Returns
+    -------
+    float
+        SNR in dB.
+        - Returns +inf if noise power is zero.
+        - Returns -inf if signal power is zero.
+        (The noise check comes first, so +inf is returned when both are zero.)
+    """
+    # Coerce both inputs to float64 arrays before any arithmetic.
+    x_ref = np.asarray(x_ref, dtype=np.float64)
+    x_hat = np.asarray(x_hat, dtype=np.float64)

-            frame_f = np.empty((128, 16), dtype=np.float64)
-            for j in range(8):
-                frame_f[:, 2 * j + 0] = chl_f[:, j]
-                frame_f[:, 2 * j + 1] = chr_f[:, j]
-        else:
-            if chl_f.shape != (1024, 1) or chr_f.shape != (1024, 1):
-                raise ValueError("Non-ESH channel frame_F must have shape (1024, 1).")
+    # Promote 1-D inputs to a single-column 2-D array so the slicing below is uniform.
+    if x_ref.ndim == 1:
+        x_ref = x_ref.reshape(-1, 1)
+    if x_hat.ndim == 1:
+        x_hat = x_hat.reshape(-1, 1)

-            frame_f = np.empty((1024, 2), dtype=np.float64)
-            frame_f[:, 0] = chl_f[:, 0]
-            frame_f[:, 1] = chr_f[:, 0]
+    # Compare only the overlap: common number of rows (n) and columns (c).
+    n = min(x_ref.shape[0], x_hat.shape[0])
+    c = min(x_ref.shape[1], x_hat.shape[1])

-        frame_t_hat = i_filter_bank(frame_f, frame_type, win_type)  # (2048, 2)
+    x_ref = x_ref[:n, :c]
+    x_hat = x_hat[:n, :c]

-        start = i * hop
-        y_pad[start:start + win, :] += frame_t_hat
+    # ps: summed squared reference samples; pn: summed squared error samples.
+    err = x_ref - x_hat
+    ps = float(np.sum(x_ref * x_ref))
+    pn = float(np.sum(err * err))

-    # Remove boundary padding that encoder adds: hop samples at start and hop at end.
-    if y_pad.shape[0] < 2 * hop:
-        raise ValueError("Decoded stream too short to unpad.")
+    # Resolve the degenerate cases before dividing and taking the logarithm.
+    if pn <= 0.0:
+        return float("inf")
+    if ps <= 0.0:
+        return float("-inf")

-    y = y_pad[hop:-hop, :]
-
-    sf.write(str(filename_out), y, 48000)
-    return y
+    return float(10.0 * np.log10(ps / pn))


 def demo_aac_1(filename_in: Union[str, Path], filename_out: Union[str, Path]) -> float:
    """
-    Demonstration for Level-1 codec.
+    Demonstration for the Level-1 codec.

    Runs:
    - aac_coder_1(filename_in)
-    - i_aac_coder_1(aac_seq_1, filename_out)
+    - aac_decoder_1(aac_seq_1, filename_out)
    and computes total SNR between original and decoded audio.

    Parameters
    ----------
-    filename_in : str | Path
+    filename_in : Union[str, Path]
        Input WAV filename (stereo, 48 kHz).
-    filename_out : str | Path
+    filename_out : Union[str, Path]
        Output WAV filename (stereo, 48 kHz).

    Returns
    -------
-    SNR : float
-        Overall Signal-to-Noise Ratio in dB.
+    float
+        Overall SNR in dB.
+
+    Raises
+    ------
+    ValueError
+        If the input file or the decoded output file is not sampled at 48 kHz.
    """
    filename_in = Path(filename_in)
    filename_out = Path(filename_out)

-    # Read original audio (reference)
-    x_ref, fs_ref = sf.read(str(filename_in), always_2d=True)
-    x_ref = np.asarray(x_ref, dtype=np.float64)
+    # Read original audio (reference) with the same validation as the codec.
+    x_ref, fs_ref = aac_read_wav_stereo_48k(filename_in)
+    # NOTE(review): aac_read_wav_stereo_48k is documented as enforcing 48 kHz itself,
+    # so this check looks like a defensive duplicate — confirm.
+    if int(fs_ref) != 48000:
+        raise ValueError("Input sampling rate must be 48 kHz.")

    # Encode / decode
    aac_seq_1 = aac_coder_1(filename_in)
-    x_hat = i_aac_coder_1(aac_seq_1, filename_out)
-    x_hat = np.asarray(x_hat, dtype=np.float64)
+    x_hat = aac_decoder_1(aac_seq_1, filename_out)

-    # Ensure 2D stereo shape (N, 2)
-    if x_hat.ndim == 1:
-        x_hat = x_hat.reshape(-1, 1)
-    if x_ref.ndim == 1:
-        x_ref = x_ref.reshape(-1, 1)
+    # Optional sanity: ensure output file exists and is readable
+    x_hat_file, fs_hat = sf.read(str(filename_out), always_2d=True)
+    # The samples read back are deliberately discarded; only fs_hat is checked.
+    _ = x_hat_file
+    if int(fs_hat) != 48000:
+        raise ValueError("Decoded output sampling rate must be 48 kHz.")

-    # Align lengths (use common overlap)
-    n = min(x_ref.shape[0], x_hat.shape[0])
-    x_ref = x_ref[:n, :]
-    x_hat = x_hat[:n, :]
+    # SNR is computed on the decoder's in-memory output (x_hat), not on x_hat_file.
+    return _snr_db(x_ref, x_hat)

-    # Match channel count conservatively (common channels)
-    c = min(x_ref.shape[1], x_hat.shape[1])
-    x_ref = x_ref[:, :c]
-    x_hat = x_hat[:, :c]
-
-    # Compute overall SNR over all samples and channels
-    err = x_ref - x_hat
-    p_signal = float(np.sum(x_ref * x_ref))
-    p_noise = float(np.sum(err * err))
-
-    if p_noise <= 0.0:
-        return float("inf")
-    if p_signal <= 0.0:
-        # Degenerate case: silent input
-        return -float("inf")
-    # else:
-    snr_db = 10.0 * np.log10(p_signal / p_noise)
-    return float(snr_db)

+# -----------------------------------------------------------------------------
+# CLI
+# -----------------------------------------------------------------------------

 if __name__ == "__main__":
-    # Example usage:
+    # Example:
    #   python -m level_1.level_1 input.wav output.wav
    import sys

+    # Exactly two positional arguments are required: input WAV and output WAV.
    if len(sys.argv) != 3:
        raise SystemExit("Usage: python -m level_1.level_1 <input.wav> <output.wav>")

-    in_wav = sys.argv[1]
-    out_wav = sys.argv[2]
+    in_wav = Path(sys.argv[1])
+    out_wav = Path(sys.argv[2])

+    # Run the encode/decode round trip and report the resulting SNR.
    print(f"Encoding/Decoding {in_wav} to {out_wav}")
    snr = demo_aac_1(in_wav, out_wav)
    print(f"SNR = {snr:.3f} dB")
-
--- a/source/level_1/tests/test_SSC.py
+++ b/source/level_1/tests/test_SSC.py
@ -1,199 +0,0 @@
-import numpy as np
-import pytest
-
-# Adjust the import based on package/module layout.
-from level_1.level_1 import SSC
-
-# Helper "fixtures" for SSC
-# -----------------------------------------------------------------------------
-
-def _next_frame_no_attack() -> np.ndarray:
-    """
-    Build a next_frame_T that should NOT trigger ESH detection.
-
-    Uses exact zeros so all s2l are zero and the ESH condition (s2l > 1e-3) cannot hold.
-    """
-    return np.zeros((2048, 2), dtype=np.float64)
-
-
-def _next_frame_strong_attack(
-    *,
-    attack_left: bool,
-    attack_right: bool,
-    segment_l: int = 4,
-    baseline: float = 1e-6,
-    burst_amp: float = 1.0,
-) -> np.ndarray:
-    """
-    Build a next_frame_T (2048x2) that should trigger ESH detection on selected channels.
-
-    Spec: ESH if exists l in {1..7} with s2l > 1e-3 AND ds2l > 10.
-    We create:
-      - small baseline energy in all samples (avoids division by zero in ds2l),
-      - a strong burst inside one 128-sample segment l in 1..7.
-    """
-    assert 1 <= segment_l <= 7
-    x = np.full((2048, 2), baseline, dtype=np.float64)
-
-    a = segment_l * 128
-    b = (segment_l + 1) * 128
-
-    if attack_left:
-        x[a:b, 0] += burst_amp
-    if attack_right:
-        x[a:b, 1] += burst_amp
-
-    return x
-
-
-def _next_frame_below_s2l_threshold(
-    *,
-    left: bool,
-    right: bool,
-    segment_l: int = 4,
-    impulse_amp: float = 0.01,
-) -> np.ndarray:
-    """
-    Construct a next_frame_T where s2l is below 1e-3, so ESH must NOT be triggered,
-    even if ds2l could be large.
-
-    Put a single impulse of amplitude 'impulse_amp' inside a segment.
-    Energy in the 128-sample segment: s2l ~= impulse_amp^2.
-    With impulse_amp=0.01 => s2l ~= 1e-4 < 1e-3.
-    """
-    assert 1 <= segment_l <= 7
-    x = np.zeros((2048, 2), dtype=np.float64)
-
-    idx = segment_l * 128 + 10  # inside segment
-    if left:
-        x[idx, 0] = impulse_amp
-    if right:
-        x[idx, 1] = impulse_amp
-
-    return x
-
-
-# ---------------------------------------------------------------------
-# 1) Fixed/mandatory cases (prev frame type forces current type)
-# ---------------------------------------------------------------------
-
-def test_ssc_fixed_cases_prev_lss_and_lps() -> None:
-    """
-    Spec: if prev was:
-      - LSS => current MUST be ESH
-      - LPS => current MUST be OLS
-    independent of next frame check.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-
-    # Even if next frame has a strong attack, LSS must force ESH.
-    next_attack = _next_frame_strong_attack(attack_left=True, attack_right=True)
-    out1 = SSC(frame_t, next_attack, "LSS")
-    assert out1 == "ESH"
-
-    # Even if next frame has a strong attack, LPS must force OLS.
-    out2 = SSC(frame_t, next_attack, "LPS")
-    assert out2 == "OLS"
-
-
-# ---------------------------------------------------------------------
-# 2) Cases requiring next-frame ESH prediction (energy/attack computation)
-# ---------------------------------------------------------------------
-
-def test_prev_ols_next_not_esh_returns_ols() -> None:
-    """
-    Spec: if prev=OLS, current is OLS or LSS.
-    Choose LSS iff (i+1) predicted ESH, else OLS.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    next_t = _next_frame_no_attack()
-
-    out = SSC(frame_t, next_t, "OLS")
-    assert out == "OLS"
-
-
-def test_prev_ols_next_esh_both_channels_returns_lss() -> None:
-    """
-    prev=OLS, next predicted ESH (both channels) => per-channel decisions are LSS and LSS
-    and merge table keeps LSS.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    next_t = _next_frame_strong_attack(attack_left=True, attack_right=True)
-
-    out = SSC(frame_t, next_t, "OLS")
-    assert out == "LSS"
-
-
-def test_prev_ols_next_esh_one_channel_returns_lss() -> None:
-    """
-    prev=OLS:
-      - one channel predicts ESH => LSS
-      - other channel predicts not ESH => OLS
-    Merge table: OLS + LSS => LSS.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-
-    next1_t = _next_frame_strong_attack(attack_left=True, attack_right=False)
-    out1 = SSC(frame_t, next1_t, "OLS")
-    assert out1 == "LSS"
-
-    next2_t = _next_frame_strong_attack(attack_left=False, attack_right=True)
-    out2 = SSC(frame_t, next2_t, "OLS")
-    assert out2 == "LSS"
-
-
-def test_prev_esh_next_esh_both_channels_returns_esh() -> None:
-    """
-    prev=ESH:
-      - next predicted ESH => current ESH (per-channel)
-    Merge table: ESH + ESH => ESH.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    next_t = _next_frame_strong_attack(attack_left=True, attack_right=True)
-
-    out = SSC(frame_t, next_t, "ESH")
-    assert out == "ESH"
-
-
-def test_prev_esh_next_not_esh_both_channels_returns_lps() -> None:
-    """
-    prev=ESH:
-      - next not predicted ESH => current LPS (per-channel)
-    Merge table: LPS + LPS => LPS.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    next_t = _next_frame_no_attack()
-
-    out = SSC(frame_t, next_t, "ESH")
-    assert out == "LPS"
-
-
-def test_prev_esh_next_esh_one_channel_merged_is_esh() -> None:
-    """
-    prev=ESH, attack predicted on exactly one channel of the next frame:
-      - the attacked channel predicts ESH  => per-channel decision ESH
-      - the quiet channel predicts not ESH => per-channel decision LPS
-    Merge table: ESH + LPS => ESH, regardless of which channel is attacked.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-
-    # Case 1: attack on the left channel only.
-    next1_t = _next_frame_strong_attack(attack_left=True, attack_right=False)
-    out1 = SSC(frame_t, next1_t, "ESH")
-    assert out1 == "ESH"
-
-    # Case 2: attack on the right channel only (mirror of case 1), so both
-    # orderings of the ESH/LPS merge are exercised.
-    next2_t = _next_frame_strong_attack(attack_left=False, attack_right=True)
-    out2 = SSC(frame_t, next2_t, "ESH")
-    assert out2 == "ESH"
-
-def test_threshold_s2l_must_exceed_1e_3() -> None:
-    """
-    Spec: next frame is ESH only if s2l > 1e-3 AND ds2l > 10 for some l in 1..7.
-    This test checks the necessity of the s2l threshold:
-      - Create a frame with s2l ~= 1e-4 < 1e-3 (single impulse with amp 0.01).
-      - Expect: not classified as ESH -> for prev=OLS return OLS.
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    next_t = _next_frame_below_s2l_threshold(left=True, right=True, impulse_amp=0.01)
-
-    out = SSC(frame_t, next_t, "OLS")
-    assert out == "OLS"
--- a/source/level_1/tests/test_filterbank.py
+++ b/source/level_1/tests/test_filterbank.py
@ -1,235 +0,0 @@
-import numpy as np
-import pytest
-
-from level_1.level_1 import FrameType, WinType, filter_bank, i_filter_bank
-
-# Helper "fixtures" for filterbank
-# -----------------------------------------------------------------------------
-
-def _ola_reconstruct(x: np.ndarray, frame_types: list[str], win_type: str) -> np.ndarray:
-    """
-    Analyze-synthesize each frame and overlap-add with hop=1024.
-    x: shape (N,2)
-    frame_types: length K, for frames starting at i*1024
-    """
-    hop = 1024
-    win = 2048
-    K = len(frame_types)
-
-    y = np.zeros_like(x, dtype=np.float64)
-
-    for i in range(K):
-        start = i * hop
-        frame_t = x[start:start + win, :]
-        frame_f = filter_bank(frame_t, frame_types[i], win_type)
-        frame_t_hat = i_filter_bank(frame_f, frame_types[i], win_type)
-        y[start:start + win, :] += frame_t_hat
-
-    return y
-
-
-def _snr_db(x: np.ndarray, y: np.ndarray) -> float:
-    err = x - y
-    ps = float(np.sum(x * x))
-    pn = float(np.sum(err * err))
-    if pn <= 0.0:
-        return float("inf")
-    return 10.0 * np.log10(ps / pn)
-
-# ---------------------------------------------------------------------
-# Forward filterbank tests
-# ---------------------------------------------------------------------
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-@pytest.mark.parametrize("frame_type", ["OLS", "LSS", "LPS"])
-def test_filterbank_shapes_long_sequences(frame_type: FrameType, win_type: WinType) -> None:
-    """
-    Contract test:
-    For OLS/LSS/LPS, filter_bank returns shape (1024, 2).
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    frame_f = filter_bank(frame_t, frame_type, win_type)
-    assert frame_f.shape == (1024, 2)
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_filterbank_shapes_esh(win_type: WinType) -> None:
-    """
-    Contract test:
-    For ESH, filter_bank returns shape (128, 16).
-    """
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    frame_f = filter_bank(frame_t, "ESH", win_type)
-    assert frame_f.shape == (128, 16)
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_filterbank_channel_isolation_long_sequences(win_type: WinType) -> None:
-    """
-    Module behavior test:
-    For OLS (representative long-sequence), channels are processed independently:
-    - If right channel is zero and left is random, right spectrum should be near zero.
-    """
-    rng = np.random.default_rng(0)
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    frame_t[:, 0] = rng.normal(size=2048)
-
-    frame_f = filter_bank(frame_t, "OLS", win_type)
-
-    # Right channel output should be (close to) zero
-    assert np.max(np.abs(frame_f[:, 1])) < 1e-9
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_filterbank_channel_isolation_esh(win_type: WinType) -> None:
-    """
-    Module behavior test:
-    For ESH, channels are processed independently:
-    - If right channel is zero and left is random, all odd columns (right) should be near zero.
-    """
-    rng = np.random.default_rng(1)
-    frame_t = np.zeros((2048, 2), dtype=np.float64)
-    frame_t[:, 0] = rng.normal(size=2048)
-
-    frame_f = filter_bank(frame_t, "ESH", win_type)
-
-    # Right channel appears in columns 1,3,5,...,15
-    right_cols = frame_f[:, 1::2]
-    assert np.max(np.abs(right_cols)) < 1e-9
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_filterbank_esh_ignores_outer_regions(win_type: WinType) -> None:
-    """
-    Spec-driven behavior test:
-    ESH uses only the central 1152 samples (from 448 to 1599), split into 8 overlapping
-    windows of length 256 with 50% overlap.
-
-    Therefore, changing samples outside [448, 1600) must not affect the output.
-    """
-    rng = np.random.default_rng(2)
-
-    frame_a = np.zeros((2048, 2), dtype=np.float64)
-    frame_b = np.zeros((2048, 2), dtype=np.float64)
-
-    # Same central region for both frames
-    center = rng.normal(size=(1152, 2))
-    frame_a[448:1600, :] = center
-    frame_b[448:1600, :] = center
-
-    # Modify only the outer regions of frame_b
-    frame_b[0:448, :] = rng.normal(size=(448, 2))
-    frame_b[1600:2048, :] = rng.normal(size=(448, 2))
-
-    fa = filter_bank(frame_a, "ESH", win_type)
-    fb = filter_bank(frame_b, "ESH", win_type)
-
-    np.testing.assert_allclose(fa, fb, rtol=0.0, atol=0.0)
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_filterbank_output_is_finite(win_type: WinType) -> None:
-    """
-    Sanity test:
-    Output must not contain NaN or inf for representative cases.
-    """
-    rng = np.random.default_rng(3)
-    frame_t = rng.normal(size=(2048, 2)).astype(np.float64)
-
-    for frame_type in ("OLS", "LSS", "ESH", "LPS"):
-        frame_f = filter_bank(frame_t, frame_type, win_type)
-        assert np.isfinite(frame_f).all()
-
-
-# ---------------------------------------------------------------------
-# Reverse i_filterbank tests
-# ---------------------------------------------------------------------
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_ifilterbank_shapes_long_sequences(win_type: str) -> None:
-    frame_f = np.zeros((1024, 2), dtype=np.float64)
-    for frame_type in ("OLS", "LSS", "LPS"):
-        frame_t = i_filter_bank(frame_f, frame_type, win_type)
-        assert frame_t.shape == (2048, 2)
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_ifilterbank_shapes_esh(win_type: str) -> None:
-    frame_f = np.zeros((128, 16), dtype=np.float64)
-    frame_t = i_filter_bank(frame_f, "ESH", win_type)
-    assert frame_t.shape == (2048, 2)
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_roundtrip_per_frame_is_finite(win_type: str) -> None:
-    rng = np.random.default_rng(0)
-    frame_t = rng.normal(size=(2048, 2)).astype(np.float64)
-
-    for frame_type in ("OLS", "LSS", "ESH", "LPS"):
-        frame_f = filter_bank(frame_t, frame_type, win_type)
-        frame_t_hat = i_filter_bank(frame_f, frame_type, win_type)
-        assert np.isfinite(frame_t_hat).all()
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_ola_reconstruction_ols_high_snr(win_type: str) -> None:
-    """
-    Core module-level test:
-    OLS analysis+synthesis with hop=1024 must reconstruct with high SNR
-    in the steady-state region.
-    """
-    rng = np.random.default_rng(1)
-
-    K = 6
-    N = 1024 * (K + 1)
-    x = rng.normal(size=(N, 2)).astype(np.float64)
-
-    y = _ola_reconstruct(x, ["OLS"] * K, win_type)
-
-    # Exclude edges (first and last hop) where full overlap is not available
-    a = 1024
-    b = N - 1024
-    snr = _snr_db(x[a:b, :], y[a:b, :])
-    assert snr > 50.0
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_ola_reconstruction_esh_high_snr(win_type: str) -> None:
-    """
-    ESH analysis+synthesis with hop=1024 must reconstruct with high SNR
-    in the steady-state region.
-    """
-    rng = np.random.default_rng(2)
-
-    K = 6
-    N = 1024 * (K + 1)
-    x = rng.normal(size=(N, 2)).astype(np.float64)
-
-    y = _ola_reconstruct(x, ["ESH"] * K, win_type)
-
-    a = 1024
-    b = N - 1024
-    snr = _snr_db(x[a:b, :], y[a:b, :])
-    assert snr > 45.0
-
-
-@pytest.mark.parametrize("win_type", ["SIN", "KBD"])
-def test_ola_reconstruction_transition_sequence(win_type: str) -> None:
-    """
-    Transition sequence test matching the windowing logic:
-      OLS -> LSS -> ESH -> LPS -> OLS -> OLS
-    """
-    rng = np.random.default_rng(3)
-
-    frame_types = ["OLS", "LSS", "ESH", "LPS", "OLS", "OLS"]
-    K = len(frame_types)
-    N = 1024 * (K + 1)
-    x = rng.normal(size=(N, 2)).astype(np.float64)
-
-    y = _ola_reconstruct(x, frame_types, win_type)
-
-    a = 1024
-    b = N - 1024
-    snr = _snr_db(x[a:b, :], y[a:b, :])
-    assert snr > 40.0
--- a/source/level_2/level_2.py
+++ b/source/level_2/level_2.py
@ -0,0 +1,21 @@
+# ------------------------------------------------------------
+# AAC Coder/Decoder - Level 2 Wrappers + Demo
+#
+# Multimedia course at Aristotle University of
+# Thessaloniki (AUTh)
+#
+# Author:
+#   Christos Choutouridis (ΑΕΜ 8997)
+#   cchoutou@ece.auth.gr
+#
+# Description:
+#   Level 2 wrapper module.
+#
+#   Planned contents (not implemented yet in this revision):
+#   - Thin wrappers for Level 2 API functions (encode/decode) that delegate
+#     to the corresponding core implementations.
+#   - A demo function that runs end-to-end and computes SNR.
+#   - A small CLI entrypoint for convenience.
+# ------------------------------------------------------------
+from __future__ import annotations
+
--- a/source/pytest.ini
+++ b/source/pytest.ini
@ -0,0 +1,4 @@
+[pytest]
+pythonpath = .
+testpaths =
+    core/tests