Level 3: Fix alpha miss-calculation that affected SNR

2026-02-16 00:10:05 +02:00 · 2026-02-16 00:10:05 +02:00 · 6b1ebc3cfe
commit 6b1ebc3cfe
parent 781cb3047f
11 changed files with 268 additions and 396 deletions
--- a/source/core/aac_coder.py
+++ b/source/core/aac_coder.py
@ -485,46 +485,16 @@ def aac_coder_3(
        sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
        sfc_R_dpcm = np.asarray(sfc_R, dtype=np.int64)[1:, ...]

-        # Codebook 11:
-        # maxAbsCodeVal = 16 is RESERVED for ESCAPE.
-        # We must stay strictly within [-15, +15] to avoid escape decoding.
-        # sf_cb = 11
-        # sf_max_abs = int(huff_LUT_list[sf_cb]["maxAbsCodeVal"]) - 1  # -> 15
-        #
-        # sfc_L_dpcm = np.clip(
-        #     sfc_L_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-        #
-        # sfc_R_dpcm = np.clip(
-        #     sfc_R_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-
-        sfc_L_stream, cb_sfc_L = aac_encode_huff(
-            sfc_L_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
-        sfc_R_stream, cb_sfc_R = aac_encode_huff(
-            sfc_R_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
+        # sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        # sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list)
+        sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list)

        if cb_sfc_L != 11 or cb_sfc_R != 11:
-            print (f"frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}")
+            raise ValueError(f"Illegal codebook value for frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}.")

-        mdct_L_stream, cb_L = aac_encode_huff(
-            np.asarray(S_L, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
-        mdct_R_stream, cb_R = aac_encode_huff(
-            np.asarray(S_R, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
+        mdct_L_stream, cb_L = aac_encode_huff(np.asarray(S_L, dtype=np.int64).reshape(-1), huff_LUT_list)
+        mdct_R_stream, cb_R = aac_encode_huff(np.asarray(S_R, dtype=np.int64).reshape(-1), huff_LUT_list)

        # Typed dict construction helps static analyzers validate the schema.
        frame_out: AACSeq3Frame = {
--- a/source/core/aac_quantizer.py
+++ b/source/core/aac_quantizer.py
@ -28,13 +28,8 @@ from core.aac_types import *
 # Constants (assignment)
 # -----------------------------------------------------------------------------
 MAGIC_NUMBER: float = 0.4054
-MQ: int = 8191
-
 EPS: float = 1e-12
-MAX_SFC_DIFF: int = 60
-
-# Safeguard: prevents infinite loops in pathological cases
-MAX_ALPHA_ITERS: int = 2000
+MAX_SF_DELTA:int  = 60


 # -----------------------------------------------------------------------------
@ -149,39 +144,35 @@ def _dequantize_symbol(S: QuantizedSymbols, alpha: float) -> FloatArray:
 # -----------------------------------------------------------------------------
 # Alpha initialization (Eq. 14)
 # -----------------------------------------------------------------------------
-def _alpha_initial_from_frame_max(x_all: FloatArray) -> int:
+def _initial_alpha_hat(X: "FloatArray", MQ: int = 8191) -> int:
    """
-    Compute the initial scalefactor gain alpha_hat for a frame.
+    Compute the initial scalefactor estimate alpha_hat for a frame.

-    Implements Eq. (14):
-        alpha_hat = (16/3) * log2( max_k(|X(k)|^(3/4)) / MQ )
+    The assignment proposes the following first approximation (Equation 14):

-    The same initial value is used for all bands before the per-band refinement.
+        alpha_hat = (16/3) * log2( max_k(|X(k)|)^(3/4) / MQ )
+
+    where max_k runs over all MDCT coefficients of the frame (not per band),
+    and MQ is the maximum quantization level parameter (2*MQ + 1 levels).

    Parameters
    ----------
-    x_all : FloatArray
-        All MDCT coefficients of a frame (one channel), flattened.
+    X : FloatArray
+        MDCT coefficients of one frame (or one ESH subframe), shape (N,).
+    MQ : int
+        Quantizer parameter (default 8191, as per assignment).

    Returns
    -------
    int
-        Initial alpha value (integer).
+        Integer alpha_hat (rounded to nearest integer).
    """
-    x_all = np.asarray(x_all, dtype=np.float64).reshape(-1)
-    if x_all.size == 0:
+    x_max = float(np.max(np.abs(X)))
+    if x_max <= 0.0:
        return 0

-    max_abs = float(np.max(np.abs(x_all)))
-    if max_abs <= 0.0:
-        return 0
-
-    val = (max_abs ** 0.75) / float(MQ)
-    if val <= 0.0:
-        return 0
-
-    alpha_hat = (16.0 / 3.0) * np.log2(val)
-    return int(np.floor(alpha_hat))
+    alpha_hat = (16.0 / 3.0) * np.log2((x_max ** (3.0 / 4.0)) / float(MQ))
+    return int(np.round(alpha_hat))


 # -----------------------------------------------------------------------------
@ -279,35 +270,38 @@ def _threshold_T_from_SMR(
 # Alpha selection per band + neighbor-difference constraint
 # -----------------------------------------------------------------------------
 def _best_alpha_for_band(
-    X: FloatArray,
-    lo: int,
-    hi: int,
-    T_b: float,
-    alpha0: int,
-    alpha_min: int,
-    alpha_max: int,
+    X: "FloatArray", lo: int, hi: int, T_b: float,
+    alpha_hat: int, alpha_prev: int, alpha_min: int, alpha_max: int,
 ) -> int:
    """
-    Determine the band-wise scalefactor alpha(b) by iteratively increasing alpha.
+    Determine the band-wise scalefactor alpha(b) following the assignment.

-    The algorithm increases alpha until the quantization error power exceeds
-    the band threshold:
+    Procedure:
+    - Start from a frame-wise initial estimate alpha_hat.
+    - Iteratively increase alpha(b) by 1 as long as the quantization error power
+      stays below the psychoacoustic threshold T(b):  P_e(b) = sum_{k in band} ( X(k) - Xhat(k) )^2

-        P_e(b) = sum_{k in band} (X(k) - Xhat(k))^2
-        select the largest alpha such that P_e(b) <= T(b)
+    - Stop increasing alpha(b) if the neighbor constraint would be violated: |alpha(b) - alpha(b-1)| <= 60
+      When processing bands sequentially (low -> high), this becomes: alpha(b) <= alpha_prev + 60
+
+    Notes:
+    - This function does not decrease alpha if the initial value already violates
+      the threshold; the assignment only specifies iterative increase.

    Parameters
    ----------
    X : FloatArray
-        Full MDCT vector (one frame or one subframe), shape (N,).
+        Full MDCT vector of the current (sub)frame, shape (N,).
    lo, hi : int
-        Inclusive MDCT index range defining the band.
+        Band index bounds (inclusive), defining the band slice.
    T_b : float
-        Psychoacoustic threshold for this band.
-    alpha0 : int
-        Initial alpha value for the band.
+        Threshold T(b) for this band.
+    alpha_hat : int
+        Initial frame-wise estimate (Equation 14).
+    alpha_prev : int
+        Previously selected alpha for band b-1 (neighbor constraint reference).
    alpha_min, alpha_max : int
-        Bounds for alpha, used as a safeguard.
+        Safeguard bounds for alpha.

    Returns
    -------
@ -315,81 +309,41 @@ def _best_alpha_for_band(
        Selected integer alpha(b).
    """
    if T_b <= 0.0:
-        return int(alpha0)
-
+        return int(alpha_hat)
    Xsec = X[lo : hi + 1]

-    alpha = int(alpha0)
-    alpha = max(alpha_min, min(alpha, alpha_max))
+    # Neighbor constraint (sequential processing): alpha(b) <= alpha_prev + 60
+    alpha_limit = min(int(alpha_max), int(alpha_prev) + MAX_SF_DELTA)
+
+    # Start from alpha_hat, clamped to feasible range
+    alpha = int(alpha_hat)
+    alpha = max(int(alpha_min), min(alpha, int(alpha_limit)))

    # Evaluate at current alpha
    Ssec = _quantize_symbol(Xsec, alpha)
    Xhat = _dequantize_symbol(Ssec, alpha)
    Pe = float(np.sum((Xsec - Xhat) ** 2))

+    # If already above threshold, return current alpha (no decrease step specified)
    if Pe > T_b:
        return alpha

-    iters = 0
-    while iters < MAX_ALPHA_ITERS:
-        iters += 1
+    # Increase alpha while still under threshold and within constraints
+    while True:
        alpha_next = alpha + 1
-        if alpha_next > alpha_max:
+        if alpha_next > alpha_limit:
            break
-
        Ssec = _quantize_symbol(Xsec, alpha_next)
        Xhat = _dequantize_symbol(Ssec, alpha_next)
        Pe_next = float(np.sum((Xsec - Xhat) ** 2))

        if Pe_next > T_b:
            break
-
        alpha = alpha_next
-        Pe = Pe_next

    return alpha


-def _enforce_max_diff(alpha: np.ndarray, max_diff: int = MAX_SFC_DIFF) -> np.ndarray:
-    """
-    Enforce neighbor constraint |alpha[b] - alpha[b-1]| <= max_diff by clamping.
-
-    Uses a forward pass and a backward pass to reduce drift.
-
-    Parameters
-    ----------
-    alpha : np.ndarray
-        Alpha vector, shape (NB,).
-    max_diff : int
-        Maximum allowed absolute difference between adjacent bands.
-
-    Returns
-    -------
-    np.ndarray
-        Clamped alpha vector, int64, shape (NB,).
-    """
-    a = np.asarray(alpha, dtype=np.int64).copy()
-    nb = a.shape[0]
-
-    for b in range(1, nb):
-        lo = int(a[b - 1] - max_diff)
-        hi = int(a[b - 1] + max_diff)
-        if a[b] < lo:
-            a[b] = lo
-        elif a[b] > hi:
-            a[b] = hi
-
-    for b in range(nb - 2, -1, -1):
-        lo = int(a[b + 1] - max_diff)
-        hi = int(a[b + 1] + max_diff)
-        if a[b] < lo:
-            a[b] = lo
-        elif a[b] > hi:
-            a[b] = hi
-
-    return a
-
-
 # -----------------------------------------------------------------------------
 # Public API
 # -----------------------------------------------------------------------------
@ -401,7 +355,16 @@ def aac_quantizer(
    """
    AAC quantizer for one channel (Level 3).

-    Quantizes MDCT coefficients using band-wise scalefactors derived from SMR.
+    Quantizes MDCT coefficients (after TNS) using band-wise scalefactors derived
+    from psychoacoustic thresholds computed via SMR.
+
+    The implementation follows the assignment procedure:
+    - Compute an initial frame-wise alpha_hat using Equation (14), based on the
+      maximum MDCT coefficient magnitude of the (sub)frame.
+    - For each band b, increase alpha(b) by 1 while the quantization error power
+      P_e(b) stays below the threshold T(b).
+    - Enforce the neighbor constraint |alpha(b) - alpha(b-1)| <= 60 during the
+      band-by-band search (no post-processing needed).

    Parameters
    ----------
@ -421,16 +384,19 @@ def aac_quantizer(
    Returns
    -------
    S : QuantizedSymbols
-        Quantized symbols S(k), shape (1024, 1) for all frame types.
+        Quantized symbols S(k), packed as shape (1024, 1) for all frame types.
+        For ESH, the 8 subframes are packed in column-major subframe layout.
    sfc : ScaleFactors
-        DPCM-coded scalefactor differences sfc(b) = alpha(b) - alpha(b-1).
+        DPCM-coded scalefactors:
+            sfc(0) = alpha(0) = G
+            sfc(b) = alpha(b) - alpha(b-1), for b > 0
        Shapes:
        - Long: (NB, 1)
        - ESH: (NB, 8)
    G : GlobalGain
        Global gain G = alpha(0).
        - Long: scalar float
-        - ESH: array shape (1, 8)
+        - ESH: array shape (1, 8), dtype float64
    """
    bands = _band_slices(frame_type)
    NB = len(bands)
@ -438,6 +404,9 @@ def aac_quantizer(
    X = np.asarray(frame_F, dtype=np.float64)
    SMR = np.asarray(SMR, dtype=np.float64)

+    # -------------------------------------------------------------------------
+    # ESH: 8 short subframes, each of length 128
+    # -------------------------------------------------------------------------
    if frame_type == "ESH":
        if X.shape != (128, 8):
            raise ValueError("For ESH, frame_F must have shape (128, 8).")
@ -446,42 +415,58 @@ def aac_quantizer(

        S_out: QuantizedSymbols = np.zeros((1024, 1), dtype=np.int64)
        sfc: ScaleFactors = np.zeros((NB, 8), dtype=np.int64)
-        G_arr = np.zeros((1, 8), dtype=np.int64)
+        G_arr = np.zeros((1, 8), dtype=np.float64)
+
+        # Packed output view: (128, 8) with column-major layout
+        S_pack = S_out[:, 0].reshape(128, 8, order="F")

        for j in range(8):
            Xj = X[:, j].reshape(128)
            SMRj = SMR[:, j].reshape(NB)

+            # Compute psychoacoustic threshold T(b) for this subframe
            T = _threshold_T_from_SMR(Xj, SMRj, bands)

-            alpha0 = _alpha_initial_from_frame_max(Xj)
-            alpha = np.full((NB,), alpha0, dtype=np.int64)
+            # Frame-wise initial estimate alpha_hat (Equation 14)
+            alpha_hat = _initial_alpha_hat(Xj)
+
+            # Band-wise scalefactors alpha(b)
+            alpha = np.zeros((NB,), dtype=np.int64)
+            alpha_prev = int(alpha_hat)

            for b, (lo, hi) in enumerate(bands):
-                alpha[b] = _best_alpha_for_band(
-                    X=Xj, lo=lo, hi=hi, T_b=float(T[b]),
-                    alpha0=int(alpha[b]),
+                alpha_b = _best_alpha_for_band(
+                    X=Xj,
+                    lo=lo,
+                    hi=hi,
+                    T_b=float(T[b]),
+                    alpha_hat=int(alpha_hat),
+                    alpha_prev=int(alpha_prev),
                    alpha_min=-4096,
                    alpha_max=4096,
                )
+                alpha[b] = int(alpha_b)
+                alpha_prev = int(alpha_b)

-            alpha = _enforce_max_diff(alpha, MAX_SFC_DIFF)
-
-            G_arr[0, j] = int(alpha[0])
+            # DPCM-coded scalefactors
+            G_arr[0, j] = float(alpha[0])
            sfc[0, j] = int(alpha[0])
            for b in range(1, NB):
                sfc[b, j] = int(alpha[b] - alpha[b - 1])

+            # Quantize MDCT coefficients band-by-band
            Sj = np.zeros((128,), dtype=np.int64)
            for b, (lo, hi) in enumerate(bands):
                Sj[lo : hi + 1] = _quantize_symbol(Xj[lo : hi + 1], float(alpha[b]))

-            # Place this subframe in the packed output (column-major subframe layout)
-            S_out[:, 0].reshape(128, 8, order="F")[:, j] = Sj
+            # Store subframe in packed output
+            S_pack[:, j] = Sj

-        return S_out, sfc, G_arr.astype(np.float64)
+        return S_out, sfc, G_arr

-    # Long frames
+    # -------------------------------------------------------------------------
+    # Long frames: OLS / LSS / LPS, length 1024
+    # -------------------------------------------------------------------------
    if X.shape == (1024,):
        Xv = X
    elif X.shape == (1024, 1):
@ -496,33 +481,44 @@ def aac_quantizer(
    else:
        raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")

+    # Compute psychoacoustic threshold T(b) for the long frame
    T = _threshold_T_from_SMR(Xv, SMRv, bands)

-    alpha0 = _alpha_initial_from_frame_max(Xv)
-    alpha = np.full((NB,), alpha0, dtype=np.int64)
+    # Frame-wise initial estimate alpha_hat (Equation 14)
+    alpha_hat = _initial_alpha_hat(Xv)
+
+    # Band-wise scalefactors alpha(b)
+    alpha = np.zeros((NB,), dtype=np.int64)
+    alpha_prev = int(alpha_hat)

    for b, (lo, hi) in enumerate(bands):
-        alpha[b] = _best_alpha_for_band(
-            X=Xv, lo=lo, hi=hi, T_b=float(T[b]),
-            alpha0=int(alpha[b]),
+        alpha_b = _best_alpha_for_band(
+            X=Xv,
+            lo=lo,
+            hi=hi,
+            T_b=float(T[b]),
+            alpha_hat=int(alpha_hat),
+            alpha_prev=int(alpha_prev),
            alpha_min=-4096,
            alpha_max=4096,
        )
+        alpha[b] = int(alpha_b)
+        alpha_prev = int(alpha_b)

-    alpha = _enforce_max_diff(alpha, MAX_SFC_DIFF)
-
-    sfc: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
-    sfc[0, 0] = int(alpha[0])
+    # DPCM-coded scalefactors
+    sfc_out: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
+    sfc_out[0, 0] = int(alpha[0])
    for b in range(1, NB):
-        sfc[b, 0] = int(alpha[b] - alpha[b - 1])
+        sfc_out[b, 0] = int(alpha[b] - alpha[b - 1])

    G: float = float(alpha[0])

+    # Quantize MDCT coefficients band-by-band
    S_vec = np.zeros((1024,), dtype=np.int64)
    for b, (lo, hi) in enumerate(bands):
        S_vec[lo : hi + 1] = _quantize_symbol(Xv[lo : hi + 1], float(alpha[b]))

-    return S_vec.reshape(1024, 1), sfc, G
+    return S_vec.reshape(1024, 1), sfc_out, G


 def aac_i_quantizer(
--- a/source/core/tests/test_aac_coder_decoder.py
+++ b/source/core/tests/test_aac_coder_decoder.py
@ -310,5 +310,5 @@ def test_end_to_end_level_3_high_snr(mk_actual_stereo_wav: Path, tmp_path: Path)

    n = min(x_ref.shape[0], y_hat.shape[0])
    s = snr_db(x_ref[:n, :], y_hat[:n, :])
-    print(f"SNR={s}")
-    assert s > 2.0
+    # print(f" with SNR={s}")
+    assert s > 7.0
--- a/source/level_1/core/aac_coder.py
+++ b/source/level_1/core/aac_coder.py
@ -485,46 +485,16 @@ def aac_coder_3(
        sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
        sfc_R_dpcm = np.asarray(sfc_R, dtype=np.int64)[1:, ...]

-        # Codebook 11:
-        # maxAbsCodeVal = 16 is RESERVED for ESCAPE.
-        # We must stay strictly within [-15, +15] to avoid escape decoding.
-        # sf_cb = 11
-        # sf_max_abs = int(huff_LUT_list[sf_cb]["maxAbsCodeVal"]) - 1  # -> 15
-        #
-        # sfc_L_dpcm = np.clip(
-        #     sfc_L_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-        #
-        # sfc_R_dpcm = np.clip(
-        #     sfc_R_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-
-        sfc_L_stream, cb_sfc_L = aac_encode_huff(
-            sfc_L_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
-        sfc_R_stream, cb_sfc_R = aac_encode_huff(
-            sfc_R_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
+        # sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        # sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list)
+        sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list)

        if cb_sfc_L != 11 or cb_sfc_R != 11:
-            print (f"frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}")
+            raise ValueError(f"Illegal codebook value for frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}.")

-        mdct_L_stream, cb_L = aac_encode_huff(
-            np.asarray(S_L, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
-        mdct_R_stream, cb_R = aac_encode_huff(
-            np.asarray(S_R, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
+        mdct_L_stream, cb_L = aac_encode_huff(np.asarray(S_L, dtype=np.int64).reshape(-1), huff_LUT_list)
+        mdct_R_stream, cb_R = aac_encode_huff(np.asarray(S_R, dtype=np.int64).reshape(-1), huff_LUT_list)

        # Typed dict construction helps static analyzers validate the schema.
        frame_out: AACSeq3Frame = {
--- a/source/level_1/material/LicorDeCalandraca_out1.wav
+++ b/source/level_1/material/LicorDeCalandraca_out1.wav
--- a/source/level_2/core/aac_coder.py
+++ b/source/level_2/core/aac_coder.py
@ -485,46 +485,16 @@ def aac_coder_3(
        sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
        sfc_R_dpcm = np.asarray(sfc_R, dtype=np.int64)[1:, ...]

-        # Codebook 11:
-        # maxAbsCodeVal = 16 is RESERVED for ESCAPE.
-        # We must stay strictly within [-15, +15] to avoid escape decoding.
-        # sf_cb = 11
-        # sf_max_abs = int(huff_LUT_list[sf_cb]["maxAbsCodeVal"]) - 1  # -> 15
-        #
-        # sfc_L_dpcm = np.clip(
-        #     sfc_L_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-        #
-        # sfc_R_dpcm = np.clip(
-        #     sfc_R_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-
-        sfc_L_stream, cb_sfc_L = aac_encode_huff(
-            sfc_L_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
-        sfc_R_stream, cb_sfc_R = aac_encode_huff(
-            sfc_R_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
+        # sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        # sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list)
+        sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list)

        if cb_sfc_L != 11 or cb_sfc_R != 11:
-            print (f"frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}")
+            raise ValueError(f"Illegal codebook value for frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}.")

-        mdct_L_stream, cb_L = aac_encode_huff(
-            np.asarray(S_L, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
-        mdct_R_stream, cb_R = aac_encode_huff(
-            np.asarray(S_R, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
+        mdct_L_stream, cb_L = aac_encode_huff(np.asarray(S_L, dtype=np.int64).reshape(-1), huff_LUT_list)
+        mdct_R_stream, cb_R = aac_encode_huff(np.asarray(S_R, dtype=np.int64).reshape(-1), huff_LUT_list)

        # Typed dict construction helps static analyzers validate the schema.
        frame_out: AACSeq3Frame = {
--- a/source/level_2/material/LicorDeCalandraca_out2.wav
+++ b/source/level_2/material/LicorDeCalandraca_out2.wav
--- a/source/level_3/core/aac_coder.py
+++ b/source/level_3/core/aac_coder.py
@ -485,46 +485,16 @@ def aac_coder_3(
        sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
        sfc_R_dpcm = np.asarray(sfc_R, dtype=np.int64)[1:, ...]

-        # Codebook 11:
-        # maxAbsCodeVal = 16 is RESERVED for ESCAPE.
-        # We must stay strictly within [-15, +15] to avoid escape decoding.
-        # sf_cb = 11
-        # sf_max_abs = int(huff_LUT_list[sf_cb]["maxAbsCodeVal"]) - 1  # -> 15
-        #
-        # sfc_L_dpcm = np.clip(
-        #     sfc_L_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-        #
-        # sfc_R_dpcm = np.clip(
-        #     sfc_R_dpcm,
-        #     -sf_max_abs,
-        #     sf_max_abs,
-        # ).astype(np.int64, copy=False)
-
-        sfc_L_stream, cb_sfc_L = aac_encode_huff(
-            sfc_L_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
-        sfc_R_stream, cb_sfc_R = aac_encode_huff(
-            sfc_R_dpcm.reshape(-1, order="F"),
-            huff_LUT_list,
-            # force_codebook=11,
-        )
+        # sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        # sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list, force_codebook=11)
+        sfc_L_stream, cb_sfc_L = aac_encode_huff(sfc_L_dpcm.reshape(-1, order="F"), huff_LUT_list)
+        sfc_R_stream, cb_sfc_R = aac_encode_huff(sfc_R_dpcm.reshape(-1, order="F"), huff_LUT_list)

        if cb_sfc_L != 11 or cb_sfc_R != 11:
-            print (f"frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}")
+            raise ValueError(f"Illegal codebook value for frame: {i}: cb_sfc_l={cb_sfc_L}, cb_sfc_r={cb_sfc_R}.")

-        mdct_L_stream, cb_L = aac_encode_huff(
-            np.asarray(S_L, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
-        mdct_R_stream, cb_R = aac_encode_huff(
-            np.asarray(S_R, dtype=np.int64).reshape(-1),
-            huff_LUT_list,
-        )
+        mdct_L_stream, cb_L = aac_encode_huff(np.asarray(S_L, dtype=np.int64).reshape(-1), huff_LUT_list)
+        mdct_R_stream, cb_R = aac_encode_huff(np.asarray(S_R, dtype=np.int64).reshape(-1), huff_LUT_list)

        # Typed dict construction helps static analyzers validate the schema.
        frame_out: AACSeq3Frame = {
--- a/source/level_3/core/aac_quantizer.py
+++ b/source/level_3/core/aac_quantizer.py
@ -28,13 +28,8 @@ from core.aac_types import *
 # Constants (assignment)
 # -----------------------------------------------------------------------------
 MAGIC_NUMBER: float = 0.4054
-MQ: int = 8191
-
 EPS: float = 1e-12
-MAX_SFC_DIFF: int = 60
-
-# Safeguard: prevents infinite loops in pathological cases
-MAX_ALPHA_ITERS: int = 2000
+MAX_SF_DELTA:int  = 60


 # -----------------------------------------------------------------------------
@ -149,39 +144,35 @@ def _dequantize_symbol(S: QuantizedSymbols, alpha: float) -> FloatArray:
 # -----------------------------------------------------------------------------
 # Alpha initialization (Eq. 14)
 # -----------------------------------------------------------------------------
-def _alpha_initial_from_frame_max(x_all: FloatArray) -> int:
+def _initial_alpha_hat(X: "FloatArray", MQ: int = 8191) -> int:
    """
-    Compute the initial scalefactor gain alpha_hat for a frame.
+    Compute the initial scalefactor estimate alpha_hat for a frame.

-    Implements Eq. (14):
-        alpha_hat = (16/3) * log2( max_k(|X(k)|^(3/4)) / MQ )
+    The assignment proposes the following first approximation (Equation 14):

-    The same initial value is used for all bands before the per-band refinement.
+        alpha_hat = (16/3) * log2( max_k(|X(k)|)^(3/4) / MQ )
+
+    where max_k runs over all MDCT coefficients of the frame (not per band),
+    and MQ is the maximum quantization level parameter (2*MQ + 1 levels).

    Parameters
    ----------
-    x_all : FloatArray
-        All MDCT coefficients of a frame (one channel), flattened.
+    X : FloatArray
+        MDCT coefficients of one frame (or one ESH subframe), shape (N,).
+    MQ : int
+        Quantizer parameter (default 8191, as per assignment).

    Returns
    -------
    int
-        Initial alpha value (integer).
+        Integer alpha_hat (rounded to nearest integer).
    """
-    x_all = np.asarray(x_all, dtype=np.float64).reshape(-1)
-    if x_all.size == 0:
+    x_max = float(np.max(np.abs(X)))
+    if x_max <= 0.0:
        return 0

-    max_abs = float(np.max(np.abs(x_all)))
-    if max_abs <= 0.0:
-        return 0
-
-    val = (max_abs ** 0.75) / float(MQ)
-    if val <= 0.0:
-        return 0
-
-    alpha_hat = (16.0 / 3.0) * np.log2(val)
-    return int(np.floor(alpha_hat))
+    alpha_hat = (16.0 / 3.0) * np.log2((x_max ** (3.0 / 4.0)) / float(MQ))
+    return int(np.round(alpha_hat))


 # -----------------------------------------------------------------------------
@ -279,35 +270,38 @@ def _threshold_T_from_SMR(
 # Alpha selection per band + neighbor-difference constraint
 # -----------------------------------------------------------------------------
 def _best_alpha_for_band(
-    X: FloatArray,
-    lo: int,
-    hi: int,
-    T_b: float,
-    alpha0: int,
-    alpha_min: int,
-    alpha_max: int,
+    X: "FloatArray", lo: int, hi: int, T_b: float,
+    alpha_hat: int, alpha_prev: int, alpha_min: int, alpha_max: int,
 ) -> int:
    """
-    Determine the band-wise scalefactor alpha(b) by iteratively increasing alpha.
+    Determine the band-wise scalefactor alpha(b) following the assignment.

-    The algorithm increases alpha until the quantization error power exceeds
-    the band threshold:
+    Procedure:
+    - Start from a frame-wise initial estimate alpha_hat.
+    - Iteratively increase alpha(b) by 1 as long as the quantization error power
+      stays below the psychoacoustic threshold T(b):  P_e(b) = sum_{k in band} ( X(k) - Xhat(k) )^2

-        P_e(b) = sum_{k in band} (X(k) - Xhat(k))^2
-        select the largest alpha such that P_e(b) <= T(b)
+    - Stop increasing alpha(b) if the neighbor constraint would be violated: |alpha(b) - alpha(b-1)| <= 60
+      When processing bands sequentially (low -> high), this becomes: alpha(b) <= alpha_prev + 60
+
+    Notes:
+    - This function does not decrease alpha if the initial value already violates
+      the threshold; the assignment only specifies iterative increase.

    Parameters
    ----------
    X : FloatArray
-        Full MDCT vector (one frame or one subframe), shape (N,).
+        Full MDCT vector of the current (sub)frame, shape (N,).
    lo, hi : int
-        Inclusive MDCT index range defining the band.
+        Band index bounds (inclusive), defining the band slice.
    T_b : float
-        Psychoacoustic threshold for this band.
-    alpha0 : int
-        Initial alpha value for the band.
+        Threshold T(b) for this band.
+    alpha_hat : int
+        Initial frame-wise estimate (Equation 14).
+    alpha_prev : int
+        Previously selected alpha for band b-1 (neighbor constraint reference).
    alpha_min, alpha_max : int
-        Bounds for alpha, used as a safeguard.
+        Safeguard bounds for alpha.

    Returns
    -------
@ -315,81 +309,41 @@ def _best_alpha_for_band(
        Selected integer alpha(b).
    """
    if T_b <= 0.0:
-        return int(alpha0)
-
+        return int(alpha_hat)
    Xsec = X[lo : hi + 1]

-    alpha = int(alpha0)
-    alpha = max(alpha_min, min(alpha, alpha_max))
+    # Neighbor constraint (sequential processing): alpha(b) <= alpha_prev + 60
+    alpha_limit = min(int(alpha_max), int(alpha_prev) + MAX_SF_DELTA)
+
+    # Start from alpha_hat, clamped to feasible range
+    alpha = int(alpha_hat)
+    alpha = max(int(alpha_min), min(alpha, int(alpha_limit)))

    # Evaluate at current alpha
    Ssec = _quantize_symbol(Xsec, alpha)
    Xhat = _dequantize_symbol(Ssec, alpha)
    Pe = float(np.sum((Xsec - Xhat) ** 2))

+    # If already above threshold, return current alpha (no decrease step specified)
    if Pe > T_b:
        return alpha

-    iters = 0
-    while iters < MAX_ALPHA_ITERS:
-        iters += 1
+    # Increase alpha while still under threshold and within constraints
+    while True:
        alpha_next = alpha + 1
-        if alpha_next > alpha_max:
+        if alpha_next > alpha_limit:
            break
-
        Ssec = _quantize_symbol(Xsec, alpha_next)
        Xhat = _dequantize_symbol(Ssec, alpha_next)
        Pe_next = float(np.sum((Xsec - Xhat) ** 2))

        if Pe_next > T_b:
            break
-
        alpha = alpha_next
-        Pe = Pe_next

    return alpha


-def _enforce_max_diff(alpha: np.ndarray, max_diff: int = MAX_SFC_DIFF) -> np.ndarray:
-    """
-    Enforce neighbor constraint |alpha[b] - alpha[b-1]| <= max_diff by clamping.
-
-    Uses a forward pass and a backward pass to reduce drift.
-
-    Parameters
-    ----------
-    alpha : np.ndarray
-        Alpha vector, shape (NB,).
-    max_diff : int
-        Maximum allowed absolute difference between adjacent bands.
-
-    Returns
-    -------
-    np.ndarray
-        Clamped alpha vector, int64, shape (NB,).
-    """
-    a = np.asarray(alpha, dtype=np.int64).copy()
-    nb = a.shape[0]
-
-    for b in range(1, nb):
-        lo = int(a[b - 1] - max_diff)
-        hi = int(a[b - 1] + max_diff)
-        if a[b] < lo:
-            a[b] = lo
-        elif a[b] > hi:
-            a[b] = hi
-
-    for b in range(nb - 2, -1, -1):
-        lo = int(a[b + 1] - max_diff)
-        hi = int(a[b + 1] + max_diff)
-        if a[b] < lo:
-            a[b] = lo
-        elif a[b] > hi:
-            a[b] = hi
-
-    return a
-
-
 # -----------------------------------------------------------------------------
 # Public API
 # -----------------------------------------------------------------------------
@ -401,7 +355,16 @@ def aac_quantizer(
    """
    AAC quantizer for one channel (Level 3).

-    Quantizes MDCT coefficients using band-wise scalefactors derived from SMR.
+    Quantizes MDCT coefficients (after TNS) using band-wise scalefactors derived
+    from psychoacoustic thresholds computed via SMR.
+
+    The implementation follows the assignment procedure:
+    - Compute an initial frame-wise alpha_hat using Equation (14), based on the
+      maximum MDCT coefficient magnitude of the (sub)frame.
+    - For each band b, increase alpha(b) by 1 while the quantization error power
+      P_e(b) stays below the threshold T(b).
+    - Enforce the neighbor constraint |alpha(b) - alpha(b-1)| <= 60 during the
+      band-by-band search (no post-processing needed).

    Parameters
    ----------
@ -421,16 +384,19 @@ def aac_quantizer(
    Returns
    -------
    S : QuantizedSymbols
-        Quantized symbols S(k), shape (1024, 1) for all frame types.
+        Quantized symbols S(k), packed as shape (1024, 1) for all frame types.
+        For ESH, the 8 subframes are packed in column-major subframe layout.
    sfc : ScaleFactors
-        DPCM-coded scalefactor differences sfc(b) = alpha(b) - alpha(b-1).
+        DPCM-coded scalefactors:
+            sfc(0) = alpha(0) = G
+            sfc(b) = alpha(b) - alpha(b-1), for b > 0
        Shapes:
        - Long: (NB, 1)
        - ESH: (NB, 8)
    G : GlobalGain
        Global gain G = alpha(0).
        - Long: scalar float
-        - ESH: array shape (1, 8)
+        - ESH: array shape (1, 8), dtype float64
    """
    bands = _band_slices(frame_type)
    NB = len(bands)
@ -438,6 +404,9 @@ def aac_quantizer(
    X = np.asarray(frame_F, dtype=np.float64)
    SMR = np.asarray(SMR, dtype=np.float64)

+    # -------------------------------------------------------------------------
+    # ESH: 8 short subframes, each of length 128
+    # -------------------------------------------------------------------------
    if frame_type == "ESH":
        if X.shape != (128, 8):
            raise ValueError("For ESH, frame_F must have shape (128, 8).")
@ -446,42 +415,58 @@ def aac_quantizer(

        S_out: QuantizedSymbols = np.zeros((1024, 1), dtype=np.int64)
        sfc: ScaleFactors = np.zeros((NB, 8), dtype=np.int64)
-        G_arr = np.zeros((1, 8), dtype=np.int64)
+        G_arr = np.zeros((1, 8), dtype=np.float64)
+
+        # Packed output view: (128, 8) with column-major layout
+        S_pack = S_out[:, 0].reshape(128, 8, order="F")

        for j in range(8):
            Xj = X[:, j].reshape(128)
            SMRj = SMR[:, j].reshape(NB)

+            # Compute psychoacoustic threshold T(b) for this subframe
            T = _threshold_T_from_SMR(Xj, SMRj, bands)

-            alpha0 = _alpha_initial_from_frame_max(Xj)
-            alpha = np.full((NB,), alpha0, dtype=np.int64)
+            # Frame-wise initial estimate alpha_hat (Equation 14)
+            alpha_hat = _initial_alpha_hat(Xj)
+
+            # Band-wise scalefactors alpha(b)
+            alpha = np.zeros((NB,), dtype=np.int64)
+            alpha_prev = int(alpha_hat)

            for b, (lo, hi) in enumerate(bands):
-                alpha[b] = _best_alpha_for_band(
-                    X=Xj, lo=lo, hi=hi, T_b=float(T[b]),
-                    alpha0=int(alpha[b]),
+                alpha_b = _best_alpha_for_band(
+                    X=Xj,
+                    lo=lo,
+                    hi=hi,
+                    T_b=float(T[b]),
+                    alpha_hat=int(alpha_hat),
+                    alpha_prev=int(alpha_prev),
                    alpha_min=-4096,
                    alpha_max=4096,
                )
+                alpha[b] = int(alpha_b)
+                alpha_prev = int(alpha_b)

-            alpha = _enforce_max_diff(alpha, MAX_SFC_DIFF)
-
-            G_arr[0, j] = int(alpha[0])
+            # DPCM-coded scalefactors
+            G_arr[0, j] = float(alpha[0])
            sfc[0, j] = int(alpha[0])
            for b in range(1, NB):
                sfc[b, j] = int(alpha[b] - alpha[b - 1])

+            # Quantize MDCT coefficients band-by-band
            Sj = np.zeros((128,), dtype=np.int64)
            for b, (lo, hi) in enumerate(bands):
                Sj[lo : hi + 1] = _quantize_symbol(Xj[lo : hi + 1], float(alpha[b]))

-            # Place this subframe in the packed output (column-major subframe layout)
-            S_out[:, 0].reshape(128, 8, order="F")[:, j] = Sj
+            # Store subframe in packed output
+            S_pack[:, j] = Sj

-        return S_out, sfc, G_arr.astype(np.float64)
+        return S_out, sfc, G_arr

-    # Long frames
+    # -------------------------------------------------------------------------
+    # Long frames: OLS / LSS / LPS, length 1024
+    # -------------------------------------------------------------------------
    if X.shape == (1024,):
        Xv = X
    elif X.shape == (1024, 1):
@ -496,33 +481,44 @@ def aac_quantizer(
    else:
        raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")

+    # Compute psychoacoustic threshold T(b) for the long frame
    T = _threshold_T_from_SMR(Xv, SMRv, bands)

-    alpha0 = _alpha_initial_from_frame_max(Xv)
-    alpha = np.full((NB,), alpha0, dtype=np.int64)
+    # Frame-wise initial estimate alpha_hat (Equation 14)
+    alpha_hat = _initial_alpha_hat(Xv)
+
+    # Band-wise scalefactors alpha(b)
+    alpha = np.zeros((NB,), dtype=np.int64)
+    alpha_prev = int(alpha_hat)

    for b, (lo, hi) in enumerate(bands):
-        alpha[b] = _best_alpha_for_band(
-            X=Xv, lo=lo, hi=hi, T_b=float(T[b]),
-            alpha0=int(alpha[b]),
+        alpha_b = _best_alpha_for_band(
+            X=Xv,
+            lo=lo,
+            hi=hi,
+            T_b=float(T[b]),
+            alpha_hat=int(alpha_hat),
+            alpha_prev=int(alpha_prev),
            alpha_min=-4096,
            alpha_max=4096,
        )
+        alpha[b] = int(alpha_b)
+        alpha_prev = int(alpha_b)

-    alpha = _enforce_max_diff(alpha, MAX_SFC_DIFF)
-
-    sfc: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
-    sfc[0, 0] = int(alpha[0])
+    # DPCM-coded scalefactors
+    sfc_out: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
+    sfc_out[0, 0] = int(alpha[0])
    for b in range(1, NB):
-        sfc[b, 0] = int(alpha[b] - alpha[b - 1])
+        sfc_out[b, 0] = int(alpha[b] - alpha[b - 1])

    G: float = float(alpha[0])

+    # Quantize MDCT coefficients band-by-band
    S_vec = np.zeros((1024,), dtype=np.int64)
    for b, (lo, hi) in enumerate(bands):
        S_vec[lo : hi + 1] = _quantize_symbol(Xv[lo : hi + 1], float(alpha[b]))

-    return S_vec.reshape(1024, 1), sfc, G
+    return S_vec.reshape(1024, 1), sfc_out, G


 def aac_i_quantizer(
--- a/source/level_3/material/LicorDeCalandraca_out2.wav
+++ b/source/level_3/material/LicorDeCalandraca_out2.wav
--- a/source/level_3/material/aac_seq_3.mat
+++ b/source/level_3/material/aac_seq_3.mat