Level 3: small fixes and report

Christos Choutouridis 2026-02-22 01:39:12 +02:00
parent 6b1ebc3cfe
commit 17228f8539
36 changed files with 4660 additions and 154 deletions

report/.gitignore vendored Normal file (+6)

@@ -0,0 +1,6 @@
# Report related files
*.aux
*.out
*.log
*.synctex.gz
_minted-report/*

Binary image file not shown (146 KiB).

Binary image file not shown (170 KiB).

report/report.pdf Normal file (binary, not shown)

report/report.tex Normal file (+585)

@@ -0,0 +1,585 @@
%
% !TEX TS-program = xelatex
% !TEX encoding = UTF-8 Unicode
% !TEX spellcheck = el-GR
%
% Information Systems Security assignment report
%
% Requires compilation with XeLaTeX
%
% authors:
% Χρήστος Χουτουρίδης ΑΕΜ 8997
% cchoutou@ece.auth.gr
% Options:
%
% 1) mainlang=<language>
% Default: english
% Set the default language of the document which affects hyphenations,
% localization (section, dates, etc...)
%
% example: \documentclass[mainlang=greek]{AUThReport}
%
% 2) <language>
% Add hyphenation and typesetting support for other languages
% Currently supports: english, greek, german, french
%
% example: \documentclass[english, greek]{AUThReport}
%
% 3) short: Requests a shorter title for the document
% Default: no short
%
% example: \documentclass[short]{AUThReport}
%
\documentclass[a4paper, 11pt, mainlang=greek, english]{AUThReport/AUThReport}
\CurrentDate{\today}
% Greek report document setup suggestions
%---------------------------------
% Document configuration
\AuthorName{Χρήστος Χουτουρίδης}
\AuthorAEM{8997}
\AuthorMail{cchoutou@ece.auth.gr}
%\CoAuthorName{CoAuthor Name}
%\CoAuthorAEM{AEM}
%\CoAuthorMail{CoAuthor Mail}
% \WorkGroup{Ομάδα Χ}
\DocTitle{Semester Project}
\DocSubTitle{Simplified AAC encoding-decoding}
\Department{Τμήμα ΗΜΜΥ. Τομέας Ηλεκτρονικής}
\ClassName{Συστήματα Πολυμέσων και Εικονική Πραγματικότητα}
\InstructorName{Αναστάσιος Ντελόπουλος}
\InstructorMail{antelopo@ece.auth.gr}
%\CoInstructorName{}
%\CoInstructorMail{}
% Local package requirements
%---------------------------------
%\usepackage{tabularx}
%\usepackage{array}
%\usepackage{commath}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{graphicx}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{float}
\captionsetup[figure]{name=Figure}
% Requires: -shell-escape compile argument
\usepackage{minted}
\usepackage{xcolor} %
\setminted[python]{
fontsize=\small,
breaklines,
autogobble,
baselinestretch=1.1,
tabsize=2,
numbersep=8pt,
startinline,
gobble=0
}
\setminted[bash]{
fontsize=\small,
breaklines,
autogobble,
baselinestretch=1.1,
tabsize=2,
numbersep=8pt,
startinline,
gobble=0
}
\newcommand{\repo}{https://git.hoo2.net/hoo2/Multimedia_AAC_Project}
\begin{document}
% Request a title page or header
\InsertTitle
\section{Introduction}
This project concerns the implementation of a simplified audio encoder and decoder following the \textbf{Advanced Audio Coding} (AAC) standard.
AAC is a transform coding method that combines frequency-domain analysis, a psychoacoustic model and entropy coding, aiming at the efficient compression of high-quality audio signals.
Its basic principle of operation rests on reducing redundant information through the \textbf{MDCT} transform and on exploiting the properties of human hearing, so that a controlled loss of imperceptible information is allowed.
Within this project we implement a \textit{simplified version} of the standard, as described in the assignment, omitting certain stages such as Mid/Side stereo and the Bit Reservoir, while retaining the main backbone of the coding chain.
Specifically, we develop in sequence the stages Sequence Segmentation Control, Filterbank (MDCT/IMDCT), Temporal Noise Shaping, Psychoacoustic Model, Quantization and Huffman coding, together with the corresponding inverse decoding procedures.
In this report we describe the theoretical basis of each stage, the way it was implemented, and the results obtained from the experimental evaluation of the system.
\subsection{Deliverables}
The deliverables of the project consist of:
\begin{itemize}
\item This report.
\item The directories \texttt{level\_<i>}, $i=1,2,3$, with the application code.
Each one contains a corresponding script \texttt{level\_<i>.py} that calls the demonstration function of the respective level.
\item The \href{\repo}{link} to the repository that hosts the complete project, with the application code and this report.
\end{itemize}
\section{Implementation}
The implementation is organized \textbf{modularly}, with a clear separation of the individual functional stages, so that every step of encoding and decoding can be tested and verified independently.
This structure allows us to build three integration levels incrementally, from fully invertible transform coding up to complete lossy coding with psychoacoustic control and entropy coding.
The individual stages of the encoder and the decoder were implemented as \textbf{independent modules}, which are reused in every \texttt{level\_<i>} directory.
Although the layout of the deliverable gives the impression that the level directories contain copies of the same files, in practice this is not the case.
Our initial approach was a central directory with a common implementation for all levels, but the constraints of the assignment did not permit such an organization.
Instead of using sub-repositories, we opted for hard links, so that the files of the common core appear in every level directory without any actual duplication of code.
In this way we maintain \textbf{a single point where the code is maintained} and avoid divergence between the levels.
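For illustration, a hard link for one of the core modules can be created as follows (the paths are indicative):
\begin{minted}{bash}
# expose a core module inside a level directory without duplicating it
ln core/aac_filterbank.py level_1/aac_filterbank.py
\end{minted}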
Each module was developed independently, with extensive use of automated tests.
Development followed a \textbf{Test-Driven Development} approach, where the tests were designed against the public interface of each module and describe its functional behavior.
The tests are functional and contract-based: they check data shapes, numerical stability, edge cases and invertibility properties.
This practice allowed us to easily run \textbf{regression tests} after every modification, ensuring that no unwanted side effects are introduced in other parts of the system.
We also created a wrapper module for the provided Huffman coding module.
During its integration, \textbf{an off-by-one bug was found} in the decoding mechanism for the escape values of codebook 11.
The bug concerned the handling of the separator (delimiter) bit in the encoding format of escape values.
Specifically, during decoding the N consecutive bits of value 1 were counted correctly, but the following 0 bit, which acts as a separator, was not skipped before reading the N+4 payload bits.
As a result, the separator bit was erroneously included in the bits of the escape value, leading to a one-bit shift and an incorrect reconstruction of the magnitude.
This bug did not raise a runtime exception, but it corrupted the scalefactor differences.
The fix consists in properly consuming the separator bit before reading the N+4 bits of the escape value.
The proposed change was communicated to the instructors and was accepted.
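For illustration, the corrected decoding logic can be summarized as follows; this is a minimal sketch with hypothetical names, not the actual code of the provided module, assuming the standard AAC escape format where the decoded magnitude equals $2^{N+4}$ plus the payload:
\begin{minted}{python}
def decode_escape(bits: str, pos: int) -> tuple[int, int]:
    # Count the run of leading '1' bits (N)
    n = 0
    while bits[pos] == '1':
        n += 1
        pos += 1
    pos += 1  # consume the '0' separator -- the step that was missing
    word = int(bits[pos:pos + n + 4], 2)  # read the N+4 payload bits
    pos += n + 4
    return (1 << (n + 4)) + word, pos
\end{minted}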
In summary, the main modules of the system are the following:
\begin{itemize}
\item \textbf{\texttt{aac\_ssc}}: Implementation of the Sequence Segmentation Control stage.
\item \textbf{\texttt{aac\_filterbank}}: Implementation of MDCT/IMDCT and window handling.
\item \textbf{\texttt{aac\_tns}}: Implementation of Temporal Noise Shaping.
\item \textbf{\texttt{aac\_psycho}}: Implementation of the psychoacoustic model and SMR computation.
\item \textbf{\texttt{aac\_quantizer}}: Implementation of the non-uniform quantizer and of inverse quantization.
\item \textbf{\texttt{aac\_huffman}}: Handling of Huffman encoding and decoding.
\item \textbf{\texttt{aac\_coder} / \texttt{aac\_decoder}}: Composition of the complete encoding and decoding pipelines.
\item \textbf{\texttt{aac\_utils}}: Helper functions and evaluation metrics.
\end{itemize}
Running the code requires installing the dependencies.
To this end, the following must be executed from the top-level directory:
\begin{minted}{bash}
pip install -r requirements.txt
\end{minted}
The automated tests are not required by the assignment and are not executed by the level scripts.
Nevertheless, the reader can run them from the top-level directory of the project:
\begin{minted}{bash}
pytest -v
# to run a specific module (for example filterbank)
pytest -v core/tests/test_filterbank.py
\end{minted}
\section{Level 1 -- SSC and MDCT}
\subsection{Level 1 Requirements}
The first level asks for a fully invertible encoding--decoding system based on the MDCT transform.
This level does not include a psychoacoustic model, quantization or entropy coding.
The goal is a correct implementation of Sequence Segmentation Control and of the Filterbank, so that \textbf{the reconstructed signal is numerically identical to the original}, up to floating-point precision.
Level 1 serves as the foundation for the following levels, since any error in the transform stage would propagate to, and be amplified in, the subsequent stages.
\subsection{Modules used}
Level 1 uses the following modules:
\begin{itemize}
\item \textbf{\texttt{aac\_ssc}} for selecting the frame type.
\item \textbf{\texttt{aac\_filterbank}} for MDCT/IMDCT and OLA.
\item \textbf{\texttt{aac\_coder\_1}} and \textbf{\texttt{aac\_decoder\_1}} for composing the pipeline.
\item \textbf{\texttt{aac\_utils}} for helper functions and the SNR computation.
\end{itemize}
\subsection{Sequence Segmentation Control}
The SSC stage implements the window-type selection mechanism (OLS, LSS, ESH, LPS) based on spike (attack) detection.
The detection relies on computing the energy of 128-sample sub-segments and on the ratio of consecutive energies.
The implementation is fully symmetric for the two channels, and the final frame type results from merging the two channels according to the transition table.
Particular attention was also paid to numerical stability, so that divisions by zero energy are avoided.
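Indicatively, the core of the detection logic has the following form; the threshold and the names are illustrative and do not correspond one-to-one to the \texttt{aac\_ssc} code:
\begin{minted}{python}
import numpy as np

def detect_attack(x: np.ndarray, threshold: float = 10.0, eps: float = 1e-12) -> bool:
    # Energies of eight consecutive 128-sample sub-segments
    e = np.sum(x[:1024].reshape(8, 128) ** 2, axis=1)
    # Flag an attack when a segment's energy jumps relative to the
    # mean of the preceding segments (guarded against zero energy)
    for k in range(1, 8):
        if e[k] / (np.mean(e[:k]) + eps) > threshold:
            return True
    return False
\end{minted}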
\subsection{Filterbank and MDCT}
The \texttt{aac\_filterbank} module implements the MDCT and its inverse IMDCT in discrete form.
SIN and KBD windows are used, according to the frame type.
The implementation correctly handles the transitions between windows, applying overlap-add (OLA) so that perfect reconstruction is achieved in steady state.
Both the linearity property of the transform and the gain relation:
\[
\text{MDCT}(\text{IMDCT}(X)) \approx 2X
\]
were verified numerically as part of the tests.
As an indicative example, consider the check of the gain relation:
\begin{minted}{python}
X: MdctCoeffs = rng.normal(size=K)
x: TimeSignal = imdct(X)
X_hat: MdctCoeffs = mdct(x)
_assert_allclose(X_hat, 2.0 * X, rtol=tolerance, atol=tolerance)
\end{minted}
\subsection{Correctness-Testing Philosophy}
As is well known, in programming and in quality assurance \textbf{there can be no absolute proof of correctness}.
There is only a reduction of the probability of error through targeted checks.
For Level 1 the following were checked:
\begin{itemize}
\item Linearity properties of MDCT/IMDCT.
\item Absence of NaN/Inf for random inputs.
\item Correct handling of window transitions.
\item Correct SSC behavior in edge cases.
\item End-to-end signal identity with high SNR.
\end{itemize}
The tests can be run with:
\begin{minted}{bash}
pytest -v core/tests/test_filterbank.py
pytest -v core/tests/test_ssc.py
pytest -v core/tests/test_utils.py
\end{minted}
Successful execution of the above tests ensures that Level 1 constitutes a numerically solid foundation for the following levels.
\subsection{Demo and Results}
The Level 1 demonstration can be run with:
\begin{minted}{bash}
cd level_1
python -m level_1 material/LicorDeCalandraca.wav material/LicorDeCalandraca_out_l1.wav
\end{minted}
The resulting output was:
\begin{minted}{text}
Encoding ...................... done
Decoding ...................... done
SNR = 257.437 dB
\end{minted}
An SNR above 250 dB \textbf{indicates practical numerical identity between the original and the reconstructed signal}.
The deviation is due exclusively to \textbf{floating-point precision} and not to a structural error of the implementation.
This result confirms that the implementation of the Filterbank and the SSC is consistent and fully invertible.
\section{Level 2 -- Temporal Noise Shaping}
\subsection{Level 2 Requirements}
The second level asks for the integration of the \textbf{Temporal Noise Shaping (TNS)} stage into the coding pipeline.
TNS operates on the MDCT coefficients and is based on linear prediction.
The goal of this stage is the temporal redistribution of the quantization error, so that the perceived error is spread over time in a more favorable way.
In Level 2 the process remains fully invertible.
The prediction coefficients are quantized with a specific step and within a limited range of values, so that the stability of the inverse filter is guaranteed.
\subsection{Modules used}
Level 2 uses:
\begin{itemize}
\item \textbf{\texttt{aac\_ssc}}.
\item \textbf{\texttt{aac\_filterbank}}.
\item \textbf{\texttt{aac\_tns}}.
\item \textbf{\texttt{aac\_coder\_2}} και \textbf{\texttt{aac\_decoder\_2}}.
\item \textbf{\texttt{aac\_utils}}.
\end{itemize}
The new element compared to Level 1 is the \texttt{aac\_tns} module, which is applied after the MDCT in the encoder and before the inverse transform in the decoder.
\subsection{Implementation of the TNS stage}
For each frame, linear prediction coefficients of order \texttt{PRED\_ORDER} (4) are computed.
In the ESH case, the procedure is applied independently to each of the 8 sub-frames.
The prediction coefficients are quantized with a fixed step \texttt{QUANT\_STEP} (0.1) and clamped to the interval $[-\texttt{QUANT\_MAX}\,(-0.7),\ +\texttt{QUANT\_MAX}\,(+0.7)]$.
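In sketch form, the grid-and-clamp step can be expressed as:
\begin{minted}{python}
import numpy as np

QUANT_STEP, QUANT_MAX = 0.1, 0.7
a = np.array([0.23, -0.91, 0.05, 0.66])  # raw LPC coefficients (example values)
# snap to the quantization grid, then clamp to the allowed range
a_q = np.clip(np.round(a / QUANT_STEP) * QUANT_STEP, -QUANT_MAX, QUANT_MAX)
\end{minted}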
The filter defined by the coefficients $\{a_k\}$ has the form:
\[
H(z) = 1 - \sum_{k=1}^{p} a_k z^{-k}.
\]
In the encoder the filter is applied in \textbf{FIR} form.
Each new MDCT coefficient is obtained by subtracting a linear combination of the preceding coefficients.
The choice of the FIR form guarantees numerical stability, since there is no feedback.
In the decoder the inverse filter is applied:
\[
H^{-1}(z) = \frac{1}{1 - \sum_{k=1}^{p} a_k z^{-k}},
\]
which has \textbf{IIR} form.
The inversion is implemented recursively, restoring the original MDCT coefficients.
This process is fully invertible provided that the IIR filter is stable.
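A minimal sketch of the FIR/IIR pair using \texttt{scipy.signal.lfilter} follows; it illustrates the structure only and is not the actual \texttt{aac\_tns} implementation:
\begin{minted}{python}
import numpy as np
from scipy.signal import lfilter

rng = np.random.default_rng(0)
x = rng.normal(size=1024)               # MDCT coefficients of one long frame
a_q = np.array([0.3, -0.2, 0.1, 0.05])  # quantized prediction coefficients

h = np.concatenate(([1.0], -a_q))       # H(z) = 1 - sum_k a_k z^{-k}
y = lfilter(h, [1.0], x)                # encoder: FIR analysis filter
x_hat = lfilter([1.0], h, y)            # decoder: IIR synthesis filter 1/H(z)
assert np.allclose(x, x_hat)            # exact inverse within float precision
\end{minted}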
Stability requires that all the roots of the polynomial:
\[
z^p - a_1 z^{p-1} - \dots - a_p = 0
\]
lie \textbf{inside the unit circle}.
Even a slight excursion beyond unit radius would lead to exponential amplification during the recursive application of the filter, and to numerical overflow.
\subsection{Correctness-Testing Philosophy}
As in Level 1, there is no absolute mathematical proof of correctness.
For the TNS stage the following were checked:
\begin{itemize}
\item Correct output shapes for long and ESH frames.
\item Quantization onto the intended grid of values.
\item Clamping of the coefficients to the allowed range.
\item Explicit stability check through root computation.
\item Round-trip property: $\text{iTNS}(\text{TNS}(X)) \approx X$.
\item Absence of NaN/Inf for random and edge-case inputs.
\end{itemize}
The stability check is implemented numerically with the following logic:
\begin{minted}{python}
poly = np.empty(p + 1)
poly[0] = 1.0
poly[1:] = -a_q
roots = np.roots(poly)
margin = 1e-12
assert np.all(np.abs(roots) < (1.0 - margin))
\end{minted}
The invertibility property is verified through a round-trip check:
\begin{minted}{python}
frame_F_tns, coeffs = aac_tns(frame_F_in, frame_type)
frame_F_hat = aac_i_tns(frame_F_tns, frame_type, coeffs)
np.testing.assert_allclose(frame_F_hat, frame_F_in)
\end{minted}
The above checks do not prove correctness mathematically, but they ensure that the FIR/IIR pair acts as an exact inverse within numerical precision, and that the quantization does not introduce instability.
The corresponding tests can be run with:
\begin{minted}{bash}
pytest -v core/tests/test_tns.py
\end{minted}
\subsection{Demo and Results}
The Level 2 demonstration is run with:
\begin{minted}{bash}
cd level_2
python -m level_2 material/LicorDeCalandraca.wav material/LicorDeCalandraca_out_l2.wav
\end{minted}
The resulting output was:
\begin{minted}{text}
Encoding ...................... done
Decoding ...................... done
SNR = 257.437 dB
\end{minted}
The SNR value is practically identical to that of Level 1.
This result confirms that the integration of TNS \textbf{introduces no loss of information at this stage}.
The implementation of the FIR/IIR pair and the stability check guarantee \textbf{full invertibility} of the process.
\section{Level 3 -- Full Lossy Coding}
\subsection{Level 3 Requirements}
The third level asks for the implementation of the \textbf{complete lossy coding chain}.
The psychoacoustic model, the non-uniform quantizer and Huffman coding are added.
In contrast to the previous levels, Level 3 \textbf{is no longer fully invertible}.
The loss of information is deliberate and controlled, based on the principles of psychoacoustic masking.
The goal is to reduce the bitrate while maintaining acceptable perceived quality.
\subsection{Modules used}
Level 3 uses all the modules of the system:
\begin{itemize}
\item \textbf{\texttt{aac\_ssc}}.
\item \textbf{\texttt{aac\_filterbank}}.
\item \textbf{\texttt{aac\_tns}}.
\item \textbf{\texttt{aac\_psycho}}.
\item \textbf{\texttt{aac\_quantizer}}.
\item \textbf{\texttt{aac\_huffman}}.
\item \textbf{\texttt{aac\_coder\_3}} και \textbf{\texttt{aac\_decoder\_3}}.
\item \textbf{\texttt{aac\_utils}}.
\end{itemize}
Level 3 is the composition of all the previous stages, with the addition of quantization and entropy coding.
\subsection{Psychoacoustic Model}
The \texttt{aac\_psycho} module computes the Signal-to-Mask Ratio (SMR) for each Bark band.
The procedure is based on tables B219a and B219b for long and short frames respectively.
The energy per band is computed, and a spreading function and the absolute threshold of hearing are applied.
The SMR determines the allowed quantization error in each band (a sketch follows after the list below).
Particular attention was paid to:
\begin{itemize}
\item Avoiding division by zero through a small \texttt{EPS} term.
\item Verifying that all intermediate quantities remain finite.
\end{itemize}
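In sketch form, the quantizer later derives the per-band masking threshold from the SMR roughly as follows; the names are illustrative, and we assume here a linear (non-dB) SMR:
\begin{minted}{python}
import numpy as np

def masking_thresholds(X, smr, bands):
    # Band energy of the MDCT coefficients; with a linear SMR the
    # masking threshold is T(b) = E(b) / SMR(b)
    e = np.array([np.sum(X[lo:hi + 1] ** 2) for lo, hi in bands])
    return e / np.maximum(np.asarray(smr, dtype=np.float64), 1e-12)
\end{minted}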
\subsection{Quantizer and Scalefactors}
The \texttt{aac\_quantizer} module applies non-uniform quantization to the MDCT coefficients.
For each band, a scalefactor is chosen that satisfies the corresponding SMR.
The scalefactors are encoded differentially (DPCM).
Reconstruction is performed cumulatively, and an exponential factor of the following form is applied:
\[
\hat{X} = \text{sign}(S) |S|^{4/3} \times 2^{\alpha/4}.
\]
This particular form makes the system \textbf{highly sensitive to numerical errors} (see the sketch after the list below).
For this reason the following were used:
\begin{itemize}
\item Finite-value checks.
\item Overflow-avoidance checks.
\item \texttt{EPS} protection in denominators.
\end{itemize}
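A minimal sketch of the reconstruction step follows, combining the cumulative DPCM decoding of the scalefactors with the dequantization formula above (illustrative names; long-frame case only):
\begin{minted}{python}
import numpy as np

def dequantize(S, sfc, G, bands):
    # Cumulative DPCM decoding: alpha(0) = G, alpha(b) = alpha(b-1) + sfc(b)
    alpha = G + np.concatenate(([0.0], np.cumsum(sfc)))
    X_hat = np.zeros(S.shape[0], dtype=np.float64)
    for b, (lo, hi) in enumerate(bands):
        sec = S[lo:hi + 1].astype(np.float64)
        # X_hat = sign(S) * |S|^(4/3) * 2^(alpha/4)
        X_hat[lo:hi + 1] = (np.sign(sec) * np.abs(sec) ** (4.0 / 3.0)
                            * 2.0 ** (alpha[b] / 4.0))
    return X_hat
\end{minted}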
\subsection{Huffman Coding}
The quantized coefficients and the DPCM scalefactors are Huffman-encoded.
The bitstream is fully reconstructed in the decoder (an indicative usage example follows the list below).
Particular attention was paid to:
\begin{itemize}
\item Correct \textbf{handling of escape values}.
\item Proper \textbf{consumption of the bitstreams}.
\item \textbf{Avoiding out-of-bounds} accesses.
\end{itemize}
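Indicatively, the wrapper API performs a round trip over a coefficient section; the decoder output is truncated to the known section length, since tuple coding may append trailing padding symbols (we assume here the LUT loader \texttt{load\_LUT} referenced in the wrapper's documentation):
\begin{minted}{python}
import numpy as np
from core.aac_huffman import aac_encode_huff, aac_decode_huff
from material.huff_utils import load_LUT

luts = load_LUT()
sec = np.array([2, -1, 0, 0, 3], dtype=np.int64)

bits, cb = aac_encode_huff(sec, luts)             # auto codebook selection
dec = aac_decode_huff(bits, cb, luts)[:sec.size]  # truncate tuple padding
assert np.array_equal(dec, sec)
\end{minted}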
\subsection{Correctness-Testing Philosophy}
Level 3 \textbf{cannot be tested against an identity criterion}.
It was tested against criteria of functional correctness and numerical stability.
Specifically, the following were checked:
\begin{itemize}
\item Correct DPCM reconstruction of the scalefactors.
\item Consistent correspondence of ESH packing and unpacking.
\item Absence of NaN/Inf throughout the pipeline.
\item Absence of exponential overflow.
\item Zero-lag check between the original and the reconstructed signal.
\item Channel-swap detection (L/R swap).
\item Clipping-absence check.
\item Preservation of the overall gain.
\end{itemize}
The \textbf{lag check} is implemented through an estimate of the time shift.
The channel check ensures that the \textbf{energy of the left and right channels has not been swapped}.
Together, these checks guarantee that the quality loss is due exclusively to quantization and not to a structural error.
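In sketch form, the lag and channel checks rely on the helpers added to \texttt{aac\_utils}:
\begin{minted}{python}
from core.aac_utils import snr_db, estimate_lag_mono

def check_alignment(x_ref, y_hat):
    # x_ref, y_hat: original and decoded stereo signals, trimmed to a common length
    assert estimate_lag_mono(x_ref[:, 0], y_hat[:, 0], max_lag=4096) == 0
    assert estimate_lag_mono(x_ref[:, 1], y_hat[:, 1], max_lag=4096) == 0
    # channel-swap check: the SNR against the swapped reference
    # must not exceed the SNR against the original
    assert snr_db(x_ref, y_hat) >= snr_db(x_ref[:, [1, 0]], y_hat)
\end{minted}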
The tests can be run with:
\begin{minted}{bash}
pytest -v core/tests/test_psycho.py
pytest -v core/tests/test_quantizer.py
pytest -v core/tests/test_huffman.py
pytest -v core/tests/test_aac_coder_decoder.py
\end{minted}
\subsection{Demo and Results}
The Level 3 demonstration is run with:
\begin{minted}{bash}
cd level_3
python -m level_3 material/LicorDeCalandraca.wav material/LicorDeCalandraca_out_l3.wav material/aac_seq_3.mat
\end{minted}
The resulting output was:
\begin{minted}{text}
Storing coded sequence to material/aac_seq_3.mat
Encoding ...................... done
Decoding ...................... done
SNR = 9.454 dB
Bitrate (coded) = 216710.22 bits/s
Compression ratio = 7.0881
\end{minted}
We observe that the SNR value is considerably lower than in the previous levels, which is expected due to the lossy quantization.
Despite the reduced SNR, it was verified that:
\begin{itemize}
\item There is no time shift (lag).
\item There is no channel swap.
\item No clipping appears.
\item There is no uncontrolled amplification (gain drift).
\end{itemize}
\begin{figure}[!ht]
\centering
\includegraphics[width=0.75\linewidth]{figures/bitrate_per_frame.png}
\caption{Distribution of the bitrate per frame.
A significant variation is observed, reflecting the adaptive nature of the coding.}
\label{fig:bitrate_per_frame}
\end{figure}
\begin{figure}[!ht]
\centering
\includegraphics[width=0.75\linewidth]{figures/compression_per_frame.png}
\caption{Compression ratio per frame.
The variation of the compression ratio confirms the dynamic bit allocation according to the signal content.}
\label{fig:compression_per_frame}
\end{figure}
The low SNR value does not fully reflect the perceived quality, since the psychoacoustic model allows a large energy error in regions where the error is covered by masking.
The observed loss mainly concerns high-frequency detail, without structural distortion of the signal.
The result confirms that the implementation is numerically stable and that the quality loss is the outcome of controlled quantization, not of an implementation error.
As shown in Figure~\ref{fig:bitrate_per_frame}, a significant variation of the bitrate between consecutive frames is observed, with values ranging roughly \textbf{between 160 kbps and 250 kbps} and sporadic peaks of higher values.
This behavior is expected, since the psychoacoustic model allocates more bits to time regions of increased energy or spectral complexity.
Correspondingly, in Figure~\ref{fig:compression_per_frame} we observe that the compression ratio varies dynamically, with \textbf{typical values between 6:1 and 9:1}.
The lower compression-ratio values correspond to frames where more bits are needed to satisfy the SMR, while the higher values appear in regions of lower spectral density.
This dynamic behavior confirms that \textbf{the coding is not constant-rate} (CBR), but adaptive with respect to the signal content.
The overall average compression of roughly \textbf{7:1} is achieved through the combined operation of psychoacoustic quantization and entropy coding, with no structural errors observed such as time shift, channel swap or uncontrolled amplification.
\section{Conclusions}
In this project we implemented a simplified but fully functional AAC-style encoding and decoding chain, following an incremental three-level approach.
We started from strictly invertible transform coding with MDCT and Sequence Segmentation Control, extended the system with Temporal Noise Shaping while preserving numerical invertibility, and completed it with the integration of a psychoacoustic model, non-uniform quantization and entropy coding.
The \textbf{modular design} and the \textbf{extensive use of automated tests allowed us to verify each stage independently and to systematically limit the probability of errors}.
We placed particular emphasis on the \textbf{numerical stability} of the system.
We applied filter stability checks, \texttt{EPS}-type protections in denominators, finite-value checks, as well as checks for the absence of time shift, channel swap and clipping.
This philosophy proved critical in Level 3, where small numerical errors can be amplified exponentially through dequantization.
The systematic testing approach and the round-trip checks ensured that the observed quality loss is due exclusively to the designed quantization and not to implementation errors.
The results showed that the system achieves a \textbf{compression ratio of roughly 7:1}, with an average \textbf{bitrate of about 216 kbps}, while maintaining acceptable perceived quality.
The per-frame analysis showed that the bit allocation is dynamic and depends on the spectral complexity of the signal, which confirms the correct operation of the psychoacoustic model.
The significant drop of the SNR in Level 3 is expected in energy terms and does not fully reflect the perceived quality, since the error is distributed according to the masking phenomena.
Overall, the implementation confirms that even a simplified AAC model can achieve substantial compression, provided the design is careful and numerically consistent.
\end{document}


@@ -111,21 +111,6 @@ def _thresholds_from_smr(
     return T

-def _normalize_global_gain(G: GlobalGain) -> float | FloatArray:
-    """
-    Normalize GlobalGain to match AACChannelFrameF3["G"] type:
-    - long: return float
-    - ESH: return float64 ndarray of shape (1, 8)
-    """
-    if np.isscalar(G):
-        return float(G)
-    G_arr = np.asarray(G)
-    if G_arr.size == 1:
-        return float(G_arr.reshape(-1)[0])
-    return np.asarray(G_arr, dtype=np.float64)

 # -----------------------------------------------------------------------------
 # Public helpers (useful for level_x demo wrappers)
 # -----------------------------------------------------------------------------
@@ -476,10 +461,6 @@ def aac_coder_3(
     S_L, sfc_L, G_L = aac_quantizer(chl_f_tns, frame_type, SMR_L)
     S_R, sfc_R, G_R = aac_quantizer(chr_f_tns, frame_type, SMR_R)

-    # Normalize G types for AACSeq3 schema (float | float64 ndarray).
-    G_Ln = _normalize_global_gain(G_L)
-    G_Rn = _normalize_global_gain(G_R)

     # Huffman-code ONLY the DPCM differences for b>0.
     # sfc[0] corresponds to alpha(0)=G and is stored separately in the frame.
     sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
@@ -503,7 +484,7 @@ def aac_coder_3(
     "chl": {
         "tns_coeffs": np.asarray(chl_tns_coeffs, dtype=np.float64),
         "T": np.asarray(T_L, dtype=np.float64),
-        "G": G_Ln,
+        "G": G_L,
         "sfc": sfc_L_stream,
         "stream": mdct_L_stream,
         "codebook": int(cb_L),
@@ -511,7 +492,7 @@ def aac_coder_3(
     "chr": {
         "tns_coeffs": np.asarray(chr_tns_coeffs, dtype=np.float64),
         "T": np.asarray(T_R, dtype=np.float64),
-        "G": G_Rn,
+        "G": G_R,
         "sfc": sfc_R_stream,
         "stream": mdct_R_stream,
         "codebook": int(cb_R),


@@ -42,7 +42,7 @@ def _nbands(frame_type: FrameType) -> int:
 # Public helpers
 # -----------------------------------------------------------------------------

-def aac_unpack_seq_channels_to_frame_f(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
+def aac_unpack_seq_channels(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
     """
     Re-pack per-channel spectra from the Level-1 AACSeq1 schema into the stereo
     FrameF container expected by aac_i_filter_bank().
@@ -167,7 +167,7 @@ def aac_decoder_1(
     chl_f = np.asarray(fr["chl"]["frame_F"], dtype=np.float64)
     chr_f = np.asarray(fr["chr"]["frame_F"], dtype=np.float64)

-    frame_f: FrameF = aac_unpack_seq_channels_to_frame_f(frame_type, chl_f, chr_f)
+    frame_f: FrameF = aac_unpack_seq_channels(frame_type, chl_f, chr_f)
     frame_t_hat: FrameT = aac_i_filter_bank(frame_f, frame_type, win_type)  # (2048, 2)

     start = i * hop
@@ -427,7 +427,7 @@ def aac_decoder_3(
     X_R = aac_i_tns(Xq_R, frame_type, tns_R)

     # Re-pack to stereo container and inverse filterbank
-    frame_f = aac_unpack_seq_channels_to_frame_f(frame_type, np.asarray(X_L), np.asarray(X_R))
+    frame_f = aac_unpack_seq_channels(frame_type, np.asarray(X_L), np.asarray(X_R))
     frame_t_hat: FrameT = aac_i_filter_bank(frame_f, frame_type, win_type)

     start = i * hop


@@ -189,7 +189,7 @@ def _predictability(
 # Band-domain aggregation
 # -----------------------------------------------------------------------------

-def _band_energy_and_weighted_predictability(
+def _band_energy_and_pred(
     r: FloatArray,
     c: FloatArray,
     wlow: BandIndexArray,
@@ -238,7 +238,7 @@ def _band_energy_and_weighted_predictability(
     return e_b, c_num_b

-def _psycho_one_window(
+def _psycho_window(
     time_x: FrameChannelT,
     prev1_x: FrameChannelT,
     prev2_x: FrameChannelT,
@@ -288,7 +288,7 @@ def _psycho_one_window(
     c_w = _predictability(r, phi, r_m1, phi_m1, r_m2, phi_m2)

     # Aggregate into psycho bands
-    e_b, c_num_b = _band_energy_and_weighted_predictability(r, c_w, wlow, whigh)
+    e_b, c_num_b = _band_energy_and_pred(r, c_w, wlow, whigh)

     # Spread energies and predictability across bands:
     #   ecb(b) = sum_bb e(bb) * S(bb, b)
@@ -333,7 +333,7 @@ def _psycho_one_window(
 # ESH window slicing (match filterbank conventions)
 # -----------------------------------------------------------------------------

-def _esh_subframes_256(x_2048: FrameChannelT) -> list[FrameChannelT]:
+def _esh_subframes(x_2048: FrameChannelT) -> list[FrameChannelT]:
     """
     Extract the 8 overlapping 256-sample short windows used by AAC ESH.
@@ -408,7 +408,7 @@ def aac_psycho(
     # Long frame types: compute one SMR vector (69 bands)
     if frame_type != "ESH":
-        return _psycho_one_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)
+        return _psycho_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)

     # ESH: compute 8 SMR vectors (42 bands each), one per short subframe.
     #
@@ -419,8 +419,8 @@ def aac_psycho(
     #
     # This matches the "within-frame history" convention commonly used in
     # simplified psycho models for ESH.
-    cur_subs = _esh_subframes_256(frame_T)
-    prev1_subs = _esh_subframes_256(frame_T_prev_1)
+    cur_subs = _esh_subframes(frame_T)
+    prev1_subs = _esh_subframes(frame_T_prev_1)

     B = int(table.shape[0])  # expected 42
     smr_out = np.zeros((B, 8), dtype=np.float64)
@@ -436,6 +436,6 @@ def aac_psycho(
     x_m1 = cur_subs[j - 1]
     x_m2 = cur_subs[j - 2]

-    smr_out[:, j] = _psycho_one_window(cur_subs[j], x_m1, x_m2, N=256, table=table)
+    smr_out[:, j] = _psycho_window(cur_subs[j], x_m1, x_m2, N=256, table=table)

     return smr_out


@@ -35,7 +35,7 @@ MAX_SF_DELTA:int = 60
 # -----------------------------------------------------------------------------
 # Helpers: ESH packing/unpacking (128x8 <-> 1024x1)
 # -----------------------------------------------------------------------------

-def _esh_pack_to_1024(x_128x8: FloatArray) -> FloatArray:
+def _esh_pack(x_128x8: FloatArray) -> FloatArray:
     """
     Pack ESH coefficients (128 x 8) into a single long vector (1024 x 1).
@@ -58,7 +58,7 @@ def _esh_pack_to_1024(x_128x8: FloatArray) -> FloatArray:
     return x_128x8.reshape(1024, 1, order="F")

-def _esh_unpack_from_1024(x_1024x1: FloatArray) -> FloatArray:
+def _esh_unpack(x_1024x1: FloatArray) -> FloatArray:
     """
     Unpack a packed ESH vector (1024 elements) back to shape (128, 8).
@@ -226,7 +226,7 @@ def _band_energy(x: FloatArray, lo: int, hi: int) -> float:
     return float(np.sum(sec * sec))

-def _threshold_T_from_SMR(
+def _psychoacoustic_threshold(
     X: FloatArray,
     SMR_col: FloatArray,
     bands: list[tuple[int, int]],
@@ -425,7 +425,7 @@ def aac_quantizer(
     SMRj = SMR[:, j].reshape(NB)

     # Compute psychoacoustic threshold T(b) for this subframe
-    T = _threshold_T_from_SMR(Xj, SMRj, bands)
+    T = _psychoacoustic_threshold(Xj, SMRj, bands)

     # Frame-wise initial estimate alpha_hat (Equation 14)
     alpha_hat = _initial_alpha_hat(Xj)
@@ -482,7 +482,7 @@ def aac_quantizer(
     raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")

     # Compute psychoacoustic threshold T(b) for the long frame
-    T = _threshold_T_from_SMR(Xv, SMRv, bands)
+    T = _psychoacoustic_threshold(Xv, SMRv, bands)

     # Frame-wise initial estimate alpha_hat (Equation 14)
     alpha_hat = _initial_alpha_hat(Xv)
@@ -566,7 +566,7 @@ def aac_i_quantizer(
     if sfc.shape != (NB, 8):
         raise ValueError(f"For ESH, sfc must have shape ({NB}, 8).")

-    S_128x8 = _esh_unpack_from_1024(S_flat)
+    S_128x8 = _esh_unpack(S_flat)

     Xrec = np.zeros((128, 8), dtype=np.float64)


@@ -39,7 +39,7 @@ from core.aac_types import *
 # -----------------------------------------------------------------------------

-def _band_ranges_for_kcount(k_count: int) -> BandRanges:
+def _band_ranges(k_count: int) -> BandRanges:
     """
     Return Bark band index ranges [start, end] (inclusive) for the given MDCT line count.
@@ -66,7 +66,7 @@ def _band_ranges_for_kcount(k_count: int) -> BandRanges:
     start = tbl[:, 1].astype(int)
     end = tbl[:, 2].astype(int)

-    ranges: list[tuple[int, int]] = [(int(s), int(e)) for s, e in zip(start, end)]
+    ranges: BandRanges = [(int(s), int(e)) for s, e in zip(start, end)]

     for s, e in ranges:
         if s < 0 or e < s or e >= k_count:
@@ -117,7 +117,7 @@ def _compute_sw(x: MdctCoeffs) -> MdctCoeffs:
     x = np.asarray(x, dtype=np.float64).reshape(-1)
     k_count = int(x.shape[0])

-    bands = _band_ranges_for_kcount(k_count)
+    bands = _band_ranges(k_count)

     sw = np.zeros(k_count, dtype=np.float64)
     for s, e in bands:
@@ -347,7 +347,7 @@ def _apply_itns_iir(y: MdctCoeffs, a_q: MdctCoeffs) -> MdctCoeffs:
     return x_hat

-def _tns_one_vector(x: MdctCoeffs) -> tuple[MdctCoeffs, MdctCoeffs]:
+def _tns_vector(x: MdctCoeffs) -> tuple[MdctCoeffs, MdctCoeffs]:
     """
     TNS for a single MDCT vector (one long frame or one short subframe).
@@ -430,7 +430,7 @@ def aac_tns(frame_F_in: FrameChannelF, frame_type: FrameType) -> Tuple[FrameChan
     a_out = np.empty((PRED_ORDER, 8), dtype=np.float64)
     for j in range(8):
-        y[:, j], a_out[:, j] = _tns_one_vector(x[:, j])
+        y[:, j], a_out[:, j] = _tns_vector(x[:, j])

     return y, a_out
@@ -443,7 +443,7 @@ def aac_tns(frame_F_in: FrameChannelF, frame_type: FrameType) -> Tuple[FrameChan
     else:
         raise ValueError('For non-ESH, frame_F_in must have shape (1024,) or (1024, 1).')

-    y_vec, a_q = _tns_one_vector(x_vec)
+    y_vec, a_q = _tns_vector(x_vec)

     if out_shape == (1024,):
         y_out = y_vec


@@ -163,6 +163,42 @@ def snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
     return float(10.0 * np.log10(ps / pn))

+def estimate_lag_mono(x_ref: TimeSignal, x_hat: TimeSignal, max_lag=4096):
+    """
+    Estimate time lag between two mono signals.
+    Returns lag (positive means x_hat delayed).
+    """
+    n = min(len(x_ref), len(x_hat))
+    x_ref = x_ref[:n]
+    x_hat = x_hat[:n]
+    corr = np.correlate(x_ref, x_hat, mode='full')
+    lags = np.arange(-n + 1, n)
+    center = n - 1
+    lo = max(0, center - max_lag)
+    hi = min(len(corr), center + max_lag + 1)
+    best = lo + int(np.argmax(corr[lo:hi]))
+    return int(lags[best])
+
+def match_gain(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
+    """
+    Least-squares gain g that best maps x_hat -> x_ref.
+    """
+    n = min(x_ref.shape[0], x_hat.shape[0])
+    c = min(x_ref.shape[1], x_hat.shape[1])
+    r = x_ref[:n, :c].reshape(-1).astype(np.float64)
+    h = x_hat[:n, :c].reshape(-1).astype(np.float64)
+    denom = float(np.dot(h, h))
+    if denom <= 0.0:
+        return 1.0
+    return float(np.dot(r, h) / denom)

 # -----------------------------------------------------------------------------
 # Psychoacoustic band tables (TableB219.mat)
 # -----------------------------------------------------------------------------


@@ -21,7 +21,7 @@ import soundfile as sf
 from core.aac_coder import aac_coder_1, aac_coder_2, aac_coder_3, aac_read_wav_stereo_48k
 from core.aac_decoder import aac_decoder_1, aac_decoder_2, aac_decoder_3, aac_remove_padding
-from core.aac_utils import snr_db
+from core.aac_utils import snr_db, estimate_lag_mono, match_gain
 from core.aac_types import *
@@ -151,6 +151,29 @@ def test_aac_coder_seq_schema_and_shapes(mk_random_stereo_wav: Path) -> None:
     assert chl_f.shape == (1024, 1)
     assert chr_f.shape == (1024, 1)

+@pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
+def test_level_1_gain_close_to_one(
+    mk_actual_stereo_wav: Path,
+    tmp_path: Path,
+) -> None:
+    """
+    Guardrail: decoded signal should not have a large global gain mismatch.
+    """
+    x_ref, fs = aac_read_wav_stereo_48k(mk_actual_stereo_wav)
+    assert int(fs) == 48000
+    out_wav = tmp_path / "decoded_level3.wav"
+    aac_seq_1: AACSeq1 = aac_coder_1(mk_actual_stereo_wav)
+    y_hat: StereoSignal = aac_decoder_1(aac_seq_1, out_wav)
+    n = min(x_ref.shape[0], y_hat.shape[0])
+    x_ref = x_ref[:n, :]
+    y_hat = y_hat[:n, :]
+    g = match_gain(x_ref, y_hat)
+    # print (f"g = {g}")
+    # Allow some slack but catch big scaling regressions
+    assert 0.75 <= g <= 1.25

 @pytest.mark.parametrize("mk_random_stereo_wav", [0.5], indirect=True)
 def test_end_to_end_aac_coder_decoder_high_snr(mk_random_stereo_wav: Path, tmp_path: Path) -> None:
@@ -207,6 +230,30 @@ def test_aac_coder_2_seq_schema_and_shapes(mk_random_stereo_wav: Path) -> None:
     assert coeffs.shape == (4, 1)

+@pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
+def test_level_2_gain_close_to_one(
+    mk_actual_stereo_wav: Path,
+    tmp_path: Path,
+) -> None:
+    """
+    Guardrail: decoded signal should not have a large global gain mismatch.
+    """
+    x_ref, fs = aac_read_wav_stereo_48k(mk_actual_stereo_wav)
+    assert int(fs) == 48000
+    out_wav = tmp_path / "decoded_level3.wav"
+    aac_seq_2: AACSeq2 = aac_coder_2(mk_actual_stereo_wav)
+    y_hat: StereoSignal = aac_decoder_2(aac_seq_2, out_wav)
+    n = min(x_ref.shape[0], y_hat.shape[0])
+    x_ref = x_ref[:n, :]
+    y_hat = y_hat[:n, :]
+    g = match_gain(x_ref, y_hat)
+    # print (f"g = {g}")
+    # Allow some slack but catch big scaling regressions
+    assert 0.75 <= g <= 1.25

 @pytest.mark.parametrize("mk_random_stereo_wav", [0.5], indirect=True)
 def test_end_to_end_level_2_high_snr(mk_random_stereo_wav: Path, tmp_path: Path) -> None:
     x_ref, fs = sf.read(str(mk_random_stereo_wav), always_2d=True)
@@ -294,6 +341,90 @@ def test_aac_coder_3_seq_schema_and_shapes(mk_actual_stereo_wav: Path) -> None:
     else:
         assert np.isscalar(G)

+@pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
+def test_level_3_estimated_lag(
+    mk_actual_stereo_wav: Path,
+    tmp_path: Path,
+) -> None:
+    """
+    Check that the estimated lag between reference and decoded
+    signal is small (zero), to prevent catastrophic misalignment.
+    """
+    x_ref, fs = aac_read_wav_stereo_48k(mk_actual_stereo_wav)
+    assert int(fs) == 48000
+    out_wav = tmp_path / "decoded_level3.wav"
+    aac_seq_3: AACSeq3 = aac_coder_3(mk_actual_stereo_wav)
+    y_hat: StereoSignal = aac_decoder_3(aac_seq_3, out_wav)
+    # Use only common length
+    n = min(x_ref.shape[0], y_hat.shape[0])
+    x_ref = x_ref[:n, :]
+    y_hat = y_hat[:n, :]
+    lag_L = estimate_lag_mono(x_ref[:, 0], y_hat[:, 0], max_lag=4096)
+    lag_R = estimate_lag_mono(x_ref[:, 1], y_hat[:, 1], max_lag=4096)
+    # We allow zero latency
+    assert abs(lag_L) == 0
+    assert abs(lag_R) == 0
+
+@pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
+def test_level_3_not_lr_swapped(
+    mk_actual_stereo_wav: Path,
+    tmp_path: Path,
+) -> None:
+    """
+    Ensure the decoded stereo channels are not swapped.
+    If channels are swapped, SNR against the original drops a lot,
+    while SNR against swapped reference increases.
+    """
+    x_ref, fs = aac_read_wav_stereo_48k(mk_actual_stereo_wav)
+    assert int(fs) == 48000
+    out_wav = tmp_path / "decoded_level3.wav"
+    aac_seq_3: AACSeq3 = aac_coder_3(mk_actual_stereo_wav)
+    y_hat: StereoSignal = aac_decoder_3(aac_seq_3, out_wav)
+    n = min(x_ref.shape[0], y_hat.shape[0])
+    x_ref = x_ref[:n, :]
+    y_hat = y_hat[:n, :]
+    snr_normal = snr_db(x_ref, y_hat)
+    snr_swapped = snr_db(x_ref[:, [1, 0]], y_hat)
+    # If the decoded output were swapped, snr_swapped would be significantly higher.
+    assert snr_normal >= snr_swapped
+
+@pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
+def test_level_3_gain_close_to_one(
+    mk_actual_stereo_wav: Path,
+    tmp_path: Path,
+) -> None:
+    """
+    Guardrail: decoded signal should not have a large global gain mismatch.
+    """
+    x_ref, fs = aac_read_wav_stereo_48k(mk_actual_stereo_wav)
+    assert int(fs) == 48000
+    out_wav = tmp_path / "decoded_level3.wav"
+    aac_seq_3: AACSeq3 = aac_coder_3(mk_actual_stereo_wav)
+    y_hat: StereoSignal = aac_decoder_3(aac_seq_3, out_wav)
+    n = min(x_ref.shape[0], y_hat.shape[0])
+    x_ref = x_ref[:n, :]
+    y_hat = y_hat[:n, :]
+    g = match_gain(x_ref, y_hat)
+    # Allow some slack but catch big scaling regressions
+    assert 0.75 <= g <= 1.25

 @pytest.mark.parametrize("mk_actual_stereo_wav", [0.5], indirect=True)
 def test_end_to_end_level_3_high_snr(mk_actual_stereo_wav: Path, tmp_path: Path) -> None:
@@ -310,5 +441,4 @@ def test_end_to_end_level_3_high_snr(mk_actual_stereo_wav: Path, tmp_path: Path)
     n = min(x_ref.shape[0], y_hat.shape[0])
     s = snr_db(x_ref[:n, :], y_hat[:n, :])
-    # print(f" with SNR={s}")
-    assert s > 7.0
+    assert s > 8.0


@@ -0,0 +1,112 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Huffman wrappers (Level 3)
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Thin wrappers around the provided Huffman utilities (material/huff_utils.py)
# so that the API matches the assignment text.
#
# Exposed API (assignment):
# huff_sec, huff_codebook = aac_encode_huff(coeff_sec, huff_LUT_list, force_codebook)
# dec_coeffs = aac_decode_huff(huff_sec, huff_codebook, huff_LUT_list)
#
# Notes:
# - Huffman coding operates on tuples. Therefore, decode(encode(x)) may return
# extra trailing symbols due to tuple padding. The AAC decoder knows the
# true section length from side information (band limits) and truncates.
# ------------------------------------------------------------
from __future__ import annotations
from typing import Any
import numpy as np
from material.huff_utils import encode_huff, decode_huff
def aac_encode_huff(
coeff_sec: np.ndarray,
huff_LUT_list: list[dict[str, Any]],
force_codebook: int | None = None,
) -> tuple[str, int]:
"""
Huffman-encode a section of coefficients (MDCT symbols or scalefactors).
Parameters
----------
coeff_sec : np.ndarray
Coefficient section to be encoded. Any shape is accepted; the input
is flattened and treated as a 1-D sequence of int64 symbols.
huff_LUT_list : list[dict[str, Any]]
List of Huffman Look-Up Tables (LUTs) as returned by material.load_LUT().
Index corresponds to codebook id (typically 1..11, with 0 reserved).
force_codebook : int | None
If provided, forces the use of this Huffman codebook. In the assignment,
scalefactors are encoded with codebook 11. For MDCT coefficients, this
argument is usually omitted (auto-selection).
Returns
-------
tuple[str, int]
(huff_sec, huff_codebook)
- huff_sec: bitstream as a string of '0'/'1'
- huff_codebook: codebook id used by the encoder
"""
coeff_sec_arr = np.asarray(coeff_sec, dtype=np.int64).reshape(-1)
if force_codebook is None:
# Provided utility returns (bitstream, codebook) in the auto-selection case.
huff_sec, huff_codebook = encode_huff(coeff_sec_arr, huff_LUT_list)
return str(huff_sec), int(huff_codebook)
# Provided utility returns ONLY the bitstream when force_codebook is set.
cb = int(force_codebook)
huff_sec = encode_huff(coeff_sec_arr, huff_LUT_list, force_codebook=cb)
return str(huff_sec), cb
def aac_decode_huff(
huff_sec: str | np.ndarray,
huff_codebook: int,
huff_LUT: list[dict[str, Any]],
) -> np.ndarray:
"""
Huffman-decode a bitstream using the specified codebook.
Parameters
----------
huff_sec : str | np.ndarray
Huffman bitstream. Typically a string of '0'/'1'. If an array is provided,
it is passed through to the provided decoder.
huff_codebook : int
Codebook id that was returned by aac_encode_huff.
Codebook 0 represents an all-zero section.
huff_LUT : list[dict[str, Any]]
Huffman LUT list as returned by material.load_LUT().
Returns
-------
np.ndarray
Decoded coefficients as a 1-D np.int64 array.
Note: Due to tuple coding, the decoded array may contain extra trailing
padding symbols. The caller must truncate to the known section length.
"""
cb = int(huff_codebook)
if cb == 0:
# Codebook 0 represents an all-zero section. The decoded length is not
# recoverable from the bitstream alone; the caller must expand/truncate.
return np.zeros((0,), dtype=np.int64)
if cb < 0 or cb >= len(huff_LUT):
raise ValueError(f"Invalid Huffman codebook index: {cb}")
lut = huff_LUT[cb]
dec = decode_huff(huff_sec, lut)
return np.asarray(dec, dtype=np.int64).reshape(-1)


@@ -0,0 +1,441 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Psychoacoustic Model
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Psychoacoustic model for ONE channel, based on the assignment notes (Section 2.4).
#
# Public API:
# SMR = aac_psycho(frame_T, frame_type, frame_T_prev_1, frame_T_prev_2)
#
# Output:
# - For long frames ("OLS", "LSS", "LPS"): SMR has shape (69,)
# - For short frames ("ESH"): SMR has shape (42, 8) (one column per subframe)
#
# Notes:
# - Uses Bark band tables from material/TableB219.mat:
# * B219a for long windows (69 bands, N=2048 FFT, N/2=1024 bins)
# * B219b for short windows (42 bands, N=256 FFT, N/2=128 bins)
# - Applies a Hann window in time domain before FFT magnitude/phase extraction.
# - Implements:
# spreading function -> band spreading -> tonality index -> masking thresholds -> SMR.
# ------------------------------------------------------------
from __future__ import annotations
import numpy as np
from core.aac_utils import band_limits, get_table
from core.aac_configuration import NMT_DB, TMN_DB
from core.aac_types import *
# -----------------------------------------------------------------------------
# Spreading function
# -----------------------------------------------------------------------------
def _spreading_matrix(bval: BandValueArray) -> FloatArray:
"""
Compute the spreading function matrix between psychoacoustic bands.
The spreading function describes how energy in one critical band masks
nearby bands. The formula follows the assignment pseudo-code.
Parameters
----------
bval : BandValueArray
Bark value per band, shape (B,).
Returns
-------
FloatArray
Spreading matrix S of shape (B, B), where:
S[bb, b] quantifies the contribution of band bb masking band b.
"""
bval = np.asarray(bval, dtype=np.float64).reshape(-1)
B = int(bval.shape[0])
spread = np.zeros((B, B), dtype=np.float64)
for b in range(B):
for bb in range(B):
# tmpx depends on direction (asymmetric spreading)
if bb >= b:
tmpx = 3.0 * (bval[bb] - bval[b])
else:
tmpx = 1.5 * (bval[bb] - bval[b])
# tmpz uses the "min(..., 0)" nonlinearity exactly as in the notes
tmpz = 8.0 * min((tmpx - 0.5) ** 2 - 2.0 * (tmpx - 0.5), 0.0)
tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * np.sqrt(1.0 + (tmpx + 0.474) ** 2)
# Clamp very small values (below -100 dB) to 0 contribution
if tmpy < -100.0:
spread[bb, b] = 0.0
else:
spread[bb, b] = 10.0 ** ((tmpz + tmpy) / 10.0)
return spread
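# Illustrative alternative (an assumption, not assignment code): the same matrix
# can be built without Python loops. _spreading_matrix_vec below is kept only as
# a hypothetical cross-check of the reference loop implementation above.
def _spreading_matrix_vec(bval: BandValueArray) -> FloatArray:
    bval = np.asarray(bval, dtype=np.float64).reshape(-1)
    B = int(bval.shape[0])
    d = bval[:, None] - bval[None, :]                       # d[bb, b] = bval[bb] - bval[b]
    upper = np.arange(B)[:, None] >= np.arange(B)[None, :]  # bb >= b (index-based, as above)
    tmpx = np.where(upper, 3.0 * d, 1.5 * d)
    tmpz = 8.0 * np.minimum((tmpx - 0.5) ** 2 - 2.0 * (tmpx - 0.5), 0.0)
    tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * np.sqrt(1.0 + (tmpx + 0.474) ** 2)
    return np.where(tmpy < -100.0, 0.0, 10.0 ** ((tmpz + tmpy) / 10.0))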
# -----------------------------------------------------------------------------
# Windowing + FFT feature extraction
# -----------------------------------------------------------------------------
def _hann_window(N: int) -> FloatArray:
"""
Hann window as specified in the notes:
w[n] = 0.5 - 0.5*cos(2*pi*(n + 0.5)/N)
Parameters
----------
N : int
Window length.
Returns
-------
FloatArray
1-D array of shape (N,), dtype float64.
"""
n = np.arange(N, dtype=np.float64)
return 0.5 - 0.5 * np.cos((2.0 * np.pi / N) * (n + 0.5))
def _r_phi_from_time(x: FrameChannelT, N: int) -> tuple[FloatArray, FloatArray]:
"""
Compute FFT magnitude r(w) and phase phi(w) for bins w = 0 .. N/2-1.
Processing:
1) Apply Hann window in time domain.
2) Compute N-point FFT.
3) Keep only the positive-frequency bins [0 .. N/2-1].
Parameters
----------
x : FrameChannelT
Time-domain samples, shape (N,).
N : int
FFT size (2048 or 256).
Returns
-------
r : FloatArray
Magnitude spectrum for bins 0 .. N/2-1, shape (N/2,).
phi : FloatArray
Phase spectrum for bins 0 .. N/2-1, shape (N/2,).
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
if x.shape[0] != N:
raise ValueError(f"Expected time vector of length {N}, got {x.shape[0]}.")
w = _hann_window(N)
X = np.fft.fft(x * w, n=N)
Xp = X[: N // 2]
r = np.abs(Xp).astype(np.float64, copy=False)
phi = np.angle(Xp).astype(np.float64, copy=False)
return r, phi
def _predictability(
r: FloatArray,
phi: FloatArray,
r_m1: FloatArray,
phi_m1: FloatArray,
r_m2: FloatArray,
phi_m2: FloatArray,
) -> FloatArray:
"""
Compute predictability c(w) per spectral bin.
The notes define:
r_pred(w) = 2*r_{-1}(w) - r_{-2}(w)
phi_pred(w) = 2*phi_{-1}(w) - phi_{-2}(w)
c(w) = |X(w) - X_pred(w)| / (r(w) + |r_pred(w)|)
where X(w) is represented in polar form using r(w), phi(w).
Parameters
----------
r, phi : FloatArray
Current magnitude and phase, shape (N/2,).
r_m1, phi_m1 : FloatArray
Previous magnitude and phase, shape (N/2,).
r_m2, phi_m2 : FloatArray
Pre-previous magnitude and phase, shape (N/2,).
Returns
-------
FloatArray
Predictability c(w), shape (N/2,).
"""
r_pred = 2.0 * r_m1 - r_m2
phi_pred = 2.0 * phi_m1 - phi_m2
num = np.sqrt(
(r * np.cos(phi) - r_pred * np.cos(phi_pred)) ** 2
+ (r * np.sin(phi) - r_pred * np.sin(phi_pred)) ** 2
)
den = r + np.abs(r_pred) + 1e-12 # avoid division-by-zero without altering behavior
return (num / den).astype(np.float64, copy=False)
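# Intuition check (hedged, not assignment code): for a steady sinusoid the
# linear polar extrapolation is near-exact, so c(w) ~ 0 at the tone bin
# (tonal), while for white noise the prediction fails and c(w) ~ 1.
# A quick self-test under these assumptions:
#   N = 2048
#   n = np.arange(3 * N, dtype=np.float64)
#   x = np.sin(2.0 * np.pi * 0.05 * n)            # steady tone
#   r0, p0 = _r_phi_from_time(x[2*N:3*N], N)      # current window
#   r1, p1 = _r_phi_from_time(x[N:2*N], N)        # previous
#   r2, p2 = _r_phi_from_time(x[0:N], N)          # pre-previous
#   c = _predictability(r0, p0, r1, p1, r2, p2)
#   c[np.argmax(r0)]                              # expected ~ 0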
# -----------------------------------------------------------------------------
# Band-domain aggregation
# -----------------------------------------------------------------------------
def _band_energy_and_pred(
r: FloatArray,
c: FloatArray,
wlow: BandIndexArray,
whigh: BandIndexArray,
) -> tuple[FloatArray, FloatArray]:
"""
Aggregate spectral bin quantities into psychoacoustic bands.
Definitions (notes):
e(b) = sum_{w=wlow(b)..whigh(b)} r(w)^2
c_num(b) = sum_{w=wlow(b)..whigh(b)} c(w) * r(w)^2
The band predictability c(b) is later computed after spreading as:
cb(b) = ct(b) / ecb(b)
Parameters
----------
r : FloatArray
Magnitude spectrum, shape (N/2,).
c : FloatArray
Predictability per bin, shape (N/2,).
wlow, whigh : BandIndexArray
Band limits (inclusive indices), shape (B,).
Returns
-------
e_b : FloatArray
Band energies e(b), shape (B,).
c_num_b : FloatArray
Weighted predictability numerators c_num(b), shape (B,).
"""
r2 = (r * r).astype(np.float64, copy=False)
B = int(wlow.shape[0])
e_b = np.zeros(B, dtype=np.float64)
c_num_b = np.zeros(B, dtype=np.float64)
for b in range(B):
a = int(wlow[b])
z = int(whigh[b])
seg_r2 = r2[a : z + 1]
e_b[b] = float(np.sum(seg_r2))
c_num_b[b] = float(np.sum(c[a : z + 1] * seg_r2))
return e_b, c_num_b
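# Hedged note: when the Bark bands are contiguous (whigh[b] + 1 == wlow[b+1],
# as in TableB219) and the last band ends at the last bin, the loop above is
# equivalent to the reduceat form
#   e_b     = np.add.reduceat(r2, wlow)
#   c_num_b = np.add.reduceat(c * r2, wlow)
# The loop is kept as the reference because contiguity is an assumption here,
# not a checked invariant.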
def _psycho_window(
time_x: FrameChannelT,
prev1_x: FrameChannelT,
prev2_x: FrameChannelT,
*,
N: int,
table: BarkTable,
) -> FloatArray:
"""
Compute SMR for one FFT analysis window (N=2048 for long, N=256 for short).
This implements the pipeline described in the notes:
- FFT magnitude/phase
- predictability per bin
- band energies and predictability
- band spreading
- tonality index tb(b)
- masking threshold (noise + threshold in quiet)
- SMR(b) = e(b) / np(b)
Parameters
----------
time_x : FrameChannelT
Current time-domain samples, shape (N,).
prev1_x : FrameChannelT
Previous time-domain samples, shape (N,).
prev2_x : FrameChannelT
Pre-previous time-domain samples, shape (N,).
N : int
FFT size.
table : BarkTable
Psychoacoustic band table (B219a or B219b).
Returns
-------
FloatArray
SMR per band, shape (B,).
"""
wlow, whigh, bval, qthr_db = band_limits(table)
spread = _spreading_matrix(bval)
# FFT features for current and history windows
r, phi = _r_phi_from_time(time_x, N)
r_m1, phi_m1 = _r_phi_from_time(prev1_x, N)
r_m2, phi_m2 = _r_phi_from_time(prev2_x, N)
# Predictability per bin
c_w = _predictability(r, phi, r_m1, phi_m1, r_m2, phi_m2)
# Aggregate into psycho bands
e_b, c_num_b = _band_energy_and_pred(r, c_w, wlow, whigh)
# Spread energies and predictability across bands:
# ecb(b) = sum_bb e(bb) * S(bb, b)
# ct(b) = sum_bb c_num(bb) * S(bb, b)
ecb = spread.T @ e_b
ct = spread.T @ c_num_b
# Band predictability after spreading: cb(b) = ct(b) / ecb(b)
cb = ct / (ecb + 1e-12)
# Normalized energy term:
# en(b) = ecb(b) / sum_bb S(bb, b)
spread_colsum = np.sum(spread, axis=0)
en = ecb / (spread_colsum + 1e-12)
# Tonality index (clamped to [0, 1])
tb = -0.299 - 0.43 * np.log(np.maximum(cb, 1e-12))
tb = np.clip(tb, 0.0, 1.0)
# Required SNR per band (dB): interpolate between TMN and NMT
snr_b = tb * TMN_DB + (1.0 - tb) * NMT_DB
bc = 10.0 ** (-snr_b / 10.0)
# Noise masking threshold estimate (power domain)
nb = en * bc
# Threshold in quiet (convert from dB to power domain):
# qthr_power = eps * (N/2) * 10^(qthr_db/10)
qthr_power = np.finfo('float').eps * (N / 2.0) * (10.0 ** (qthr_db / 10.0))
# Final masking threshold per band:
# np(b) = max(nb(b), qthr(b))
npart = np.maximum(nb, qthr_power)
# Signal-to-mask ratio:
# SMR(b) = e(b) / np(b)
smr = e_b / (npart + 1e-12)
return smr.astype(np.float64, copy=False)
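# Note: the SMR returned above is a LINEAR power ratio (e/np), which is what
# aac_quantizer consumes directly (T(b) = P(b) / SMR(b)). For inspection in dB
# one would take, e.g.:
#   smr_db = 10.0 * np.log10(np.maximum(smr, 1e-12))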
# -----------------------------------------------------------------------------
# ESH window slicing (match filterbank conventions)
# -----------------------------------------------------------------------------
def _esh_subframes(x_2048: FrameChannelT) -> list[FrameChannelT]:
"""
Extract the 8 overlapping 256-sample short windows used by AAC ESH.
The project convention (matching the filterbank) is:
start_j = 448 + 128*j, for j = 0..7
subframe_j = x[start_j : start_j + 256]
This selects the central 1152-sample region [448, 1600) and produces
8 windows with 50% overlap.
Parameters
----------
x_2048 : FrameChannelT
Time-domain channel frame, shape (2048,).
Returns
-------
list[FrameChannelT]
List of 8 subframes, each of shape (256,).
"""
x_2048 = np.asarray(x_2048, dtype=np.float64).reshape(-1)
if x_2048.shape[0] != 2048:
raise ValueError("ESH requires 2048-sample input frames.")
subs: list[FrameChannelT] = []
for j in range(8):
start = 448 + 128 * j
subs.append(x_2048[start : start + 256])
return subs
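# Worked example of the slicing above: the eight start offsets are
#   448, 576, 704, 832, 960, 1088, 1216, 1344
# so the windows span [448, 704), [576, 832), ..., [1344, 1600): 50% overlap,
# covering exactly the central region [448, 1600).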
# -----------------------------------------------------------------------------
# Public API
# -----------------------------------------------------------------------------
def aac_psycho(
frame_T: FrameChannelT,
frame_type: FrameType,
frame_T_prev_1: FrameChannelT,
frame_T_prev_2: FrameChannelT,
) -> FloatArray:
"""
Psychoacoustic model for ONE channel.
Parameters
----------
frame_T : FrameChannelT
Current time-domain channel frame, shape (2048,).
For "ESH", the 8 short windows are derived internally.
frame_type : FrameType
AAC frame type ("OLS", "LSS", "ESH", "LPS").
frame_T_prev_1 : FrameChannelT
Previous time-domain channel frame, shape (2048,).
frame_T_prev_2 : FrameChannelT
Pre-previous time-domain channel frame, shape (2048,).
Returns
-------
FloatArray
Signal-to-Mask Ratio (SMR), per psychoacoustic band.
- If frame_type == "ESH": shape (42, 8)
- Else: shape (69,)
"""
frame_T = np.asarray(frame_T, dtype=np.float64).reshape(-1)
frame_T_prev_1 = np.asarray(frame_T_prev_1, dtype=np.float64).reshape(-1)
frame_T_prev_2 = np.asarray(frame_T_prev_2, dtype=np.float64).reshape(-1)
if frame_T.shape[0] != 2048 or frame_T_prev_1.shape[0] != 2048 or frame_T_prev_2.shape[0] != 2048:
raise ValueError("aac_psycho expects 2048-sample frames for current/prev1/prev2.")
table, N = get_table(frame_type)
# Long frame types: compute one SMR vector (69 bands)
if frame_type != "ESH":
return _psycho_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)
# ESH: compute 8 SMR vectors (42 bands each), one per short subframe.
#
# The notes use short-window history for predictability:
# - For j=0: use previous frame's subframes (7, 6)
# - For j=1: use current subframe 0 and previous frame's subframe 7
# - For j>=2: use current subframes (j-1, j-2)
#
# This matches the "within-frame history" convention commonly used in
# simplified psycho models for ESH.
cur_subs = _esh_subframes(frame_T)
prev1_subs = _esh_subframes(frame_T_prev_1)
B = int(table.shape[0]) # expected 42
smr_out = np.zeros((B, 8), dtype=np.float64)
for j in range(8):
if j == 0:
x_m1 = prev1_subs[7]
x_m2 = prev1_subs[6]
elif j == 1:
x_m1 = cur_subs[0]
x_m2 = prev1_subs[7]
else:
x_m1 = cur_subs[j - 1]
x_m2 = cur_subs[j - 2]
smr_out[:, j] = _psycho_window(cur_subs[j], x_m1, x_m2, N=256, table=table)
return smr_out
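# Hedged usage sketch (shapes per the docstring; input values are arbitrary):
#   x = np.random.randn(2048)
#   smr_long = aac_psycho(x, "OLS", np.zeros(2048), np.zeros(2048))   # (69,)
#   smr_short = aac_psycho(x, "ESH", np.zeros(2048), np.zeros(2048))  # (42, 8)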

View File

@ -0,0 +1,600 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Quantizer / iQuantizer (Level 3)
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Implements AAC quantizer and inverse quantizer for one channel.
# Based on assignment section 2.6 (Eq. 12-15).
#
# Notes:
# - Bit reservoir is not implemented (assignment simplification).
# - Scalefactor bands are assumed equal to psychoacoustic bands
# (Table B.2.1.9a / B.2.1.9b from TableB219.mat).
# ------------------------------------------------------------
from __future__ import annotations
import numpy as np
from core.aac_utils import get_table, band_limits
from core.aac_types import *
# -----------------------------------------------------------------------------
# Constants (assignment)
# -----------------------------------------------------------------------------
MAGIC_NUMBER: float = 0.4054
EPS: float = 1e-12
MAX_SF_DELTA: int = 60
# -----------------------------------------------------------------------------
# Helpers: ESH packing/unpacking (128x8 <-> 1024x1)
# -----------------------------------------------------------------------------
def _esh_pack(x_128x8: FloatArray) -> FloatArray:
"""
Pack ESH coefficients (128 x 8) into a single long vector (1024 x 1).
Packing order:
Columns are concatenated in subframe order (0..7), column-major.
Parameters
----------
x_128x8 : FloatArray
ESH coefficients, shape (128, 8).
Returns
-------
FloatArray
Packed coefficients, shape (1024, 1).
"""
x_128x8 = np.asarray(x_128x8, dtype=np.float64)
if x_128x8.shape != (128, 8):
raise ValueError("ESH pack expects shape (128, 8).")
return x_128x8.reshape(1024, 1, order="F")
def _esh_unpack(x_1024x1: FloatArray) -> FloatArray:
"""
Unpack a packed ESH vector (1024 elements) back to shape (128, 8).
Parameters
----------
x_1024x1 : FloatArray
Packed ESH vector, shape (1024,) or (1024, 1) after flattening.
Returns
-------
FloatArray
Unpacked ESH coefficients, shape (128, 8).
"""
x_1024x1 = np.asarray(x_1024x1, dtype=np.float64).reshape(-1)
if x_1024x1.shape[0] != 1024:
raise ValueError("ESH unpack expects 1024 elements.")
return x_1024x1.reshape(128, 8, order="F")
# -----------------------------------------------------------------------------
# Core quantizer formulas (Eq. 12, Eq. 13)
# -----------------------------------------------------------------------------
def _quantize_symbol(x: FloatArray, alpha: float) -> QuantizedSymbols:
"""
Quantize MDCT coefficients to integer symbols S(k).
Implements Eq. (12):
S(k) = sgn(X(k)) * int( (|X(k)| * 2^(-alpha/4))^(3/4) + MAGIC_NUMBER )
Parameters
----------
x : FloatArray
MDCT coefficients for a contiguous set of spectral lines.
Shape: (N,)
alpha : float
Scalefactor gain for the corresponding scalefactor band.
Returns
-------
QuantizedSymbols
Quantized symbols S(k) as int64, shape (N,).
"""
x = np.asarray(x, dtype=np.float64)
scale = 2.0 ** (-0.25 * float(alpha))
ax = np.abs(x) * scale
y = np.power(ax, 0.75, dtype=np.float64)
# "int" in the assignment corresponds to truncation.
q = np.floor(y + MAGIC_NUMBER).astype(np.int64)
return (np.sign(x).astype(np.int64) * q).astype(np.int64)
def _dequantize_symbol(S: QuantizedSymbols, alpha: float) -> FloatArray:
"""
Inverse quantizer (dequantization of symbols).
Implements Eq. (13):
Xhat(k) = sgn(S(k)) * |S(k)|^(4/3) * 2^(alpha/4)
Parameters
----------
S : QuantizedSymbols
Quantized symbols S(k), int64, shape (N,).
alpha : float
Scalefactor gain for the corresponding scalefactor band.
Returns
-------
FloatArray
Reconstructed MDCT coefficients Xhat(k), float64, shape (N,).
"""
S = np.asarray(S, dtype=np.int64)
scale = 2.0 ** (0.25 * float(alpha))
aS = np.abs(S).astype(np.float64)
y = np.power(aS, 4.0 / 3.0, dtype=np.float64)
return (np.sign(S).astype(np.float64) * y * scale).astype(np.float64)
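# Worked round trip of Eq. (12)/(13) with alpha = 0 (a hand check, not code):
#   x = 10.0  ->  S = floor(10**0.75 + 0.4054) = floor(6.0288) = 6
#   xhat = 6**(4/3) ~= 10.90
# The 3/4-power companding means larger coefficients are represented with
# coarser absolute steps.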
# -----------------------------------------------------------------------------
# Alpha initialization (Eq. 14)
# -----------------------------------------------------------------------------
def _initial_alpha_hat(X: FloatArray, MQ: int = 8191) -> int:
"""
Compute the initial scalefactor estimate alpha_hat for a frame.
The assignment proposes the following first approximation (Equation 14):
alpha_hat = (16/3) * log2( max_k(|X(k)|)^(3/4) / MQ )
where max_k runs over all MDCT coefficients of the frame (not per band),
and MQ is the maximum quantization level parameter (2*MQ + 1 levels).
Parameters
----------
X : FloatArray
MDCT coefficients of one frame (or one ESH subframe), shape (N,).
MQ : int
Quantizer parameter (default 8191, as per assignment).
Returns
-------
int
Integer alpha_hat (rounded to nearest integer).
"""
x_max = float(np.max(np.abs(X)))
if x_max <= 0.0:
return 0
alpha_hat = (16.0 / 3.0) * np.log2((x_max ** (3.0 / 4.0)) / float(MQ))
return int(np.round(alpha_hat))
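# Worked example of Eq. (14): for a frame with max |X(k)| = 1.0 and MQ = 8191,
#   alpha_hat = (16/3) * log2(1**(3/4) / 8191) ~= -69.33  ->  -69
# i.e. low-amplitude frames start from a strongly negative scalefactor, which
# the per-band search below then only increases.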
# -----------------------------------------------------------------------------
# Band utilities
# -----------------------------------------------------------------------------
def _band_slices(frame_type: FrameType) -> list[tuple[int, int]]:
"""
Return scalefactor band ranges [wlow, whigh] (inclusive) for the given frame type.
These are derived from the psychoacoustic tables (TableB219),
and map directly to MDCT indices:
- long: 0..1023
- short (ESH subframe): 0..127
Parameters
----------
frame_type : FrameType
Frame type ("OLS", "LSS", "ESH", "LPS").
Returns
-------
list[tuple[int, int]]
List of (lo, hi) inclusive index pairs for each band.
"""
table, _Nfft = get_table(frame_type)
wlow, whigh, _bval, _qthr_db = band_limits(table)
bands: list[tuple[int, int]] = []
for lo, hi in zip(wlow, whigh):
bands.append((int(lo), int(hi)))
return bands
def _band_energy(x: FloatArray, lo: int, hi: int) -> float:
"""
Compute energy of a spectral segment x[lo:hi+1].
Parameters
----------
x : FloatArray
MDCT coefficient vector.
lo, hi : int
Inclusive index range.
Returns
-------
float
Sum of squares (energy) within the band.
"""
sec = x[lo : hi + 1]
return float(np.sum(sec * sec))
def _psychoacoustic_threshold(
X: FloatArray,
SMR_col: FloatArray,
bands: list[tuple[int, int]],
) -> FloatArray:
"""
Compute psychoacoustic thresholds T(b) per band.
Uses:
P(b) = sum_{k in band} X(k)^2
T(b) = P(b) / SMR(b)
Parameters
----------
X : FloatArray
MDCT coefficients for a frame (long) or one ESH subframe (short).
SMR_col : FloatArray
SMR values for this frame/subframe, shape (NB,).
bands : list[tuple[int, int]]
Band index ranges.
Returns
-------
FloatArray
Threshold vector T(b), shape (NB,).
"""
nb = len(bands)
T = np.zeros((nb,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
P = _band_energy(X, lo, hi)
smr = float(SMR_col[b])
if smr <= EPS:
T[b] = 0.0
else:
T[b] = P / smr
return T
# -----------------------------------------------------------------------------
# Alpha selection per band + neighbor-difference constraint
# -----------------------------------------------------------------------------
def _best_alpha_for_band(
X: "FloatArray", lo: int, hi: int, T_b: float,
alpha_hat: int, alpha_prev: int, alpha_min: int, alpha_max: int,
) -> int:
"""
Determine the band-wise scalefactor alpha(b) following the assignment.
Procedure:
- Start from a frame-wise initial estimate alpha_hat.
- Iteratively increase alpha(b) by 1 as long as the quantization error power
stays below the psychoacoustic threshold T(b): P_e(b) = sum_{k in band} ( X(k) - Xhat(k) )^2
- Stop increasing alpha(b) if the neighbor constraint would be violated: |alpha(b) - alpha(b-1)| <= 60
When processing bands sequentially (low -> high), this becomes: alpha(b) <= alpha_prev + 60
Notes:
- This function does not decrease alpha if the initial value already violates
the threshold; the assignment only specifies iterative increase.
Parameters
----------
X : FloatArray
Full MDCT vector of the current (sub)frame, shape (N,).
lo, hi : int
Band index bounds (inclusive), defining the band slice.
T_b : float
Threshold T(b) for this band.
alpha_hat : int
Initial frame-wise estimate (Equation 14).
alpha_prev : int
Previously selected alpha for band b-1 (neighbor constraint reference).
alpha_min, alpha_max : int
Safeguard bounds for alpha.
Returns
-------
int
Selected integer alpha(b).
"""
if T_b <= 0.0:
return int(alpha_hat)
Xsec = X[lo : hi + 1]
# Neighbor constraint (sequential processing): alpha(b) <= alpha_prev + 60
alpha_limit = min(int(alpha_max), int(alpha_prev) + MAX_SF_DELTA)
# Start from alpha_hat, clamped to feasible range
alpha = int(alpha_hat)
alpha = max(int(alpha_min), min(alpha, int(alpha_limit)))
# Evaluate at current alpha
Ssec = _quantize_symbol(Xsec, alpha)
Xhat = _dequantize_symbol(Ssec, alpha)
Pe = float(np.sum((Xsec - Xhat) ** 2))
# If already above threshold, return current alpha (no decrease step specified)
if Pe > T_b:
return alpha
# Increase alpha while still under threshold and within constraints
while True:
alpha_next = alpha + 1
if alpha_next > alpha_limit:
break
Ssec = _quantize_symbol(Xsec, alpha_next)
Xhat = _dequantize_symbol(Ssec, alpha_next)
Pe_next = float(np.sum((Xsec - Xhat) ** 2))
if Pe_next > T_b:
break
alpha = alpha_next
return alpha
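# Hedged note: P_e(b) is not guaranteed to be monotone in alpha, so the loop
# above stops at the FIRST alpha whose error exceeds T(b) rather than searching
# past local dips. This is our reading of the assignment's "increase while
# under threshold" rule, stated here as an assumption.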
# -----------------------------------------------------------------------------
# Public API
# -----------------------------------------------------------------------------
def aac_quantizer(
frame_F: FrameChannelF,
frame_type: FrameType,
SMR: FloatArray,
) -> tuple[QuantizedSymbols, ScaleFactors, GlobalGain]:
"""
AAC quantizer for one channel (Level 3).
Quantizes MDCT coefficients (after TNS) using band-wise scalefactors derived
from psychoacoustic thresholds computed via SMR.
The implementation follows the assignment procedure:
- Compute an initial frame-wise alpha_hat using Equation (14), based on the
maximum MDCT coefficient magnitude of the (sub)frame.
- For each band b, increase alpha(b) by 1 while the quantization error power
P_e(b) stays below the threshold T(b).
- Enforce the neighbor constraint |alpha(b) - alpha(b-1)| <= 60 during the
band-by-band search (no post-processing needed).
Parameters
----------
frame_F : FrameChannelF
MDCT coefficients after TNS, one channel.
Shapes:
- Long frames: (1024,) or (1024, 1)
- ESH: (128, 8)
frame_type : FrameType
AAC frame type ("OLS", "LSS", "ESH", "LPS").
SMR : FloatArray
Signal-to-Mask Ratio per band.
Shapes:
- Long: (NB,) or (NB, 1)
- ESH: (NB, 8)
Returns
-------
S : QuantizedSymbols
Quantized symbols S(k), packed as shape (1024, 1) for all frame types.
For ESH, the 8 subframes are packed in column-major subframe layout.
sfc : ScaleFactors
DPCM-coded scalefactors:
sfc(0) = alpha(0) = G
sfc(b) = alpha(b) - alpha(b-1), for b > 0
Shapes:
- Long: (NB, 1)
- ESH: (NB, 8)
G : GlobalGain
Global gain G = alpha(0).
- Long: scalar float
- ESH: array shape (1, 8), dtype float64
"""
bands = _band_slices(frame_type)
NB = len(bands)
X = np.asarray(frame_F, dtype=np.float64)
SMR = np.asarray(SMR, dtype=np.float64)
# -------------------------------------------------------------------------
# ESH: 8 short subframes, each of length 128
# -------------------------------------------------------------------------
if frame_type == "ESH":
if X.shape != (128, 8):
raise ValueError("For ESH, frame_F must have shape (128, 8).")
if SMR.shape != (NB, 8):
raise ValueError(f"For ESH, SMR must have shape ({NB}, 8).")
S_out: QuantizedSymbols = np.zeros((1024, 1), dtype=np.int64)
sfc: ScaleFactors = np.zeros((NB, 8), dtype=np.int64)
G_arr = np.zeros((1, 8), dtype=np.float64)
# Packed output view: (128, 8) with column-major layout
S_pack = S_out[:, 0].reshape(128, 8, order="F")
for j in range(8):
Xj = X[:, j].reshape(128)
SMRj = SMR[:, j].reshape(NB)
# Compute psychoacoustic threshold T(b) for this subframe
T = _psychoacoustic_threshold(Xj, SMRj, bands)
# Frame-wise initial estimate alpha_hat (Equation 14)
alpha_hat = _initial_alpha_hat(Xj)
# Band-wise scalefactors alpha(b)
alpha = np.zeros((NB,), dtype=np.int64)
alpha_prev = int(alpha_hat)
for b, (lo, hi) in enumerate(bands):
alpha_b = _best_alpha_for_band(
X=Xj,
lo=lo,
hi=hi,
T_b=float(T[b]),
alpha_hat=int(alpha_hat),
alpha_prev=int(alpha_prev),
alpha_min=-4096,
alpha_max=4096,
)
alpha[b] = int(alpha_b)
alpha_prev = int(alpha_b)
# DPCM-coded scalefactors
G_arr[0, j] = float(alpha[0])
sfc[0, j] = int(alpha[0])
for b in range(1, NB):
sfc[b, j] = int(alpha[b] - alpha[b - 1])
# Quantize MDCT coefficients band-by-band
Sj = np.zeros((128,), dtype=np.int64)
for b, (lo, hi) in enumerate(bands):
Sj[lo : hi + 1] = _quantize_symbol(Xj[lo : hi + 1], float(alpha[b]))
# Store subframe in packed output
S_pack[:, j] = Sj
return S_out, sfc, G_arr
# -------------------------------------------------------------------------
# Long frames: OLS / LSS / LPS, length 1024
# -------------------------------------------------------------------------
if X.shape == (1024,):
Xv = X
elif X.shape == (1024, 1):
Xv = X[:, 0]
else:
raise ValueError("For non-ESH, frame_F must have shape (1024,) or (1024, 1).")
if SMR.shape == (NB,):
SMRv = SMR
elif SMR.shape == (NB, 1):
SMRv = SMR[:, 0]
else:
raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")
# Compute psychoacoustic threshold T(b) for the long frame
T = _psychoacoustic_threshold(Xv, SMRv, bands)
# Frame-wise initial estimate alpha_hat (Equation 14)
alpha_hat = _initial_alpha_hat(Xv)
# Band-wise scalefactors alpha(b)
alpha = np.zeros((NB,), dtype=np.int64)
alpha_prev = int(alpha_hat)
for b, (lo, hi) in enumerate(bands):
alpha_b = _best_alpha_for_band(
X=Xv,
lo=lo,
hi=hi,
T_b=float(T[b]),
alpha_hat=int(alpha_hat),
alpha_prev=int(alpha_prev),
alpha_min=-4096,
alpha_max=4096,
)
alpha[b] = int(alpha_b)
alpha_prev = int(alpha_b)
# DPCM-coded scalefactors
sfc_out: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
sfc_out[0, 0] = int(alpha[0])
for b in range(1, NB):
sfc_out[b, 0] = int(alpha[b] - alpha[b - 1])
G: float = float(alpha[0])
# Quantize MDCT coefficients band-by-band
S_vec = np.zeros((1024,), dtype=np.int64)
for b, (lo, hi) in enumerate(bands):
S_vec[lo : hi + 1] = _quantize_symbol(Xv[lo : hi + 1], float(alpha[b]))
return S_vec.reshape(1024, 1), sfc_out, G
def aac_i_quantizer(
S: QuantizedSymbols,
sfc: ScaleFactors,
G: GlobalGain,
frame_type: FrameType,
) -> FrameChannelF:
"""
Inverse quantizer (iQuantizer) for one channel.
Reconstructs MDCT coefficients from quantized symbols and DPCM scalefactors.
Parameters
----------
S : QuantizedSymbols
Quantized symbols, shape (1024, 1) (or any array with 1024 elements).
sfc : ScaleFactors
DPCM-coded scalefactors.
Shapes:
- Long: (NB, 1)
- ESH: (NB, 8)
G : GlobalGain
Global gain (not strictly required if sfc includes sfc(0)=alpha(0)).
Present for API compatibility with the assignment.
frame_type : FrameType
AAC frame type.
Returns
-------
FrameChannelF
Reconstructed MDCT coefficients:
- ESH: (128, 8)
- Long: (1024, 1)
"""
bands = _band_slices(frame_type)
NB = len(bands)
S_flat = np.asarray(S, dtype=np.int64).reshape(-1)
if S_flat.shape[0] != 1024:
raise ValueError("S must contain 1024 symbols.")
if frame_type == "ESH":
sfc = np.asarray(sfc, dtype=np.int64)
if sfc.shape != (NB, 8):
raise ValueError(f"For ESH, sfc must have shape ({NB}, 8).")
S_128x8 = _esh_unpack(S_flat)
Xrec = np.zeros((128, 8), dtype=np.float64)
for j in range(8):
alpha = np.zeros((NB,), dtype=np.int64)
alpha[0] = int(sfc[0, j])
for b in range(1, NB):
alpha[b] = int(alpha[b - 1] + sfc[b, j])
Xj = np.zeros((128,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
Xj[lo : hi + 1] = _dequantize_symbol(S_128x8[lo : hi + 1, j].astype(np.int64), float(alpha[b]))
Xrec[:, j] = Xj
return Xrec
sfc = np.asarray(sfc, dtype=np.int64)
if sfc.shape != (NB, 1):
raise ValueError(f"For non-ESH, sfc must have shape ({NB}, 1).")
alpha = np.zeros((NB,), dtype=np.int64)
alpha[0] = int(sfc[0, 0])
for b in range(1, NB):
alpha[b] = int(alpha[b - 1] + sfc[b, 0])
Xrec = np.zeros((1024,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
Xrec[lo : hi + 1] = _dequantize_symbol(S_flat[lo : hi + 1], float(alpha[b]))
return Xrec.reshape(1024, 1)
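# Hedged round-trip sketch for a long frame (the flat SMR is chosen arbitrarily):
#   X = np.random.randn(1024)
#   SMR = np.full(69, 100.0)
#   S, sfc, G = aac_quantizer(X, "OLS", SMR)       # S: (1024, 1), sfc: (69, 1)
#   X_hat = aac_i_quantizer(S, sfc, G, "OLS")      # (1024, 1)
# X_hat approximates X with per-band error power held near T(b) = P(b)/SMR(b).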

View File

@ -0,0 +1,514 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Temporal Noise Shaping (TNS)
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Temporal Noise Shaping (TNS) module (Level 2).
#
# Public API:
# frame_F_out, tns_coeffs = aac_tns(frame_F_in, frame_type)
# frame_F_out = aac_i_tns(frame_F_in, frame_type, tns_coeffs)
#
# Notes (per assignment):
# - TNS is applied per channel (not stereo).
# - For ESH, TNS is applied independently to each of the 8 short subframes.
# - Bark band tables are taken from TableB.2.1.9a (long) and TableB.2.1.9b (short)
# provided in TableB219.mat.
# - Predictor order is fixed to p = 4.
# - Coefficients are quantized with a 4-bit uniform symmetric quantizer, step = 0.1.
# - Forward TNS applies FIR: H_TNS(z) = 1 - a1 z^-1 - ... - ap z^-p
# - Inverse TNS applies the inverse IIR filter using the same quantized coefficients.
# ------------------------------------------------------------
from __future__ import annotations
from typing import Tuple
import numpy as np
from core.aac_utils import load_b219_tables
from core.aac_configuration import PRED_ORDER, QUANT_STEP, QUANT_MAX
from core.aac_types import *
# -----------------------------------------------------------------------------
# Private helpers
# -----------------------------------------------------------------------------
def _band_ranges(k_count: int) -> BandRanges:
"""
Return Bark band index ranges [start, end] (inclusive) for the given MDCT line count.
Parameters
----------
k_count : int
Number of MDCT lines:
- 1024 for long frames
- 128 for short subframes (ESH)
Returns
-------
BandRanges (list[tuple[int, int]])
Each tuple is (start_k, end_k) inclusive.
"""
tables = load_b219_tables()
if k_count == 1024:
tbl = tables["B219a"]
elif k_count == 128:
tbl = tables["B219b"]
else:
raise ValueError("TNS supports only k_count=1024 (long) or k_count=128 (short).")
start = tbl[:, 1].astype(int)
end = tbl[:, 2].astype(int)
ranges: BandRanges = [(int(s), int(e)) for s, e in zip(start, end)]
for s, e in ranges:
if s < 0 or e < s or e >= k_count:
raise ValueError("Invalid band table ranges for given k_count.")
return ranges
# -----------------------------------------------------------------------------
# Core DSP helpers
# -----------------------------------------------------------------------------
def _smooth_sw_inplace(sw: MdctCoeffs) -> None:
"""
Smooth Sw(k) to reduce discontinuities between adjacent Bark bands.
The assignment applies two passes:
- Backward: Sw(k) = (Sw(k) + Sw(k+1))/2
- Forward: Sw(k) = (Sw(k) + Sw(k-1))/2
Parameters
----------
sw : MdctCoeffs
1-D array of length K (float64). Modified in-place.
"""
k_count = int(sw.shape[0])
for k in range(k_count - 2, -1, -1):
sw[k] = 0.5 * (sw[k] + sw[k + 1])
for k in range(1, k_count):
sw[k] = 0.5 * (sw[k] + sw[k - 1])
def _compute_sw(x: MdctCoeffs) -> MdctCoeffs:
"""
Compute Sw(k) from band energies P(j) and apply boundary smoothing.
Parameters
----------
x : MdctCoeffs
1-D MDCT line array, length K.
Returns
-------
MdctCoeffs
Sw(k), 1-D array of length K, float64.
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
k_count = int(x.shape[0])
bands = _band_ranges(k_count)
sw = np.zeros(k_count, dtype=np.float64)
for s, e in bands:
seg = x[s : e + 1]
p_j = float(np.sum(seg * seg))
sw_val = float(np.sqrt(p_j))
sw[s : e + 1] = sw_val
_smooth_sw_inplace(sw)
return sw
def _autocorr(x: MdctCoeffs, p: int) -> MdctCoeffs:
"""
Autocorrelation r(m) for m=0..p.
Parameters
----------
x : MdctCoeffs
1-D signal.
p : int
Maximum lag.
Returns
-------
MdctCoeffs
r, shape (p+1,), float64.
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
n = int(x.shape[0])
r = np.zeros(p + 1, dtype=np.float64)
for m in range(p + 1):
r[m] = float(np.dot(x[m:], x[: n - m]))
return r
def _lpc_coeffs(xw: MdctCoeffs, p: int) -> MdctCoeffs:
"""
Solve Yule-Walker normal equations for LPC coefficients of order p.
Parameters
----------
xw : MdctCoeffs
1-D normalized sequence Xw(k).
p : int
Predictor order.
Returns
-------
MdctCoeffs
LPC coefficients a[0..p-1], shape (p,), float64.
"""
r = _autocorr(xw, p)
R = np.empty((p, p), dtype=np.float64)
for i in range(p):
for j in range(p):
R[i, j] = r[abs(i - j)]
rhs = r[1 : p + 1].reshape(p)
reg = 1e-12
R_reg = R + reg * np.eye(p, dtype=np.float64)
a = np.linalg.solve(R_reg, rhs)
return a
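# Hedged note: R is a symmetric Toeplitz matrix, so scipy.linalg.solve_toeplitz
# (or a Levinson-Durbin recursion) would solve the same system in O(p^2); with
# p = 4 the dense regularized solve above is equally fine and keeps this module
# free of a SciPy dependency.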
def _quantize_coeffs(a: MdctCoeffs) -> MdctCoeffs:
"""
Quantize LPC coefficients with uniform symmetric quantizer and clamp.
Parameters
----------
a : MdctCoeffs
LPC coefficient array, shape (p,).
Returns
-------
MdctCoeffs
Quantized coefficients, shape (p,), float64.
"""
a = np.asarray(a, dtype=np.float64).reshape(-1)
q = np.round(a / QUANT_STEP) * QUANT_STEP
q = np.clip(q, -QUANT_MAX, QUANT_MAX)
return q.astype(np.float64, copy=False)
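# Worked example: with QUANT_STEP = 0.1 (per the header notes), a raw
# coefficient 0.234 becomes round(2.34) * 0.1 = 0.2 before clamping to
# [-QUANT_MAX, QUANT_MAX] (value taken from core.aac_configuration).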
def _is_inverse_stable(a_q: MdctCoeffs) -> bool:
"""
Check stability of the inverse TNS filter H_TNS^{-1}.
Forward filter:
H_TNS(z) = 1 - a1 z^-1 - ... - ap z^-p
Inverse filter poles are roots of:
A(z) = 1 - a1 z^-1 - ... - ap z^-p
Multiply by z^p:
z^p - a1 z^{p-1} - ... - ap = 0
Stability condition:
all roots satisfy |z| < 1.
Parameters
----------
a_q : MdctCoeffs
Quantized predictor coefficients, shape (p,).
Returns
-------
bool
True if stable, else False.
"""
a_q = np.asarray(a_q, dtype=np.float64).reshape(-1)
p = int(a_q.shape[0])
# Polynomial in z: z^p - a1 z^{p-1} - ... - ap
poly = np.empty(p + 1, dtype=np.float64)
poly[0] = 1.0
poly[1:] = -a_q
roots = np.roots(poly)
# Strictly inside unit circle for stability. Add tiny margin for numeric safety.
margin = 1e-12
return bool(np.all(np.abs(roots) < (1.0 - margin)))
def _stabilize_quantized_coeffs(a_q: MdctCoeffs) -> MdctCoeffs:
"""
Make quantized predictor coefficients stable for inverse filtering.
Policy:
- If already stable: return as-is.
- Else: iteratively shrink coefficients by gamma and re-quantize to the 0.1 grid.
- If still unstable after attempts: fall back to all-zero coefficients (disable TNS).
Parameters
----------
a_q : MdctCoeffs
Quantized predictor coefficients, shape (p,).
Returns
-------
MdctCoeffs
Stable quantized coefficients, shape (p,).
"""
a_q = np.asarray(a_q, dtype=np.float64).reshape(-1)
if _is_inverse_stable(a_q):
return a_q
# Try a few shrinking factors. Re-quantize after shrinking to keep coefficients on-grid.
gammas = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1)
for g in gammas:
cand = _quantize_coeffs(g * a_q)
if _is_inverse_stable(cand):
return cand
# Last resort: disable TNS for this vector
return np.zeros_like(a_q, dtype=np.float64)
def _apply_tns_fir(x: MdctCoeffs, a_q: MdctCoeffs) -> MdctCoeffs:
"""
Apply forward TNS FIR filter:
y[k] = x[k] - sum_{l=1..p} a_l * x[k-l]
Parameters
----------
x : MdctCoeffs
1-D MDCT lines, length K.
a_q : MdctCoeffs
Quantized LPC coefficients, shape (p,).
Returns
-------
MdctCoeffs
Filtered MDCT lines y, length K.
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
a_q = np.asarray(a_q, dtype=np.float64).reshape(-1)
p = int(a_q.shape[0])
k_count = int(x.shape[0])
y = np.zeros(k_count, dtype=np.float64)
for k in range(k_count):
acc = x[k]
for l in range(1, p + 1):
if k - l >= 0:
acc -= a_q[l - 1] * x[k - l]
y[k] = acc
return y
def _apply_itns_iir(y: MdctCoeffs, a_q: MdctCoeffs) -> MdctCoeffs:
"""
Apply inverse TNS IIR filter:
x_hat[k] = y[k] + sum_{l=1..p} a_l * x_hat[k-l]
Parameters
----------
y : MdctCoeffs
1-D MDCT lines after TNS, length K.
a_q : MdctCoeffs
Quantized LPC coefficients, shape (p,).
Returns
-------
MdctCoeffs
Reconstructed MDCT lines x_hat, length K.
"""
y = np.asarray(y, dtype=np.float64).reshape(-1)
a_q = np.asarray(a_q, dtype=np.float64).reshape(-1)
p = int(a_q.shape[0])
k_count = int(y.shape[0])
x_hat = np.zeros(k_count, dtype=np.float64)
for k in range(k_count):
acc = y[k]
for l in range(1, p + 1):
if k - l >= 0:
acc += a_q[l - 1] * x_hat[k - l]
x_hat[k] = acc
return x_hat
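# Self-check sketch (hedged): the IIR above implements the exact inverse
# recursion of _apply_tns_fir, so a round trip is identity up to float rounding:
#   x = np.random.randn(128)
#   a_q = np.array([0.3, -0.2, 0.1, 0.0])
#   np.allclose(_apply_itns_iir(_apply_tns_fir(x, a_q), a_q), x)   # ~True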
def _tns_vector(x: MdctCoeffs) -> tuple[MdctCoeffs, MdctCoeffs]:
"""
TNS for a single MDCT vector (one long frame or one short subframe).
Steps:
1) Compute Sw(k) from Bark band energies and smooth it.
2) Normalize: Xw(k) = X(k) / Sw(k) (safe when Sw=0).
3) Compute LPC coefficients (order p=PRED_ORDER) on Xw.
4) Quantize coefficients (4-bit symmetric, step QUANT_STEP).
5) Apply FIR filter on original X(k) using quantized coefficients.
Parameters
----------
x : MdctCoeffs
1-D MDCT vector.
Returns
-------
y : MdctCoeffs
TNS-processed MDCT vector (same length).
a_q : MdctCoeffs
Quantized LPC coefficients, shape (PRED_ORDER,).
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
sw = _compute_sw(x)
eps = 1e-12
xw = np.zeros_like(x, dtype=np.float64)
mask = sw > eps
np.divide(x, sw, out=xw, where=mask)
a = _lpc_coeffs(xw, PRED_ORDER)
a_q = _quantize_coeffs(a)
# Ensure inverse stability (assignment requirement)
a_q = _stabilize_quantized_coeffs(a_q)
y = _apply_tns_fir(x, a_q)
return y, a_q
# -----------------------------------------------------------------------------
# Public Functions
# -----------------------------------------------------------------------------
def aac_tns(frame_F_in: FrameChannelF, frame_type: FrameType) -> Tuple[FrameChannelF, TnsCoeffs]:
"""
Temporal Noise Shaping (TNS) for ONE channel.
Parameters
----------
frame_F_in : FrameChannelF
Per-channel MDCT coefficients.
Expected (typical) shapes:
- If frame_type == "ESH": (128, 8)
- Else: (1024, 1) or (1024,)
frame_type : FrameType
Frame type code ("OLS", "LSS", "ESH", "LPS").
Returns
-------
frame_F_out : FrameChannelF
Per-channel MDCT coefficients after applying TNS.
Same shape convention as input.
tns_coeffs : TnsCoeffs
Quantized TNS predictor coefficients.
Expected shapes:
- If frame_type == "ESH": (PRED_ORDER, 8)
- Else: (PRED_ORDER, 1)
"""
x = np.asarray(frame_F_in, dtype=np.float64)
if frame_type == "ESH":
if x.shape != (128, 8):
raise ValueError("For ESH, frame_F_in must have shape (128, 8).")
y = np.empty_like(x, dtype=np.float64)
a_out = np.empty((PRED_ORDER, 8), dtype=np.float64)
for j in range(8):
y[:, j], a_out[:, j] = _tns_vector(x[:, j])
return y, a_out
if x.shape == (1024,):
x_vec = x
out_shape = (1024,)
elif x.shape == (1024, 1):
x_vec = x[:, 0]
out_shape = (1024, 1)
else:
raise ValueError('For non-ESH, frame_F_in must have shape (1024,) or (1024, 1).')
y_vec, a_q = _tns_vector(x_vec)
if out_shape == (1024,):
y_out = y_vec
else:
y_out = y_vec.reshape(1024, 1)
a_out = a_q.reshape(PRED_ORDER, 1)
return y_out, a_out
def aac_i_tns(frame_F_in: FrameChannelF, frame_type: FrameType, tns_coeffs: TnsCoeffs) -> FrameChannelF:
"""
Inverse Temporal Noise Shaping (iTNS) for ONE channel.
Parameters
----------
frame_F_in : FrameChannelF
Per-channel MDCT coefficients after TNS.
Expected (typical) shapes:
- If frame_type == "ESH": (128, 8)
- Else: (1024, 1) or (1024,)
frame_type : FrameType
Frame type code ("OLS", "LSS", "ESH", "LPS").
tns_coeffs : TnsCoeffs
Quantized TNS predictor coefficients.
Expected shapes:
- If frame_type == "ESH": (PRED_ORDER, 8)
- Else: (PRED_ORDER, 1)
Returns
-------
FrameChannelF
Per-channel MDCT coefficients after inverse TNS.
Same shape convention as input frame_F_in.
"""
x = np.asarray(frame_F_in, dtype=np.float64)
a = np.asarray(tns_coeffs, dtype=np.float64)
if frame_type == "ESH":
if x.shape != (128, 8):
raise ValueError("For ESH, frame_F_in must have shape (128, 8).")
if a.shape != (PRED_ORDER, 8):
raise ValueError("For ESH, tns_coeffs must have shape (PRED_ORDER, 8).")
y = np.empty_like(x, dtype=np.float64)
for j in range(8):
y[:, j] = _apply_itns_iir(x[:, j], a[:, j])
return y
if a.shape != (PRED_ORDER, 1):
raise ValueError("For non-ESH, tns_coeffs must have shape (PRED_ORDER, 1).")
if x.shape == (1024,):
x_vec = x
out_shape = (1024,)
elif x.shape == (1024, 1):
x_vec = x[:, 0]
out_shape = (1024, 1)
else:
raise ValueError('For non-ESH, frame_F_in must have shape (1024,) or (1024, 1).')
y_vec = _apply_itns_iir(x_vec, a[:, 0])
if out_shape == (1024,):
return y_vec
return y_vec.reshape(1024, 1)
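# Hedged usage sketch (long frame):
#   X = np.random.randn(1024, 1)
#   Y, a = aac_tns(X, "OLS")            # Y: (1024, 1), a: (PRED_ORDER, 1)
#   X_hat = aac_i_tns(Y, "OLS", a)
#   np.allclose(X, X_hat)               # ~True when the coefficients are stable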

View File

@ -163,6 +163,42 @@ def snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
     return float(10.0 * np.log10(ps / pn))
+def estimate_lag_mono(x_ref: TimeSignal, x_hat: TimeSignal, max_lag=4096):
+    """
+    Estimate time lag between two mono signals.
+    Returns lag (positive means x_hat delayed).
+    """
+    n = min(len(x_ref), len(x_hat))
+    x_ref = x_ref[:n]
+    x_hat = x_hat[:n]
+    corr = np.correlate(x_ref, x_hat, mode='full')
+    lags = np.arange(-n + 1, n)
+    center = n - 1
+    lo = max(0, center - max_lag)
+    hi = min(len(corr), center + max_lag + 1)
+    best = lo + int(np.argmax(corr[lo:hi]))
+    return int(lags[best])
+def match_gain(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
+    """
+    Least-squares gain g that best maps x_hat -> x_ref.
+    """
+    n = min(x_ref.shape[0], x_hat.shape[0])
+    c = min(x_ref.shape[1], x_hat.shape[1])
+    r = x_ref[:n, :c].reshape(-1).astype(np.float64)
+    h = x_hat[:n, :c].reshape(-1).astype(np.float64)
+    denom = float(np.dot(h, h))
+    if denom <= 0.0:
+        return 1.0
+    return float(np.dot(r, h) / denom)
 # -----------------------------------------------------------------------------
 # Psychoacoustic band tables (TableB219.mat)
 # -----------------------------------------------------------------------------

View File

@ -0,0 +1,403 @@
import numpy as np
import scipy.io as sio
import os
# ------------------ LOAD LUT ------------------
def load_LUT(mat_filename=None):
"""
Loads the list of Huffman Codebooks (LUTs)
Returns:
huffLUT : list (index 1..11 used, index 0 unused)
"""
if mat_filename is None:
current_dir = os.path.dirname(os.path.abspath(__file__))
mat_filename = os.path.join(current_dir, "huffCodebooks.mat")
mat = sio.loadmat(mat_filename)
huffCodebooks_raw = mat['huffCodebooks'].squeeze()
huffCodebooks = []
for i in range(11):
huffCodebooks.append(np.array(huffCodebooks_raw[i]))
# Build inverse VLC tables
invTable = [None] * 11
for i in range(11):
h = huffCodebooks[i][:, 2].astype(int) # column 3
hlength = huffCodebooks[i][:, 1].astype(int) # column 2
hbin = []
for j in range(len(h)):
hbin.append(format(h[j], f'0{hlength[j]}b'))
invTable[i] = vlc_table(hbin)
# Build Huffman LUT dicts
huffLUT = [None] * 12 # index 0 unused
params = [
(4, 1, True),
(4, 1, True),
(4, 2, False),
(4, 2, False),
(2, 4, True),
(2, 4, True),
(2, 7, False),
(2, 7, False),
(2, 12, False),
(2, 12, False),
(2, 16, False),
]
for i, (nTupleSize, maxAbs, signed) in enumerate(params, start=1):
huffLUT[i] = {
'LUT': huffCodebooks[i-1],
'invTable': invTable[i-1],
'codebook': i,
'nTupleSize': nTupleSize,
'maxAbsCodeVal': maxAbs,
'signedValues': signed
}
return huffLUT
def vlc_table(code_array):
"""
codeArray: list of strings, each string is a Huffman codeword (e.g. '0101')
returns:
h : NumPy array of shape (num_nodes, 3)
columns:
[ next_if_0 , next_if_1 , symbol_index ]
"""
h = np.zeros((1, 3), dtype=int)
for code_index, code in enumerate(code_array, start=1):
word = [int(bit) for bit in code]
h_index = 0
for bit in word:
k = bit
next_node = h[h_index, k]
if next_node == 0:
h = np.vstack([h, [0, 0, 0]])
new_index = h.shape[0] - 1
h[h_index, k] = new_index
h_index = new_index
else:
h_index = next_node
h[h_index, 2] = code_index
return h
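# Worked example (a toy prefix-free code, NOT one of the AAC codebooks):
#   vlc_table(['0', '10', '11']) ->
#       [[1, 2, 0],   # root: bit 0 -> node 1, bit 1 -> node 2
#        [0, 0, 1],   # leaf: symbol 1 ('0')
#        [3, 4, 0],   # inner node reached after '1'
#        [0, 0, 2],   # leaf: symbol 2 ('10')
#        [0, 0, 3]]   # leaf: symbol 3 ('11')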
# ------------------ ENCODE ------------------
def encode_huff(coeff_sec, huff_LUT_list, force_codebook = None):
"""
Huffman-encode a sequence of quantized coefficients.
This function selects the appropriate Huffman codebook based on the
maximum absolute value of the input coefficients, encodes the coefficients
into a binary Huffman bitstream, and returns both the bitstream and the
selected codebook index.
This is the Python equivalent of the MATLAB `encodeHuff.m` function used
in audio/image coding (e.g., scale factor band encoding). The input
coefficient sequence is grouped into fixed-size tuples as defined by
the chosen Huffman LUT. Zero-padding may be applied internally.
Parameters
----------
coeff_sec : array_like of int
1-D array of quantized integer coefficients to encode.
Typically corresponds to a "section" or scale-factor band.
huff_LUT_list : list
List of Huffman lookup-table dictionaries as returned by `loadLUT()`.
Index 1..11 correspond to valid Huffman codebooks.
Index 0 is unused.
Returns
-------
huffSec : str
Huffman-encoded bitstream represented as a string of '0' and '1'
characters.
huffCodebook : int
Index (1..11) of the Huffman codebook used for encoding.
A value of 0 indicates a special all-zero section.
"""
if force_codebook is not None:
return huff_LUT_code_1(huff_LUT_list[force_codebook], coeff_sec)
maxAbsVal = np.max(np.abs(coeff_sec))
if maxAbsVal == 0:
huffCodebook = 0
huffSec = huff_LUT_code_0()
elif maxAbsVal == 1:
candidates = [1, 2]
huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
if len(huffSec1) <= len(huffSec2):
huffSec = huffSec1
huffCodebook = candidates[0]
else:
huffSec = huffSec2
huffCodebook = candidates[1]
elif maxAbsVal == 2:
candidates = [3, 4]
huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
if len(huffSec1) <= len(huffSec2):
huffSec = huffSec1
huffCodebook = candidates[0]
else:
huffSec = huffSec2
huffCodebook = candidates[1]
elif maxAbsVal in (3, 4):
candidates = [5, 6]
huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
if len(huffSec1) <= len(huffSec2):
huffSec = huffSec1
huffCodebook = candidates[0]
else:
huffSec = huffSec2
huffCodebook = candidates[1]
elif maxAbsVal in (5, 6, 7):
candidates = [7, 8]
huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
if len(huffSec1) <= len(huffSec2):
huffSec = huffSec1
huffCodebook = candidates[0]
else:
huffSec = huffSec2
huffCodebook = candidates[1]
elif maxAbsVal in (8, 9, 10, 11, 12):
candidates = [9, 10]
huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
if len(huffSec1) <= len(huffSec2):
huffSec = huffSec1
huffCodebook = candidates[0]
else:
huffSec = huffSec2
huffCodebook = candidates[1]
elif maxAbsVal in (13, 14, 15):
huffCodebook = 11
huffSec = huff_LUT_code_1(huff_LUT_list[huffCodebook], coeff_sec)
else:
huffCodebook = 11
huffSec = huff_LUT_code_ESC(huff_LUT_list[huffCodebook], coeff_sec)
return huffSec, huffCodebook
def huff_LUT_code_1(huff_LUT, coeff_sec):
LUT = huff_LUT['LUT']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
signedValues = huff_LUT['signedValues']
numTuples = int(np.ceil(len(coeff_sec) / nTupleSize))
if signedValues:
coeff = coeff_sec + maxAbsCodeVal
base = 2 * maxAbsCodeVal + 1
else:
coeff = coeff_sec
base = maxAbsCodeVal + 1
coeffPad = np.zeros(numTuples * nTupleSize, dtype=int)
coeffPad[:len(coeff)] = coeff
huffSec = []
powers = base ** np.arange(nTupleSize - 1, -1, -1)
for i in range(numTuples):
nTuple = coeffPad[i*nTupleSize:(i+1)*nTupleSize]
huffIndex = int(np.abs(nTuple) @ powers)
hexVal = LUT[huffIndex, 2]
huffLen = LUT[huffIndex, 1]
bits = format(int(hexVal), f'0{int(huffLen)}b')
if signedValues:
huffSec.append(bits)
else:
signBits = ''.join('1' if v < 0 else '0' for v in nTuple)
huffSec.append(bits + signBits)
return ''.join(huffSec)
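# Worked index example (hypothetical unsigned LUT with nTupleSize=2,
# maxAbsCodeVal=7): base = 8 and the tuple (3, -5) maps to
#   huffIndex = |3|*8 + |-5| = 29,
# with the sign bits '01' appended after the codeword.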
def huff_LUT_code_0():
return ''
def huff_LUT_code_ESC(huff_LUT, coeff_sec):
LUT = huff_LUT['LUT']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
numTuples = int(np.ceil(len(coeff_sec) / nTupleSize))
base = maxAbsCodeVal + 1
coeffPad = np.zeros(numTuples * nTupleSize, dtype=int)
coeffPad[:len(coeff_sec)] = coeff_sec
huffSec = []
powers = base ** np.arange(nTupleSize - 1, -1, -1)
for i in range(numTuples):
nTuple = coeffPad[i*nTupleSize:(i+1)*nTupleSize]
lnTuple = nTuple.astype(float)
lnTuple[lnTuple == 0] = np.finfo(float).eps
N4 = np.maximum(0, np.floor(np.log2(np.abs(lnTuple))).astype(int))
N = np.maximum(0, N4 - 4)
esc = np.abs(nTuple) > 15
nTupleESC = nTuple.copy()
nTupleESC[esc] = np.sign(nTupleESC[esc]) * 16
huffIndex = int(np.abs(nTupleESC) @ powers)
hexVal = LUT[huffIndex, 2]
huffLen = LUT[huffIndex, 1]
bits = format(int(hexVal), f'0{int(huffLen)}b')
escSeq = ''
for k in range(nTupleSize):
if esc[k]:
escSeq += '1' * N[k]
escSeq += '0'
escSeq += format(abs(nTuple[k]) - (1 << N4[k]), f'0{N4[k]}b')
signBits = ''.join('1' if v < 0 else '0' for v in nTuple)
huffSec.append(bits + signBits + escSeq)
return ''.join(huffSec)
# ------------------ DECODE ------------------
def decode_huff(huff_sec, huff_LUT):
"""
Decode a Huffman-encoded stream.
Parameters
----------
huff_sec : array-like of int or str
Huffman encoded stream as a sequence of 0 and 1 (string or list/array).
huff_LUT : dict
Huffman lookup table with keys:
- 'invTable': inverse table (numpy array)
- 'codebook': codebook number
- 'nTupleSize': tuple size
- 'maxAbsCodeVal': maximum absolute code value
- 'signedValues': True/False
Returns
-------
decCoeffs : list of int
Decoded quantized coefficients.
"""
h = huff_LUT['invTable']
huffCodebook = huff_LUT['codebook']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
signedValues = huff_LUT['signedValues']
# Convert string to array of ints
if isinstance(huff_sec, str):
huff_sec = np.array([int(b) for b in huff_sec])
eos = False
decCoeffs = []
streamIndex = 0
while not eos:
wordbit = 0
r = 0 # start at root
# Decode Huffman word using inverse table
while True:
b = huff_sec[streamIndex + wordbit]
wordbit += 1
rOld = r
r = h[rOld, b]
if h[r, 0] == 0 and h[r, 1] == 0:
symbolIndex = h[r, 2] - 1 # zero-based
streamIndex += wordbit
break
# Decode n-tuple magnitudes
if signedValues:
base = 2 * maxAbsCodeVal + 1
nTupleDec = []
tmp = symbolIndex
for p in reversed(range(nTupleSize)):
val = tmp // (base ** p)
nTupleDec.append(val - maxAbsCodeVal)
tmp = tmp % (base ** p)
nTupleDec = np.array(nTupleDec)
else:
base = maxAbsCodeVal + 1
nTupleDec = []
tmp = symbolIndex
for p in reversed(range(nTupleSize)):
val = tmp // (base ** p)
nTupleDec.append(val)
tmp = tmp % (base ** p)
nTupleDec = np.array(nTupleDec)
        # Apply sign bits. They are transmitted only for UNSIGNED codebooks;
        # signed codebooks already carry the sign inside the symbol index.
        if signedValues:
            nTupleSign = np.ones(nTupleSize, dtype=int)
        else:
            nTupleSignBits = huff_sec[streamIndex:streamIndex + nTupleSize]
            nTupleSign = -np.sign(nTupleSignBits - 0.5).astype(int)
            streamIndex += nTupleSize
        nTupleDec = nTupleDec * nTupleSign
# Handle escape sequences
escIndex = np.where(np.abs(nTupleDec) == 16)[0]
if huffCodebook == 11 and escIndex.size > 0:
for idx in escIndex:
N = 0
b = huff_sec[streamIndex]
while b:
N += 1
b = huff_sec[streamIndex + N]
# Skip the N leading '1' bits AND the terminating '0' delimiter.
# The encoder writes: '1'*N + '0' + <N4 bits>
                streamIndex += N + 1
N4 = N + 4
escape_word = huff_sec[streamIndex:streamIndex + N4]
escape_value = 2 ** N4 + int("".join(map(str, escape_word)), 2)
nTupleDec[idx] = escape_value
# We already consumed the delimiter above; now consume only N4 bits.
streamIndex += N4
# Apply signs again
nTupleDec[escIndex] *= nTupleSign[escIndex]
decCoeffs.extend(nTupleDec.tolist())
if streamIndex >= len(huff_sec):
eos = True
return decCoeffs
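# Hedged round-trip sketch:
#   lut_list = load_LUT()
#   bits, cb = encode_huff(np.array([1, -1, 0, 1]), lut_list)   # auto-select
#   dec = decode_huff(bits, lut_list[cb])
# For this 4-symbol input a signed 4-tuple codebook (1 or 2) applies, so no
# padding symbols are produced; in general the caller truncates (see wrappers).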

View File

@ -111,21 +111,6 @@ def _thresholds_from_smr(
     return T
-def _normalize_global_gain(G: GlobalGain) -> float | FloatArray:
-    """
-    Normalize GlobalGain to match AACChannelFrameF3["G"] type:
-    - long: return float
-    - ESH: return float64 ndarray of shape (1, 8)
-    """
-    if np.isscalar(G):
-        return float(G)
-    G_arr = np.asarray(G)
-    if G_arr.size == 1:
-        return float(G_arr.reshape(-1)[0])
-    return np.asarray(G_arr, dtype=np.float64)
 # -----------------------------------------------------------------------------
 # Public helpers (useful for level_x demo wrappers)
 # -----------------------------------------------------------------------------
@ -476,10 +461,6 @@ def aac_coder_3(
     S_L, sfc_L, G_L = aac_quantizer(chl_f_tns, frame_type, SMR_L)
     S_R, sfc_R, G_R = aac_quantizer(chr_f_tns, frame_type, SMR_R)
-    # Normalize G types for AACSeq3 schema (float | float64 ndarray).
-    G_Ln = _normalize_global_gain(G_L)
-    G_Rn = _normalize_global_gain(G_R)
     # Huffman-code ONLY the DPCM differences for b>0.
     # sfc[0] corresponds to alpha(0)=G and is stored separately in the frame.
     sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
@ -503,7 +484,7 @@ def aac_coder_3(
"chl": { "chl": {
"tns_coeffs": np.asarray(chl_tns_coeffs, dtype=np.float64), "tns_coeffs": np.asarray(chl_tns_coeffs, dtype=np.float64),
"T": np.asarray(T_L, dtype=np.float64), "T": np.asarray(T_L, dtype=np.float64),
"G": G_Ln, "G": G_L,
"sfc": sfc_L_stream, "sfc": sfc_L_stream,
"stream": mdct_L_stream, "stream": mdct_L_stream,
"codebook": int(cb_L), "codebook": int(cb_L),
@ -511,7 +492,7 @@ def aac_coder_3(
"chr": { "chr": {
"tns_coeffs": np.asarray(chr_tns_coeffs, dtype=np.float64), "tns_coeffs": np.asarray(chr_tns_coeffs, dtype=np.float64),
"T": np.asarray(T_R, dtype=np.float64), "T": np.asarray(T_R, dtype=np.float64),
"G": G_Rn, "G": G_R,
"sfc": sfc_R_stream, "sfc": sfc_R_stream,
"stream": mdct_R_stream, "stream": mdct_R_stream,
"codebook": int(cb_R), "codebook": int(cb_R),

View File

@ -42,7 +42,7 @@ def _nbands(frame_type: FrameType) -> int:
 # Public helpers
 # -----------------------------------------------------------------------------
-def aac_unpack_seq_channels_to_frame_f(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
+def aac_unpack_seq_channels(frame_type: FrameType, chl_f: FrameChannelF, chr_f: FrameChannelF) -> FrameF:
     """
     Re-pack per-channel spectra from the Level-1 AACSeq1 schema into the stereo
     FrameF container expected by aac_i_filter_bank().
@ -167,7 +167,7 @@ def aac_decoder_1(
     chl_f = np.asarray(fr["chl"]["frame_F"], dtype=np.float64)
     chr_f = np.asarray(fr["chr"]["frame_F"], dtype=np.float64)
-    frame_f: FrameF = aac_unpack_seq_channels_to_frame_f(frame_type, chl_f, chr_f)
+    frame_f: FrameF = aac_unpack_seq_channels(frame_type, chl_f, chr_f)
     frame_t_hat: FrameT = aac_i_filter_bank(frame_f, frame_type, win_type)  # (2048, 2)
     start = i * hop
@ -427,7 +427,7 @@ def aac_decoder_3(
     X_R = aac_i_tns(Xq_R, frame_type, tns_R)
     # Re-pack to stereo container and inverse filterbank
-    frame_f = aac_unpack_seq_channels_to_frame_f(frame_type, np.asarray(X_L), np.asarray(X_R))
+    frame_f = aac_unpack_seq_channels(frame_type, np.asarray(X_L), np.asarray(X_R))
     frame_t_hat: FrameT = aac_i_filter_bank(frame_f, frame_type, win_type)
     start = i * hop

View File

@ -0,0 +1,112 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Huffman wrappers (Level 3)
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Thin wrappers around the provided Huffman utilities (material/huff_utils.py)
# so that the API matches the assignment text.
#
# Exposed API (assignment):
# huff_sec, huff_codebook = aac_encode_huff(coeff_sec, huff_LUT_list, force_codebook)
# dec_coeffs = aac_decode_huff(huff_sec, huff_codebook, huff_LUT_list)
#
# Notes:
# - Huffman coding operates on tuples. Therefore, decode(encode(x)) may return
# extra trailing symbols due to tuple padding. The AAC decoder knows the
# true section length from side information (band limits) and truncates.
# ------------------------------------------------------------
from __future__ import annotations
from typing import Any
import numpy as np
from material.huff_utils import encode_huff, decode_huff
def aac_encode_huff(
coeff_sec: np.ndarray,
huff_LUT_list: list[dict[str, Any]],
force_codebook: int | None = None,
) -> tuple[str, int]:
"""
Huffman-encode a section of coefficients (MDCT symbols or scalefactors).
Parameters
----------
coeff_sec : np.ndarray
Coefficient section to be encoded. Any shape is accepted; the input
is flattened and treated as a 1-D sequence of int64 symbols.
huff_LUT_list : list[dict[str, Any]]
List of Huffman Look-Up Tables (LUTs) as returned by material.load_LUT().
Index corresponds to codebook id (typically 1..11, with 0 reserved).
force_codebook : int | None
If provided, forces the use of this Huffman codebook. In the assignment,
scalefactors are encoded with codebook 11. For MDCT coefficients, this
argument is usually omitted (auto-selection).
Returns
-------
tuple[str, int]
(huff_sec, huff_codebook)
- huff_sec: bitstream as a string of '0'/'1'
- huff_codebook: codebook id used by the encoder
"""
coeff_sec_arr = np.asarray(coeff_sec, dtype=np.int64).reshape(-1)
if force_codebook is None:
# Provided utility returns (bitstream, codebook) in the auto-selection case.
huff_sec, huff_codebook = encode_huff(coeff_sec_arr, huff_LUT_list)
return str(huff_sec), int(huff_codebook)
# Provided utility returns ONLY the bitstream when force_codebook is set.
cb = int(force_codebook)
huff_sec = encode_huff(coeff_sec_arr, huff_LUT_list, force_codebook=cb)
return str(huff_sec), cb
def aac_decode_huff(
huff_sec: str | np.ndarray,
huff_codebook: int,
huff_LUT: list[dict[str, Any]],
) -> np.ndarray:
"""
Huffman-decode a bitstream using the specified codebook.
Parameters
----------
huff_sec : str | np.ndarray
Huffman bitstream. Typically a string of '0'/'1'. If an array is provided,
it is passed through to the provided decoder.
huff_codebook : int
Codebook id that was returned by aac_encode_huff.
Codebook 0 represents an all-zero section.
huff_LUT : list[dict[str, Any]]
Huffman LUT list as returned by material.load_LUT().
Returns
-------
np.ndarray
Decoded coefficients as a 1-D np.int64 array.
Note: Due to tuple coding, the decoded array may contain extra trailing
padding symbols. The caller must truncate to the known section length.
"""
cb = int(huff_codebook)
if cb == 0:
# Codebook 0 represents an all-zero section. The decoded length is not
# recoverable from the bitstream alone; the caller must expand/truncate.
return np.zeros((0,), dtype=np.int64)
if cb < 0 or cb >= len(huff_LUT):
raise ValueError(f"Invalid Huffman codebook index: {cb}")
lut = huff_LUT[cb]
dec = decode_huff(huff_sec, lut)
return np.asarray(dec, dtype=np.int64).reshape(-1)
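# ------------------------------------------------------------
# Editorial usage sketch (not part of the assignment API): a round trip
# through the two wrappers. load_LUT() is defined in material/huff_utils
# next to encode_huff/decode_huff; note the explicit truncation of the
# tuple-padding symbols mentioned in the module notes above.
# ------------------------------------------------------------
if __name__ == "__main__":
    from material.huff_utils import load_LUT

    luts = load_LUT()
    sym = np.array([3, -1, 0, 2, -4], dtype=np.int64)
    bits, cb = aac_encode_huff(sym, luts)
    dec = aac_decode_huff(bits, cb, luts)[: sym.size]  # drop tuple padding
    print(f"codebook={cb}, bits={len(bits)}, round-trip ok={np.array_equal(dec, sym)}")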

View File

@ -0,0 +1,441 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Psychoacoustic Model
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Psychoacoustic model for ONE channel, based on the assignment notes (Section 2.4).
#
# Public API:
# SMR = aac_psycho(frame_T, frame_type, frame_T_prev_1, frame_T_prev_2)
#
# Output:
# - For long frames ("OLS", "LSS", "LPS"): SMR has shape (69,)
# - For short frames ("ESH"): SMR has shape (42, 8) (one column per subframe)
#
# Notes:
# - Uses Bark band tables from material/TableB219.mat:
# * B219a for long windows (69 bands, N=2048 FFT, N/2=1024 bins)
# * B219b for short windows (42 bands, N=256 FFT, N/2=128 bins)
# - Applies a Hann window in time domain before FFT magnitude/phase extraction.
# - Implements:
# spreading function -> band spreading -> tonality index -> masking thresholds -> SMR.
# ------------------------------------------------------------
from __future__ import annotations
import numpy as np
from core.aac_utils import band_limits, get_table
from core.aac_configuration import NMT_DB, TMN_DB
from core.aac_types import *
# -----------------------------------------------------------------------------
# Spreading function
# -----------------------------------------------------------------------------
def _spreading_matrix(bval: BandValueArray) -> FloatArray:
"""
Compute the spreading function matrix between psychoacoustic bands.
The spreading function describes how energy in one critical band masks
nearby bands. The formula follows the assignment pseudo-code.
Parameters
----------
bval : BandValueArray
Bark value per band, shape (B,).
Returns
-------
FloatArray
Spreading matrix S of shape (B, B), where:
S[bb, b] quantifies the contribution of band bb masking band b.
"""
bval = np.asarray(bval, dtype=np.float64).reshape(-1)
B = int(bval.shape[0])
spread = np.zeros((B, B), dtype=np.float64)
for b in range(B):
for bb in range(B):
# tmpx depends on direction (asymmetric spreading)
if bb >= b:
tmpx = 3.0 * (bval[bb] - bval[b])
else:
tmpx = 1.5 * (bval[bb] - bval[b])
# tmpz uses the "min(..., 0)" nonlinearity exactly as in the notes
tmpz = 8.0 * min((tmpx - 0.5) ** 2 - 2.0 * (tmpx - 0.5), 0.0)
tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * np.sqrt(1.0 + (tmpx + 0.474) ** 2)
# Clamp very small values (below -100 dB) to 0 contribution
if tmpy < -100.0:
spread[bb, b] = 0.0
else:
spread[bb, b] = 10.0 ** ((tmpz + tmpy) / 10.0)
return spread
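# Editorial example (hedged): the asymmetric slopes (3.0 when the masker band
# lies at or above the masked band, 1.5 when below) make masking spread more
# strongly toward higher bands. With two bands one Bark apart:
#
#     >>> S = _spreading_matrix(np.array([0.0, 1.0]))
#     >>> bool(S[0, 1] > S[1, 0])   # lower band masks upward more than downward
#     True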
# -----------------------------------------------------------------------------
# Windowing + FFT feature extraction
# -----------------------------------------------------------------------------
def _hann_window(N: int) -> FloatArray:
"""
Hann window as specified in the notes:
w[n] = 0.5 - 0.5*cos(2*pi*(n + 0.5)/N)
Parameters
----------
N : int
Window length.
Returns
-------
FloatArray
1-D array of shape (N,), dtype float64.
"""
n = np.arange(N, dtype=np.float64)
return 0.5 - 0.5 * np.cos((2.0 * np.pi / N) * (n + 0.5))
def _r_phi_from_time(x: FrameChannelT, N: int) -> tuple[FloatArray, FloatArray]:
"""
Compute FFT magnitude r(w) and phase phi(w) for bins w = 0 .. N/2-1.
Processing:
1) Apply Hann window in time domain.
2) Compute N-point FFT.
3) Keep only the positive-frequency bins [0 .. N/2-1].
Parameters
----------
x : FrameChannelT
Time-domain samples, shape (N,).
N : int
FFT size (2048 or 256).
Returns
-------
r : FloatArray
Magnitude spectrum for bins 0 .. N/2-1, shape (N/2,).
phi : FloatArray
Phase spectrum for bins 0 .. N/2-1, shape (N/2,).
"""
x = np.asarray(x, dtype=np.float64).reshape(-1)
if x.shape[0] != N:
raise ValueError(f"Expected time vector of length {N}, got {x.shape[0]}.")
w = _hann_window(N)
X = np.fft.fft(x * w, n=N)
Xp = X[: N // 2]
r = np.abs(Xp).astype(np.float64, copy=False)
phi = np.angle(Xp).astype(np.float64, copy=False)
return r, phi
def _predictability(
r: FloatArray,
phi: FloatArray,
r_m1: FloatArray,
phi_m1: FloatArray,
r_m2: FloatArray,
phi_m2: FloatArray,
) -> FloatArray:
"""
Compute predictability c(w) per spectral bin.
The notes define:
r_pred(w) = 2*r_{-1}(w) - r_{-2}(w)
phi_pred(w) = 2*phi_{-1}(w) - phi_{-2}(w)
c(w) = |X(w) - X_pred(w)| / (r(w) + |r_pred(w)|)
where X(w) is represented in polar form using r(w), phi(w).
Parameters
----------
r, phi : FloatArray
Current magnitude and phase, shape (N/2,).
r_m1, phi_m1 : FloatArray
Previous magnitude and phase, shape (N/2,).
r_m2, phi_m2 : FloatArray
Pre-previous magnitude and phase, shape (N/2,).
Returns
-------
FloatArray
Predictability c(w), shape (N/2,).
"""
r_pred = 2.0 * r_m1 - r_m2
phi_pred = 2.0 * phi_m1 - phi_m2
num = np.sqrt(
(r * np.cos(phi) - r_pred * np.cos(phi_pred)) ** 2
+ (r * np.sin(phi) - r_pred * np.sin(phi_pred)) ** 2
)
den = r + np.abs(r_pred) + 1e-12 # avoid division-by-zero without altering behavior
return (num / den).astype(np.float64, copy=False)
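# Editorial check (hedged): a stationary sinusoid has constant magnitude and a
# linearly advancing phase, so the polar prediction is exact and c(w) -> 0
# (fully tonal). For example, with r = 1 and phase steps of 0.3 rad per frame:
#
#     >>> r = np.ones(4)
#     >>> c = _predictability(r, 0.6 * np.ones(4), r, 0.3 * np.ones(4), r, np.zeros(4))
#     >>> bool(np.allclose(c, 0.0))
#     True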
# -----------------------------------------------------------------------------
# Band-domain aggregation
# -----------------------------------------------------------------------------
def _band_energy_and_pred(
r: FloatArray,
c: FloatArray,
wlow: BandIndexArray,
whigh: BandIndexArray,
) -> tuple[FloatArray, FloatArray]:
"""
Aggregate spectral bin quantities into psychoacoustic bands.
Definitions (notes):
e(b) = sum_{w=wlow(b)..whigh(b)} r(w)^2
c_num(b) = sum_{w=wlow(b)..whigh(b)} c(w) * r(w)^2
The band predictability c(b) is later computed after spreading as:
cb(b) = ct(b) / ecb(b)
Parameters
----------
r : FloatArray
Magnitude spectrum, shape (N/2,).
c : FloatArray
Predictability per bin, shape (N/2,).
wlow, whigh : BandIndexArray
Band limits (inclusive indices), shape (B,).
Returns
-------
e_b : FloatArray
Band energies e(b), shape (B,).
c_num_b : FloatArray
Weighted predictability numerators c_num(b), shape (B,).
"""
r2 = (r * r).astype(np.float64, copy=False)
B = int(wlow.shape[0])
e_b = np.zeros(B, dtype=np.float64)
c_num_b = np.zeros(B, dtype=np.float64)
for b in range(B):
a = int(wlow[b])
z = int(whigh[b])
seg_r2 = r2[a : z + 1]
e_b[b] = float(np.sum(seg_r2))
c_num_b[b] = float(np.sum(c[a : z + 1] * seg_r2))
return e_b, c_num_b
def _psycho_window(
time_x: FrameChannelT,
prev1_x: FrameChannelT,
prev2_x: FrameChannelT,
*,
N: int,
table: BarkTable,
) -> FloatArray:
"""
Compute SMR for one FFT analysis window (N=2048 for long, N=256 for short).
This implements the pipeline described in the notes:
- FFT magnitude/phase
- predictability per bin
- band energies and predictability
- band spreading
- tonality index tb(b)
- masking threshold (noise + threshold in quiet)
- SMR(b) = e(b) / np(b)
Parameters
----------
time_x : FrameChannelT
Current time-domain samples, shape (N,).
prev1_x : FrameChannelT
Previous time-domain samples, shape (N,).
prev2_x : FrameChannelT
Pre-previous time-domain samples, shape (N,).
N : int
FFT size.
table : BarkTable
Psychoacoustic band table (B219a or B219b).
Returns
-------
FloatArray
SMR per band, shape (B,).
"""
wlow, whigh, bval, qthr_db = band_limits(table)
spread = _spreading_matrix(bval)
# FFT features for current and history windows
r, phi = _r_phi_from_time(time_x, N)
r_m1, phi_m1 = _r_phi_from_time(prev1_x, N)
r_m2, phi_m2 = _r_phi_from_time(prev2_x, N)
# Predictability per bin
c_w = _predictability(r, phi, r_m1, phi_m1, r_m2, phi_m2)
# Aggregate into psycho bands
e_b, c_num_b = _band_energy_and_pred(r, c_w, wlow, whigh)
# Spread energies and predictability across bands:
# ecb(b) = sum_bb e(bb) * S(bb, b)
# ct(b) = sum_bb c_num(bb) * S(bb, b)
ecb = spread.T @ e_b
ct = spread.T @ c_num_b
# Band predictability after spreading: cb(b) = ct(b) / ecb(b)
cb = ct / (ecb + 1e-12)
# Normalized energy term:
# en(b) = ecb(b) / sum_bb S(bb, b)
spread_colsum = np.sum(spread, axis=0)
en = ecb / (spread_colsum + 1e-12)
# Tonality index (clamped to [0, 1])
tb = -0.299 - 0.43 * np.log(np.maximum(cb, 1e-12))
tb = np.clip(tb, 0.0, 1.0)
# Required SNR per band (dB): interpolate between TMN and NMT
snr_b = tb * TMN_DB + (1.0 - tb) * NMT_DB
bc = 10.0 ** (-snr_b / 10.0)
# Noise masking threshold estimate (power domain)
nb = en * bc
# Threshold in quiet (convert from dB to power domain):
# qthr_power = eps * (N/2) * 10^(qthr_db/10)
qthr_power = np.finfo('float').eps * (N / 2.0) * (10.0 ** (qthr_db / 10.0))
# Final masking threshold per band:
# np(b) = max(nb(b), qthr(b))
npart = np.maximum(nb, qthr_power)
# Signal-to-mask ratio:
# SMR(b) = e(b) / np(b)
smr = e_b / (npart + 1e-12)
return smr.astype(np.float64, copy=False)
# -----------------------------------------------------------------------------
# ESH window slicing (match filterbank conventions)
# -----------------------------------------------------------------------------
def _esh_subframes(x_2048: FrameChannelT) -> list[FrameChannelT]:
"""
Extract the 8 overlapping 256-sample short windows used by AAC ESH.
The project convention (matching the filterbank) is:
start_j = 448 + 128*j, for j = 0..7
subframe_j = x[start_j : start_j + 256]
This selects the central 1152-sample region [448, 1600) and produces
8 windows with 50% overlap.
Parameters
----------
x_2048 : FrameChannelT
Time-domain channel frame, shape (2048,).
Returns
-------
list[FrameChannelT]
List of 8 subframes, each of shape (256,).
"""
x_2048 = np.asarray(x_2048, dtype=np.float64).reshape(-1)
if x_2048.shape[0] != 2048:
raise ValueError("ESH requires 2048-sample input frames.")
subs: list[FrameChannelT] = []
for j in range(8):
start = 448 + 128 * j
subs.append(x_2048[start : start + 256])
return subs
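# Editorial check (hedged): with a ramp input the 8 short windows start at
# 448, 576, ..., 1344 and stay inside the central [448, 1600) region:
#
#     >>> subs = _esh_subframes(np.arange(2048.0))
#     >>> len(subs), float(subs[0][0]), float(subs[7][0]), float(subs[7][-1])
#     (8, 448.0, 1344.0, 1599.0)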
# -----------------------------------------------------------------------------
# Public API
# -----------------------------------------------------------------------------
def aac_psycho(
frame_T: FrameChannelT,
frame_type: FrameType,
frame_T_prev_1: FrameChannelT,
frame_T_prev_2: FrameChannelT,
) -> FloatArray:
"""
Psychoacoustic model for ONE channel.
Parameters
----------
frame_T : FrameChannelT
Current time-domain channel frame, shape (2048,).
For "ESH", the 8 short windows are derived internally.
frame_type : FrameType
AAC frame type ("OLS", "LSS", "ESH", "LPS").
frame_T_prev_1 : FrameChannelT
Previous time-domain channel frame, shape (2048,).
frame_T_prev_2 : FrameChannelT
Pre-previous time-domain channel frame, shape (2048,).
Returns
-------
FloatArray
Signal-to-Mask Ratio (SMR), per psychoacoustic band.
- If frame_type == "ESH": shape (42, 8)
- Else: shape (69,)
"""
frame_T = np.asarray(frame_T, dtype=np.float64).reshape(-1)
frame_T_prev_1 = np.asarray(frame_T_prev_1, dtype=np.float64).reshape(-1)
frame_T_prev_2 = np.asarray(frame_T_prev_2, dtype=np.float64).reshape(-1)
if frame_T.shape[0] != 2048 or frame_T_prev_1.shape[0] != 2048 or frame_T_prev_2.shape[0] != 2048:
raise ValueError("aac_psycho expects 2048-sample frames for current/prev1/prev2.")
table, N = get_table(frame_type)
# Long frame types: compute one SMR vector (69 bands)
if frame_type != "ESH":
return _psycho_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)
# ESH: compute 8 SMR vectors (42 bands each), one per short subframe.
#
# The notes use short-window history for predictability:
# - For j=0: use previous frame's subframes (7, 6)
# - For j=1: use current subframe 0 and previous frame's subframe 7
# - For j>=2: use current subframes (j-1, j-2)
#
# This matches the "within-frame history" convention commonly used in
# simplified psycho models for ESH.
cur_subs = _esh_subframes(frame_T)
prev1_subs = _esh_subframes(frame_T_prev_1)
B = int(table.shape[0]) # expected 42
smr_out = np.zeros((B, 8), dtype=np.float64)
for j in range(8):
if j == 0:
x_m1 = prev1_subs[7]
x_m2 = prev1_subs[6]
elif j == 1:
x_m1 = cur_subs[0]
x_m2 = prev1_subs[7]
else:
x_m1 = cur_subs[j - 1]
x_m2 = cur_subs[j - 2]
smr_out[:, j] = _psycho_window(cur_subs[j], x_m1, x_m2, N=256, table=table)
return smr_out
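# ------------------------------------------------------------
# Editorial usage sketch (not part of the assignment API): output shapes of
# the model for long and short frames. Running it requires TableB219.mat to
# be reachable through core.aac_utils.get_table(); run from the repo root
# (module path assumed), e.g. python -m core.aac_psycho.
# ------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    prev2, prev1, cur = (rng.standard_normal(2048) for _ in range(3))
    print(aac_psycho(cur, "OLS", prev1, prev2).shape)   # expected: (69,)
    print(aac_psycho(cur, "ESH", prev1, prev2).shape)   # expected: (42, 8)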

View File

@ -0,0 +1,600 @@
# ------------------------------------------------------------
# AAC Coder/Decoder - Quantizer / iQuantizer (Level 3)
#
# Multimedia course at Aristotle University of
# Thessaloniki (AUTh)
#
# Author:
# Christos Choutouridis (ΑΕΜ 8997)
# cchoutou@ece.auth.gr
#
# Description:
# Implements AAC quantizer and inverse quantizer for one channel.
# Based on assignment section 2.6 (Eq. 12-15).
#
# Notes:
# - Bit reservoir is not implemented (assignment simplification).
# - Scalefactor bands are assumed equal to psychoacoustic bands
# (Table B.2.1.9a / B.2.1.9b from TableB219.mat).
# ------------------------------------------------------------
from __future__ import annotations
import numpy as np
from core.aac_utils import get_table, band_limits
from core.aac_types import *
# -----------------------------------------------------------------------------
# Constants (assignment)
# -----------------------------------------------------------------------------
MAGIC_NUMBER: float = 0.4054
EPS: float = 1e-12
MAX_SF_DELTA: int = 60
# -----------------------------------------------------------------------------
# Helpers: ESH packing/unpacking (128x8 <-> 1024x1)
# -----------------------------------------------------------------------------
def _esh_pack(x_128x8: FloatArray) -> FloatArray:
"""
Pack ESH coefficients (128 x 8) into a single long vector (1024 x 1).
Packing order:
Columns are concatenated in subframe order (0..7), column-major.
Parameters
----------
x_128x8 : FloatArray
ESH coefficients, shape (128, 8).
Returns
-------
FloatArray
Packed coefficients, shape (1024, 1).
"""
x_128x8 = np.asarray(x_128x8, dtype=np.float64)
if x_128x8.shape != (128, 8):
raise ValueError("ESH pack expects shape (128, 8).")
return x_128x8.reshape(1024, 1, order="F")
def _esh_unpack(x_1024x1: FloatArray) -> FloatArray:
"""
Unpack a packed ESH vector (1024 elements) back to shape (128, 8).
Parameters
----------
x_1024x1 : FloatArray
Packed ESH vector, shape (1024,) or (1024, 1) after flattening.
Returns
-------
FloatArray
Unpacked ESH coefficients, shape (128, 8).
"""
x_1024x1 = np.asarray(x_1024x1, dtype=np.float64).reshape(-1)
if x_1024x1.shape[0] != 1024:
raise ValueError("ESH unpack expects 1024 elements.")
return x_1024x1.reshape(128, 8, order="F")
# -----------------------------------------------------------------------------
# Core quantizer formulas (Eq. 12, Eq. 13)
# -----------------------------------------------------------------------------
def _quantize_symbol(x: FloatArray, alpha: float) -> QuantizedSymbols:
"""
Quantize MDCT coefficients to integer symbols S(k).
Implements Eq. (12):
S(k) = sgn(X(k)) * int( (|X(k)| * 2^(-alpha/4))^(3/4) + MAGIC_NUMBER )
Parameters
----------
x : FloatArray
MDCT coefficients for a contiguous set of spectral lines.
Shape: (N,)
alpha : float
Scalefactor gain for the corresponding scalefactor band.
Returns
-------
QuantizedSymbols
Quantized symbols S(k) as int64, shape (N,).
"""
x = np.asarray(x, dtype=np.float64)
scale = 2.0 ** (-0.25 * float(alpha))
ax = np.abs(x) * scale
y = np.power(ax, 0.75, dtype=np.float64)
# "int" in the assignment corresponds to truncation.
q = np.floor(y + MAGIC_NUMBER).astype(np.int64)
return (np.sign(x).astype(np.int64) * q).astype(np.int64)
def _dequantize_symbol(S: QuantizedSymbols, alpha: float) -> FloatArray:
"""
Inverse quantizer (dequantization of symbols).
Implements Eq. (13):
Xhat(k) = sgn(S(k)) * |S(k)|^(4/3) * 2^(alpha/4)
Parameters
----------
S : QuantizedSymbols
Quantized symbols S(k), int64, shape (N,).
alpha : float
Scalefactor gain for the corresponding scalefactor band.
Returns
-------
FloatArray
Reconstructed MDCT coefficients Xhat(k), float64, shape (N,).
"""
S = np.asarray(S, dtype=np.int64)
scale = 2.0 ** (0.25 * float(alpha))
aS = np.abs(S).astype(np.float64)
y = np.power(aS, 4.0 / 3.0, dtype=np.float64)
return (np.sign(S).astype(np.float64) * y * scale).astype(np.float64)
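# Editorial worked example (hedged): Eq. (12)/(13) are near-inverses. With
# alpha = 0, X = 100 maps to S = int(100**0.75 + 0.4054) = 32, which
# dequantizes to 32**(4/3) ~= 101.6; raising alpha coarsens the grid:
#
#     >>> _quantize_symbol(np.array([100.0, -50.0, 0.0]), 0.0)
#     array([ 32, -19,   0])
#     >>> _quantize_symbol(np.array([100.0, -50.0, 0.0]), 8.0)
#     array([11, -7,  0])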
# -----------------------------------------------------------------------------
# Alpha initialization (Eq. 14)
# -----------------------------------------------------------------------------
def _initial_alpha_hat(X: "FloatArray", MQ: int = 8191) -> int:
"""
Compute the initial scalefactor estimate alpha_hat for a frame.
The assignment proposes the following first approximation (Equation 14):
alpha_hat = (16/3) * log2( max_k(|X(k)|)^(3/4) / MQ )
where max_k runs over all MDCT coefficients of the frame (not per band),
and MQ is the maximum quantization level parameter (2*MQ + 1 levels).
Parameters
----------
X : FloatArray
MDCT coefficients of one frame (or one ESH subframe), shape (N,).
MQ : int
Quantizer parameter (default 8191, as per assignment).
Returns
-------
int
Integer alpha_hat (rounded to nearest integer).
"""
x_max = float(np.max(np.abs(X)))
if x_max <= 0.0:
return 0
alpha_hat = (16.0 / 3.0) * np.log2((x_max ** (3.0 / 4.0)) / float(MQ))
return int(np.round(alpha_hat))
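# Editorial worked example (hedged): for max|X| = 1024, Eq. (14) gives
# (16/3) * log2(1024**0.75 / 8191) = (16/3) * (7.5 - log2(8191)) ~= -29.33:
#
#     >>> _initial_alpha_hat(np.array([1024.0]))
#     -29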
# -----------------------------------------------------------------------------
# Band utilities
# -----------------------------------------------------------------------------
def _band_slices(frame_type: FrameType) -> list[tuple[int, int]]:
"""
Return scalefactor band ranges [wlow, whigh] (inclusive) for the given frame type.
These are derived from the psychoacoustic tables (TableB219),
and map directly to MDCT indices:
- long: 0..1023
- short (ESH subframe): 0..127
Parameters
----------
frame_type : FrameType
Frame type ("OLS", "LSS", "ESH", "LPS").
Returns
-------
list[tuple[int, int]]
List of (lo, hi) inclusive index pairs for each band.
"""
table, _Nfft = get_table(frame_type)
wlow, whigh, _bval, _qthr_db = band_limits(table)
bands: list[tuple[int, int]] = []
for lo, hi in zip(wlow, whigh):
bands.append((int(lo), int(hi)))
return bands
def _band_energy(x: FloatArray, lo: int, hi: int) -> float:
"""
Compute energy of a spectral segment x[lo:hi+1].
Parameters
----------
x : FloatArray
MDCT coefficient vector.
lo, hi : int
Inclusive index range.
Returns
-------
float
Sum of squares (energy) within the band.
"""
sec = x[lo : hi + 1]
return float(np.sum(sec * sec))
def _psychoacoustic_threshold(
X: FloatArray,
SMR_col: FloatArray,
bands: list[tuple[int, int]],
) -> FloatArray:
"""
Compute psychoacoustic thresholds T(b) per band.
Uses:
P(b) = sum_{k in band} X(k)^2
T(b) = P(b) / SMR(b)
Parameters
----------
X : FloatArray
MDCT coefficients for a frame (long) or one ESH subframe (short).
SMR_col : FloatArray
SMR values for this frame/subframe, shape (NB,).
bands : list[tuple[int, int]]
Band index ranges.
Returns
-------
FloatArray
Threshold vector T(b), shape (NB,).
"""
nb = len(bands)
T = np.zeros((nb,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
P = _band_energy(X, lo, hi)
smr = float(SMR_col[b])
if smr <= EPS:
T[b] = 0.0
else:
T[b] = P / smr
return T
# -----------------------------------------------------------------------------
# Alpha selection per band + neighbor-difference constraint
# -----------------------------------------------------------------------------
def _best_alpha_for_band(
X: "FloatArray", lo: int, hi: int, T_b: float,
alpha_hat: int, alpha_prev: int, alpha_min: int, alpha_max: int,
) -> int:
"""
Determine the band-wise scalefactor alpha(b) following the assignment.
Procedure:
- Start from a frame-wise initial estimate alpha_hat.
    - Iteratively increase alpha(b) by 1 as long as the quantization error power
      stays below the psychoacoustic threshold T(b):
          P_e(b) = sum_{k in band} ( X(k) - Xhat(k) )^2
    - Stop increasing alpha(b) if the neighbor constraint would be violated:
          |alpha(b) - alpha(b-1)| <= 60
      When processing bands sequentially (low -> high), this becomes:
          alpha(b) <= alpha_prev + 60
Notes:
- This function does not decrease alpha if the initial value already violates
the threshold; the assignment only specifies iterative increase.
Parameters
----------
X : FloatArray
Full MDCT vector of the current (sub)frame, shape (N,).
lo, hi : int
Band index bounds (inclusive), defining the band slice.
T_b : float
Threshold T(b) for this band.
alpha_hat : int
Initial frame-wise estimate (Equation 14).
alpha_prev : int
Previously selected alpha for band b-1 (neighbor constraint reference).
alpha_min, alpha_max : int
Safeguard bounds for alpha.
Returns
-------
int
Selected integer alpha(b).
"""
if T_b <= 0.0:
return int(alpha_hat)
Xsec = X[lo : hi + 1]
# Neighbor constraint (sequential processing): alpha(b) <= alpha_prev + 60
alpha_limit = min(int(alpha_max), int(alpha_prev) + MAX_SF_DELTA)
# Start from alpha_hat, clamped to feasible range
alpha = int(alpha_hat)
alpha = max(int(alpha_min), min(alpha, int(alpha_limit)))
# Evaluate at current alpha
Ssec = _quantize_symbol(Xsec, alpha)
Xhat = _dequantize_symbol(Ssec, alpha)
Pe = float(np.sum((Xsec - Xhat) ** 2))
# If already above threshold, return current alpha (no decrease step specified)
if Pe > T_b:
return alpha
# Increase alpha while still under threshold and within constraints
while True:
alpha_next = alpha + 1
if alpha_next > alpha_limit:
break
Ssec = _quantize_symbol(Xsec, alpha_next)
Xhat = _dequantize_symbol(Ssec, alpha_next)
Pe_next = float(np.sum((Xsec - Xhat) ** 2))
if Pe_next > T_b:
break
alpha = alpha_next
return alpha
# -----------------------------------------------------------------------------
# Public API
# -----------------------------------------------------------------------------
def aac_quantizer(
frame_F: FrameChannelF,
frame_type: FrameType,
SMR: FloatArray,
) -> tuple[QuantizedSymbols, ScaleFactors, GlobalGain]:
"""
AAC quantizer for one channel (Level 3).
Quantizes MDCT coefficients (after TNS) using band-wise scalefactors derived
from psychoacoustic thresholds computed via SMR.
The implementation follows the assignment procedure:
- Compute an initial frame-wise alpha_hat using Equation (14), based on the
maximum MDCT coefficient magnitude of the (sub)frame.
- For each band b, increase alpha(b) by 1 while the quantization error power
P_e(b) stays below the threshold T(b).
- Enforce the neighbor constraint |alpha(b) - alpha(b-1)| <= 60 during the
band-by-band search (no post-processing needed).
Parameters
----------
frame_F : FrameChannelF
MDCT coefficients after TNS, one channel.
Shapes:
- Long frames: (1024,) or (1024, 1)
- ESH: (128, 8)
frame_type : FrameType
AAC frame type ("OLS", "LSS", "ESH", "LPS").
SMR : FloatArray
Signal-to-Mask Ratio per band.
Shapes:
- Long: (NB,) or (NB, 1)
- ESH: (NB, 8)
Returns
-------
S : QuantizedSymbols
Quantized symbols S(k), packed as shape (1024, 1) for all frame types.
For ESH, the 8 subframes are packed in column-major subframe layout.
sfc : ScaleFactors
DPCM-coded scalefactors:
sfc(0) = alpha(0) = G
sfc(b) = alpha(b) - alpha(b-1), for b > 0
Shapes:
- Long: (NB, 1)
- ESH: (NB, 8)
G : GlobalGain
Global gain G = alpha(0).
- Long: scalar float
- ESH: array shape (1, 8), dtype float64
"""
bands = _band_slices(frame_type)
NB = len(bands)
X = np.asarray(frame_F, dtype=np.float64)
SMR = np.asarray(SMR, dtype=np.float64)
# -------------------------------------------------------------------------
# ESH: 8 short subframes, each of length 128
# -------------------------------------------------------------------------
if frame_type == "ESH":
if X.shape != (128, 8):
raise ValueError("For ESH, frame_F must have shape (128, 8).")
if SMR.shape != (NB, 8):
raise ValueError(f"For ESH, SMR must have shape ({NB}, 8).")
S_out: QuantizedSymbols = np.zeros((1024, 1), dtype=np.int64)
sfc: ScaleFactors = np.zeros((NB, 8), dtype=np.int64)
G_arr = np.zeros((1, 8), dtype=np.float64)
# Packed output view: (128, 8) with column-major layout
S_pack = S_out[:, 0].reshape(128, 8, order="F")
for j in range(8):
Xj = X[:, j].reshape(128)
SMRj = SMR[:, j].reshape(NB)
# Compute psychoacoustic threshold T(b) for this subframe
T = _psychoacoustic_threshold(Xj, SMRj, bands)
# Frame-wise initial estimate alpha_hat (Equation 14)
alpha_hat = _initial_alpha_hat(Xj)
# Band-wise scalefactors alpha(b)
alpha = np.zeros((NB,), dtype=np.int64)
alpha_prev = int(alpha_hat)
for b, (lo, hi) in enumerate(bands):
alpha_b = _best_alpha_for_band(
X=Xj,
lo=lo,
hi=hi,
T_b=float(T[b]),
alpha_hat=int(alpha_hat),
alpha_prev=int(alpha_prev),
alpha_min=-4096,
alpha_max=4096,
)
alpha[b] = int(alpha_b)
alpha_prev = int(alpha_b)
# DPCM-coded scalefactors
G_arr[0, j] = float(alpha[0])
sfc[0, j] = int(alpha[0])
for b in range(1, NB):
sfc[b, j] = int(alpha[b] - alpha[b - 1])
# Quantize MDCT coefficients band-by-band
Sj = np.zeros((128,), dtype=np.int64)
for b, (lo, hi) in enumerate(bands):
Sj[lo : hi + 1] = _quantize_symbol(Xj[lo : hi + 1], float(alpha[b]))
# Store subframe in packed output
S_pack[:, j] = Sj
return S_out, sfc, G_arr
# -------------------------------------------------------------------------
# Long frames: OLS / LSS / LPS, length 1024
# -------------------------------------------------------------------------
if X.shape == (1024,):
Xv = X
elif X.shape == (1024, 1):
Xv = X[:, 0]
else:
raise ValueError("For non-ESH, frame_F must have shape (1024,) or (1024, 1).")
if SMR.shape == (NB,):
SMRv = SMR
elif SMR.shape == (NB, 1):
SMRv = SMR[:, 0]
else:
raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")
# Compute psychoacoustic threshold T(b) for the long frame
T = _psychoacoustic_threshold(Xv, SMRv, bands)
# Frame-wise initial estimate alpha_hat (Equation 14)
alpha_hat = _initial_alpha_hat(Xv)
# Band-wise scalefactors alpha(b)
alpha = np.zeros((NB,), dtype=np.int64)
alpha_prev = int(alpha_hat)
for b, (lo, hi) in enumerate(bands):
alpha_b = _best_alpha_for_band(
X=Xv,
lo=lo,
hi=hi,
T_b=float(T[b]),
alpha_hat=int(alpha_hat),
alpha_prev=int(alpha_prev),
alpha_min=-4096,
alpha_max=4096,
)
alpha[b] = int(alpha_b)
alpha_prev = int(alpha_b)
# DPCM-coded scalefactors
sfc_out: ScaleFactors = np.zeros((NB, 1), dtype=np.int64)
sfc_out[0, 0] = int(alpha[0])
for b in range(1, NB):
sfc_out[b, 0] = int(alpha[b] - alpha[b - 1])
G: float = float(alpha[0])
# Quantize MDCT coefficients band-by-band
S_vec = np.zeros((1024,), dtype=np.int64)
for b, (lo, hi) in enumerate(bands):
S_vec[lo : hi + 1] = _quantize_symbol(Xv[lo : hi + 1], float(alpha[b]))
return S_vec.reshape(1024, 1), sfc_out, G
def aac_i_quantizer(
S: QuantizedSymbols,
sfc: ScaleFactors,
G: GlobalGain,
frame_type: FrameType,
) -> FrameChannelF:
"""
Inverse quantizer (iQuantizer) for one channel.
Reconstructs MDCT coefficients from quantized symbols and DPCM scalefactors.
Parameters
----------
S : QuantizedSymbols
Quantized symbols, shape (1024, 1) (or any array with 1024 elements).
sfc : ScaleFactors
DPCM-coded scalefactors.
Shapes:
- Long: (NB, 1)
- ESH: (NB, 8)
G : GlobalGain
Global gain (not strictly required if sfc includes sfc(0)=alpha(0)).
Present for API compatibility with the assignment.
frame_type : FrameType
AAC frame type.
Returns
-------
FrameChannelF
Reconstructed MDCT coefficients:
- ESH: (128, 8)
- Long: (1024, 1)
"""
bands = _band_slices(frame_type)
NB = len(bands)
S_flat = np.asarray(S, dtype=np.int64).reshape(-1)
if S_flat.shape[0] != 1024:
raise ValueError("S must contain 1024 symbols.")
if frame_type == "ESH":
sfc = np.asarray(sfc, dtype=np.int64)
if sfc.shape != (NB, 8):
raise ValueError(f"For ESH, sfc must have shape ({NB}, 8).")
S_128x8 = _esh_unpack(S_flat)
Xrec = np.zeros((128, 8), dtype=np.float64)
for j in range(8):
alpha = np.zeros((NB,), dtype=np.int64)
alpha[0] = int(sfc[0, j])
for b in range(1, NB):
alpha[b] = int(alpha[b - 1] + sfc[b, j])
Xj = np.zeros((128,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
Xj[lo : hi + 1] = _dequantize_symbol(S_128x8[lo : hi + 1, j].astype(np.int64), float(alpha[b]))
Xrec[:, j] = Xj
return Xrec
sfc = np.asarray(sfc, dtype=np.int64)
if sfc.shape != (NB, 1):
raise ValueError(f"For non-ESH, sfc must have shape ({NB}, 1).")
alpha = np.zeros((NB,), dtype=np.int64)
alpha[0] = int(sfc[0, 0])
for b in range(1, NB):
alpha[b] = int(alpha[b - 1] + sfc[b, 0])
Xrec = np.zeros((1024,), dtype=np.float64)
for b, (lo, hi) in enumerate(bands):
Xrec[lo : hi + 1] = _dequantize_symbol(S_flat[lo : hi + 1], float(alpha[b]))
return Xrec.reshape(1024, 1)
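# ------------------------------------------------------------
# Editorial usage sketch (not part of the assignment API): quantize and
# reconstruct one long frame with a flat synthetic SMR (a power ratio, not
# dB). Requires TableB219.mat to be reachable through core.aac_utils; run
# from the repo root (module path assumed).
# ------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(1)
    X = rng.standard_normal((1024, 1)) * 100.0
    NB = len(_band_slices("OLS"))
    SMR = np.full((NB, 1), 10.0)        # flat synthetic SMR per band
    S, sfc, G = aac_quantizer(X, "OLS", SMR)
    X_hat = aac_i_quantizer(S, sfc, G, "OLS")
    rel_err = float(np.sum((X - X_hat) ** 2) / np.sum(X ** 2))
    print(f"G={G}, relative reconstruction error={rel_err:.3e}")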

View File

@ -39,7 +39,7 @@ from core.aac_types import *
# -----------------------------------------------------------------------------
-def _band_ranges_for_kcount(k_count: int) -> BandRanges:
+def _band_ranges(k_count: int) -> BandRanges:
     """
     Return Bark band index ranges [start, end] (inclusive) for the given MDCT line count.
@ -66,7 +66,7 @@ def _band_ranges_for_kcount(k_count: int) -> BandRanges:
     start = tbl[:, 1].astype(int)
     end = tbl[:, 2].astype(int)
-    ranges: list[tuple[int, int]] = [(int(s), int(e)) for s, e in zip(start, end)]
+    ranges: BandRanges = [(int(s), int(e)) for s, e in zip(start, end)]
     for s, e in ranges:
         if s < 0 or e < s or e >= k_count:
@ -117,7 +117,7 @@ def _compute_sw(x: MdctCoeffs) -> MdctCoeffs:
     x = np.asarray(x, dtype=np.float64).reshape(-1)
     k_count = int(x.shape[0])
-    bands = _band_ranges_for_kcount(k_count)
+    bands = _band_ranges(k_count)
     sw = np.zeros(k_count, dtype=np.float64)
     for s, e in bands:
@ -347,7 +347,7 @@ def _apply_itns_iir(y: MdctCoeffs, a_q: MdctCoeffs) -> MdctCoeffs:
     return x_hat
-def _tns_one_vector(x: MdctCoeffs) -> tuple[MdctCoeffs, MdctCoeffs]:
+def _tns_vector(x: MdctCoeffs) -> tuple[MdctCoeffs, MdctCoeffs]:
     """
     TNS for a single MDCT vector (one long frame or one short subframe).
@ -430,7 +430,7 @@ def aac_tns(frame_F_in: FrameChannelF, frame_type: FrameType) -> Tuple[FrameChan
     a_out = np.empty((PRED_ORDER, 8), dtype=np.float64)
     for j in range(8):
-        y[:, j], a_out[:, j] = _tns_one_vector(x[:, j])
+        y[:, j], a_out[:, j] = _tns_vector(x[:, j])
     return y, a_out
@ -443,7 +443,7 @@ def aac_tns(frame_F_in: FrameChannelF, frame_type: FrameType) -> Tuple[FrameChan
     else:
         raise ValueError('For non-ESH, frame_F_in must have shape (1024,) or (1024, 1).')
-    y_vec, a_q = _tns_one_vector(x_vec)
+    y_vec, a_q = _tns_vector(x_vec)
     if out_shape == (1024,):
         y_out = y_vec

View File

@ -163,6 +163,42 @@ def snr_db(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
    return float(10.0 * np.log10(ps / pn))
def estimate_lag_mono(x_ref: TimeSignal, x_hat: TimeSignal, max_lag=4096):
"""
Estimate time lag between two mono signals.
Returns lag (positive means x_hat delayed).
"""
n = min(len(x_ref), len(x_hat))
x_ref = x_ref[:n]
x_hat = x_hat[:n]
corr = np.correlate(x_ref, x_hat, mode='full')
lags = np.arange(-n + 1, n)
center = n - 1
lo = max(0, center - max_lag)
hi = min(len(corr), center + max_lag + 1)
best = lo + int(np.argmax(corr[lo:hi]))
return int(lags[best])
def match_gain(x_ref: StereoSignal, x_hat: StereoSignal) -> float:
"""
Least-squares gain g that best maps x_hat -> x_ref.
"""
n = min(x_ref.shape[0], x_hat.shape[0])
c = min(x_ref.shape[1], x_hat.shape[1])
r = x_ref[:n, :c].reshape(-1).astype(np.float64)
h = x_hat[:n, :c].reshape(-1).astype(np.float64)
denom = float(np.dot(h, h))
if denom <= 0.0:
return 1.0
return float(np.dot(r, h) / denom)
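# Editorial usage sketch (hedged; x_ref / x_hat stand for the original and the
# decoded stereo arrays): gain-match and check alignment before measuring SNR:
#
#     lag = estimate_lag_mono(x_ref[:, 0], x_hat[:, 0])   # 0 when aligned
#     g = match_gain(x_ref, x_hat)
#     print(lag, snr_db(x_ref, g * x_hat))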
# -----------------------------------------------------------------------------
# Psychoacoustic band tables (TableB219.mat)
# -----------------------------------------------------------------------------

View File

@ -0,0 +1,403 @@
import numpy as np
import scipy.io as sio
import os
# ------------------ LOAD LUT ------------------
def load_LUT(mat_filename=None):
"""
Loads the list of Huffman Codebooks (LUTs)
Returns:
huffLUT : list (index 1..11 used, index 0 unused)
"""
if mat_filename is None:
current_dir = os.path.dirname(os.path.abspath(__file__))
mat_filename = os.path.join(current_dir, "huffCodebooks.mat")
mat = sio.loadmat(mat_filename)
huffCodebooks_raw = mat['huffCodebooks'].squeeze()
huffCodebooks = []
for i in range(11):
huffCodebooks.append(np.array(huffCodebooks_raw[i]))
# Build inverse VLC tables
invTable = [None] * 11
for i in range(11):
h = huffCodebooks[i][:, 2].astype(int) # column 3
hlength = huffCodebooks[i][:, 1].astype(int) # column 2
hbin = []
for j in range(len(h)):
hbin.append(format(h[j], f'0{hlength[j]}b'))
invTable[i] = vlc_table(hbin)
# Build Huffman LUT dicts
huffLUT = [None] * 12 # index 0 unused
params = [
(4, 1, True),
(4, 1, True),
(4, 2, False),
(4, 2, False),
(2, 4, True),
(2, 4, True),
(2, 7, False),
(2, 7, False),
(2, 12, False),
(2, 12, False),
(2, 16, False),
]
for i, (nTupleSize, maxAbs, signed) in enumerate(params, start=1):
huffLUT[i] = {
'LUT': huffCodebooks[i-1],
'invTable': invTable[i-1],
'codebook': i,
'nTupleSize': nTupleSize,
'maxAbsCodeVal': maxAbs,
'signedValues': signed
}
return huffLUT
def vlc_table(code_array):
"""
codeArray: list of strings, each string is a Huffman codeword (e.g. '0101')
returns:
h : NumPy array of shape (num_nodes, 3)
columns:
[ next_if_0 , next_if_1 , symbol_index ]
"""
h = np.zeros((1, 3), dtype=int)
for code_index, code in enumerate(code_array, start=1):
word = [int(bit) for bit in code]
h_index = 0
for bit in word:
k = bit
next_node = h[h_index, k]
if next_node == 0:
h = np.vstack([h, [0, 0, 0]])
new_index = h.shape[0] - 1
h[h_index, k] = new_index
h_index = new_index
else:
h_index = next_node
h[h_index, 2] = code_index
return h
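# Editorial example (hedged): the prefix code {'0', '10', '11'} yields a
# 5-node table. Row 0 is the root; rows with zero children are leaves and
# carry the 1-based symbol index in column 3:
#
#     >>> vlc_table(['0', '10', '11'])
#     array([[1, 2, 0],
#            [0, 0, 1],
#            [3, 4, 0],
#            [0, 0, 2],
#            [0, 0, 3]])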
# ------------------ ENCODE ------------------
def encode_huff(coeff_sec, huff_LUT_list, force_codebook = None):
"""
Huffman-encode a sequence of quantized coefficients.
This function selects the appropriate Huffman codebook based on the
maximum absolute value of the input coefficients, encodes the coefficients
into a binary Huffman bitstream, and returns both the bitstream and the
selected codebook index.
This is the Python equivalent of the MATLAB `encodeHuff.m` function used
in audio/image coding (e.g., scale factor band encoding). The input
coefficient sequence is grouped into fixed-size tuples as defined by
the chosen Huffman LUT. Zero-padding may be applied internally.
Parameters
----------
coeff_sec : array_like of int
1-D array of quantized integer coefficients to encode.
Typically corresponds to a "section" or scale-factor band.
huff_LUT_list : list
List of Huffman lookup-table dictionaries as returned by `loadLUT()`.
Index 1..11 correspond to valid Huffman codebooks.
        Index 0 is unused.
    force_codebook : int or None, optional
        If provided, bypasses codebook selection and encodes with this
        codebook; in that case ONLY the bitstream string is returned
        (no codebook index).
Returns
-------
huffSec : str
Huffman-encoded bitstream represented as a string of '0' and '1'
characters.
huffCodebook : int
Index (1..11) of the Huffman codebook used for encoding.
A value of 0 indicates a special all-zero section.
"""
if force_codebook is not None:
return huff_LUT_code_1(huff_LUT_list[force_codebook], coeff_sec)
    maxAbsVal = np.max(np.abs(coeff_sec))
    if maxAbsVal == 0:
        # Special all-zero section: empty bitstream, codebook 0
        return huff_LUT_code_0(), 0
    if maxAbsVal > 15:
        # Magnitudes above 15 use codebook 11 with escape sequences
        return huff_LUT_code_ESC(huff_LUT_list[11], coeff_sec), 11
    if maxAbsVal >= 13:
        return huff_LUT_code_1(huff_LUT_list[11], coeff_sec), 11
    # For smaller magnitudes, two codebooks cover each range; encode with
    # both and keep the shorter bitstream.
    if maxAbsVal == 1:
        candidates = (1, 2)
    elif maxAbsVal == 2:
        candidates = (3, 4)
    elif maxAbsVal <= 4:    # 3, 4
        candidates = (5, 6)
    elif maxAbsVal <= 7:    # 5, 6, 7
        candidates = (7, 8)
    else:                   # 8 .. 12
        candidates = (9, 10)
    huffSec1 = huff_LUT_code_1(huff_LUT_list[candidates[0]], coeff_sec)
    huffSec2 = huff_LUT_code_1(huff_LUT_list[candidates[1]], coeff_sec)
    if len(huffSec1) <= len(huffSec2):
        return huffSec1, candidates[0]
    return huffSec2, candidates[1]
def huff_LUT_code_1(huff_LUT, coeff_sec):
LUT = huff_LUT['LUT']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
signedValues = huff_LUT['signedValues']
numTuples = int(np.ceil(len(coeff_sec) / nTupleSize))
if signedValues:
coeff = coeff_sec + maxAbsCodeVal
base = 2 * maxAbsCodeVal + 1
else:
coeff = coeff_sec
base = maxAbsCodeVal + 1
coeffPad = np.zeros(numTuples * nTupleSize, dtype=int)
coeffPad[:len(coeff)] = coeff
huffSec = []
powers = base ** np.arange(nTupleSize - 1, -1, -1)
for i in range(numTuples):
nTuple = coeffPad[i*nTupleSize:(i+1)*nTupleSize]
huffIndex = int(np.abs(nTuple) @ powers)
hexVal = LUT[huffIndex, 2]
huffLen = LUT[huffIndex, 1]
bits = format(int(hexVal), f'0{int(huffLen)}b')
if signedValues:
huffSec.append(bits)
else:
signBits = ''.join('1' if v < 0 else '0' for v in nTuple)
huffSec.append(bits + signBits)
return ''.join(huffSec)
def huff_LUT_code_0():
return ''
def huff_LUT_code_ESC(huff_LUT, coeff_sec):
LUT = huff_LUT['LUT']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
numTuples = int(np.ceil(len(coeff_sec) / nTupleSize))
base = maxAbsCodeVal + 1
coeffPad = np.zeros(numTuples * nTupleSize, dtype=int)
coeffPad[:len(coeff_sec)] = coeff_sec
huffSec = []
powers = base ** np.arange(nTupleSize - 1, -1, -1)
for i in range(numTuples):
nTuple = coeffPad[i*nTupleSize:(i+1)*nTupleSize]
lnTuple = nTuple.astype(float)
lnTuple[lnTuple == 0] = np.finfo(float).eps
N4 = np.maximum(0, np.floor(np.log2(np.abs(lnTuple))).astype(int))
N = np.maximum(0, N4 - 4)
esc = np.abs(nTuple) > 15
nTupleESC = nTuple.copy()
nTupleESC[esc] = np.sign(nTupleESC[esc]) * 16
huffIndex = int(np.abs(nTupleESC) @ powers)
hexVal = LUT[huffIndex, 2]
huffLen = LUT[huffIndex, 1]
bits = format(int(hexVal), f'0{int(huffLen)}b')
escSeq = ''
for k in range(nTupleSize):
if esc[k]:
escSeq += '1' * N[k]
escSeq += '0'
escSeq += format(abs(nTuple[k]) - (1 << N4[k]), f'0{N4[k]}b')
signBits = ''.join('1' if v < 0 else '0' for v in nTuple)
huffSec.append(bits + signBits + escSeq)
return ''.join(huffSec)
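# Editorial worked example (hedged): for a coefficient of +100 (|v| > 15),
# the tuple stores the clamped value 16 and an escape sequence follows the
# codeword and sign bits: N4 = floor(log2(100)) = 6, N = N4 - 4 = 2, so the
# suffix is '1'*2 + '0' + format(100 - 2**6, '06b') = '110' + '100100'.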
# ------------------ DECODE ------------------
def decode_huff(huff_sec, huff_LUT):
"""
Decode a Huffman-encoded stream.
Parameters
----------
huff_sec : array-like of int or str
Huffman encoded stream as a sequence of 0 and 1 (string or list/array).
huff_LUT : dict
Huffman lookup table with keys:
- 'invTable': inverse table (numpy array)
- 'codebook': codebook number
- 'nTupleSize': tuple size
- 'maxAbsCodeVal': maximum absolute code value
- 'signedValues': True/False
Returns
-------
decCoeffs : list of int
Decoded quantized coefficients.
"""
h = huff_LUT['invTable']
huffCodebook = huff_LUT['codebook']
nTupleSize = huff_LUT['nTupleSize']
maxAbsCodeVal = huff_LUT['maxAbsCodeVal']
signedValues = huff_LUT['signedValues']
# Convert string to array of ints
if isinstance(huff_sec, str):
huff_sec = np.array([int(b) for b in huff_sec])
eos = False
decCoeffs = []
streamIndex = 0
while not eos:
wordbit = 0
r = 0 # start at root
# Decode Huffman word using inverse table
while True:
b = huff_sec[streamIndex + wordbit]
wordbit += 1
rOld = r
r = h[rOld, b]
if h[r, 0] == 0 and h[r, 1] == 0:
symbolIndex = h[r, 2] - 1 # zero-based
streamIndex += wordbit
break
# Decode n-tuple magnitudes
if signedValues:
base = 2 * maxAbsCodeVal + 1
nTupleDec = []
tmp = symbolIndex
for p in reversed(range(nTupleSize)):
val = tmp // (base ** p)
nTupleDec.append(val - maxAbsCodeVal)
tmp = tmp % (base ** p)
nTupleDec = np.array(nTupleDec)
else:
base = maxAbsCodeVal + 1
nTupleDec = []
tmp = symbolIndex
for p in reversed(range(nTupleSize)):
val = tmp // (base ** p)
nTupleDec.append(val)
tmp = tmp % (base ** p)
nTupleDec = np.array(nTupleDec)
        # Apply sign bits. Only the UNSIGNED codebooks transmit explicit sign
        # bits (the encoder appends them after each codeword); for signed
        # codebooks the sign is already embedded in the decoded symbol, so
        # reading sign bits here would corrupt the stream.
        if signedValues:
            nTupleSign = np.ones(nTupleSize, dtype=int)
        else:
            nTupleSignBits = huff_sec[streamIndex:streamIndex + nTupleSize]
            nTupleSign = -(np.sign(nTupleSignBits - 0.5)).astype(int)
            streamIndex += nTupleSize
            nTupleDec = nTupleDec * nTupleSign
# Handle escape sequences
escIndex = np.where(np.abs(nTupleDec) == 16)[0]
if huffCodebook == 11 and escIndex.size > 0:
for idx in escIndex:
N = 0
b = huff_sec[streamIndex]
while b:
N += 1
b = huff_sec[streamIndex + N]
# Skip the N leading '1' bits AND the terminating '0' delimiter.
# The encoder writes: '1'*N + '0' + <N4 bits>
streamIndex += N + 1
N4 = N + 4
escape_word = huff_sec[streamIndex:streamIndex + N4]
escape_value = 2 ** N4 + int("".join(map(str, escape_word)), 2)
nTupleDec[idx] = escape_value
# We already consumed the delimiter above; now consume only N4 bits.
streamIndex += N4
# Apply signs again
nTupleDec[escIndex] *= nTupleSign[escIndex]
decCoeffs.extend(nTupleDec.tolist())
if streamIndex >= len(huff_sec):
eos = True
return decCoeffs
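# ------------------------------------------------------------
# Editorial round-trip sketch (not part of the provided material): exercises
# the escape path (|v| > 15) of codebook 11 together with plain coding.
# ------------------------------------------------------------
if __name__ == "__main__":
    luts = load_LUT()
    vals = [0, 5, -13, 100, -2000, 7]
    bits, cb = encode_huff(np.array(vals), luts)
    dec = decode_huff(bits, luts[cb])[: len(vals)]
    print(f"codebook={cb}, decoded ok={list(map(int, dec)) == vals}")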

View File

@ -111,21 +111,6 @@ def _thresholds_from_smr(
     return T
-def _normalize_global_gain(G: GlobalGain) -> float | FloatArray:
-    """
-    Normalize GlobalGain to match AACChannelFrameF3["G"] type:
-      - long: return float
-      - ESH:  return float64 ndarray of shape (1, 8)
-    """
-    if np.isscalar(G):
-        return float(G)
-    G_arr = np.asarray(G)
-    if G_arr.size == 1:
-        return float(G_arr.reshape(-1)[0])
-    return np.asarray(G_arr, dtype=np.float64)
 # -----------------------------------------------------------------------------
 # Public helpers (useful for level_x demo wrappers)
 # -----------------------------------------------------------------------------
@ -476,10 +461,6 @@ def aac_coder_3(
     S_L, sfc_L, G_L = aac_quantizer(chl_f_tns, frame_type, SMR_L)
     S_R, sfc_R, G_R = aac_quantizer(chr_f_tns, frame_type, SMR_R)
-    # Normalize G types for AACSeq3 schema (float | float64 ndarray).
-    G_Ln = _normalize_global_gain(G_L)
-    G_Rn = _normalize_global_gain(G_R)
     # Huffman-code ONLY the DPCM differences for b>0.
     # sfc[0] corresponds to alpha(0)=G and is stored separately in the frame.
     sfc_L_dpcm = np.asarray(sfc_L, dtype=np.int64)[1:, ...]
@ -503,7 +484,7 @@ def aac_coder_3(
     "chl": {
         "tns_coeffs": np.asarray(chl_tns_coeffs, dtype=np.float64),
         "T": np.asarray(T_L, dtype=np.float64),
-        "G": G_Ln,
+        "G": G_L,
         "sfc": sfc_L_stream,
         "stream": mdct_L_stream,
         "codebook": int(cb_L),
@ -511,7 +492,7 @@ def aac_coder_3(
     "chr": {
         "tns_coeffs": np.asarray(chr_tns_coeffs, dtype=np.float64),
         "T": np.asarray(T_R, dtype=np.float64),
-        "G": G_Rn,
+        "G": G_R,
         "sfc": sfc_R_stream,
         "stream": mdct_R_stream,
         "codebook": int(cb_R),


View File

@ -189,7 +189,7 @@ def _predictability(
 # Band-domain aggregation
 # -----------------------------------------------------------------------------
-def _band_energy_and_weighted_predictability(
+def _band_energy_and_pred(
     r: FloatArray,
     c: FloatArray,
     wlow: BandIndexArray,
@ -238,7 +238,7 @@ def _band_energy_and_weighted_predictability(
     return e_b, c_num_b
-def _psycho_one_window(
+def _psycho_window(
     time_x: FrameChannelT,
     prev1_x: FrameChannelT,
     prev2_x: FrameChannelT,
@ -288,7 +288,7 @@ def _psycho_one_window(
     c_w = _predictability(r, phi, r_m1, phi_m1, r_m2, phi_m2)
     # Aggregate into psycho bands
-    e_b, c_num_b = _band_energy_and_weighted_predictability(r, c_w, wlow, whigh)
+    e_b, c_num_b = _band_energy_and_pred(r, c_w, wlow, whigh)
     # Spread energies and predictability across bands:
     #   ecb(b) = sum_bb e(bb) * S(bb, b)
@ -333,7 +333,7 @@ def _psycho_one_window(
 # ESH window slicing (match filterbank conventions)
 # -----------------------------------------------------------------------------
-def _esh_subframes_256(x_2048: FrameChannelT) -> list[FrameChannelT]:
+def _esh_subframes(x_2048: FrameChannelT) -> list[FrameChannelT]:
     """
     Extract the 8 overlapping 256-sample short windows used by AAC ESH.
@ -408,7 +408,7 @@ def aac_psycho(
     # Long frame types: compute one SMR vector (69 bands)
     if frame_type != "ESH":
-        return _psycho_one_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)
+        return _psycho_window(frame_T, frame_T_prev_1, frame_T_prev_2, N=N, table=table)
     # ESH: compute 8 SMR vectors (42 bands each), one per short subframe.
     #
@ -419,8 +419,8 @@ def aac_psycho(
     #
     # This matches the "within-frame history" convention commonly used in
     # simplified psycho models for ESH.
-    cur_subs = _esh_subframes_256(frame_T)
-    prev1_subs = _esh_subframes_256(frame_T_prev_1)
+    cur_subs = _esh_subframes(frame_T)
+    prev1_subs = _esh_subframes(frame_T_prev_1)
     B = int(table.shape[0])  # expected 42
     smr_out = np.zeros((B, 8), dtype=np.float64)
@ -436,6 +436,6 @@ def aac_psycho(
         x_m1 = cur_subs[j - 1]
         x_m2 = cur_subs[j - 2]
-        smr_out[:, j] = _psycho_one_window(cur_subs[j], x_m1, x_m2, N=256, table=table)
+        smr_out[:, j] = _psycho_window(cur_subs[j], x_m1, x_m2, N=256, table=table)
     return smr_out

View File

@ -35,7 +35,7 @@ MAX_SF_DELTA:int = 60
 # -----------------------------------------------------------------------------
 # Helpers: ESH packing/unpacking (128x8 <-> 1024x1)
 # -----------------------------------------------------------------------------
-def _esh_pack_to_1024(x_128x8: FloatArray) -> FloatArray:
+def _esh_pack(x_128x8: FloatArray) -> FloatArray:
     """
     Pack ESH coefficients (128 x 8) into a single long vector (1024 x 1).
@ -58,7 +58,7 @@ def _esh_pack_to_1024(x_128x8: FloatArray) -> FloatArray:
     return x_128x8.reshape(1024, 1, order="F")
-def _esh_unpack_from_1024(x_1024x1: FloatArray) -> FloatArray:
+def _esh_unpack(x_1024x1: FloatArray) -> FloatArray:
     """
     Unpack a packed ESH vector (1024 elements) back to shape (128, 8).
@ -226,7 +226,7 @@ def _band_energy(x: FloatArray, lo: int, hi: int) -> float:
     return float(np.sum(sec * sec))
-def _threshold_T_from_SMR(
+def _psychoacoustic_threshold(
     X: FloatArray,
     SMR_col: FloatArray,
     bands: list[tuple[int, int]],
@ -425,7 +425,7 @@ def aac_quantizer(
         SMRj = SMR[:, j].reshape(NB)
         # Compute psychoacoustic threshold T(b) for this subframe
-        T = _threshold_T_from_SMR(Xj, SMRj, bands)
+        T = _psychoacoustic_threshold(Xj, SMRj, bands)
         # Frame-wise initial estimate alpha_hat (Equation 14)
         alpha_hat = _initial_alpha_hat(Xj)
@ -482,7 +482,7 @@ def aac_quantizer(
         raise ValueError(f"For non-ESH, SMR must have shape ({NB},) or ({NB}, 1).")
     # Compute psychoacoustic threshold T(b) for the long frame
-    T = _threshold_T_from_SMR(Xv, SMRv, bands)
+    T = _psychoacoustic_threshold(Xv, SMRv, bands)
     # Frame-wise initial estimate alpha_hat (Equation 14)
     alpha_hat = _initial_alpha_hat(Xv)
@ -566,7 +566,7 @@ def aac_i_quantizer(
     if sfc.shape != (NB, 8):
         raise ValueError(f"For ESH, sfc must have shape ({NB}, 8).")
-    S_128x8 = _esh_unpack_from_1024(S_flat)
+    S_128x8 = _esh_unpack(S_flat)
     Xrec = np.zeros((128, 8), dtype=np.float64)

@@ -27,13 +27,17 @@ from typing import Optional, Tuple, Union

import os
import soundfile as sf
import numpy as np
import matplotlib.pyplot as plt

from core.aac_types import AACSeq3, StereoSignal
from core.aac_coder import aac_coder_3 as core_aac_coder_3
from core.aac_coder import aac_read_wav_stereo_48k
from core.aac_decoder import aac_decoder_3 as core_aac_decoder_3
-from core.aac_utils import snr_db
+from core.aac_utils import snr_db, estimate_lag_mono, match_gain

# Global variable to "pass" AACSeq3 without changing the demo_aac_3 interface.
AAC_Seq_3: AACSeq3

# -----------------------------------------------------------------------------
# Helpers (Level 3 metrics)
@@ -90,6 +94,83 @@ def _bitrate_after_from_aacseq(aac_seq_3: AACSeq3, duration_sec: float) -> float
    return float(total_bits) / float(duration_sec)
def _plot_frame_bitrate_and_compression(
    aac_seq_3: AACSeq3,
    wav_path: Union[str, Path],
    fname_bitrate: Union[str, Path],
    fname_comp: Union[str, Path],
) -> None:
    """
    Compute and plot per-frame bitrate and compression ratio
    for a Level 3 AAC sequence.

    Parameters
    ----------
    aac_seq_3 : AACSeq3
        Output of aac_coder_3 (list of frame dictionaries).
    wav_path : str or Path
        Path to the original WAV file (PCM 48 kHz stereo).
    fname_bitrate : str or Path
        Output file path for the per-frame bitrate plot.
    fname_comp : str or Path
        Output file path for the per-frame compression-ratio plot.
    """
    # Read WAV metadata
    info = sf.info(str(wav_path))
    samplerate = info.samplerate
    total_samples = info.frames
    total_duration = total_samples / samplerate
    n_frames = len(aac_seq_3)

    # AAC long-frame hop size is 1024 new samples per frame
    samples_per_frame = 1024
    duration_per_frame = samples_per_frame / samplerate

    # Original bitrate (file-based estimate)
    original_bits = os.path.getsize(wav_path) * 8.0
    original_bitrate = original_bits / total_duration

    frame_bitrates = []
    frame_compression = []
    for fr in aac_seq_3:
        # Count payload bits of both channels (scale-factor codes + Huffman stream)
        bits = 0
        bits += len(fr["chl"]["sfc"])
        bits += len(fr["chl"]["stream"])
        bits += len(fr["chr"]["sfc"])
        bits += len(fr["chr"]["stream"])

        bitrate = bits / duration_per_frame
        compression = original_bitrate / bitrate if bitrate > 0 else np.inf
        frame_bitrates.append(bitrate)
        frame_compression.append(compression)

    frame_indices = np.arange(n_frames)

    # Plot bitrate per frame and save to file
    plt.figure(figsize=(6, 3), dpi=300)
    plt.plot(frame_indices, frame_bitrates)
    plt.xlabel("Frame index")
    plt.ylabel("Bitrate (bits/s)")
    plt.title("Bitrate (per-frame)")
    plt.tight_layout()
    plt.savefig(str(fname_bitrate))
    plt.close()

    # Plot compression ratio per frame and save to file
    plt.figure(figsize=(6, 3), dpi=300)
    plt.plot(frame_indices, frame_compression)
    plt.xlabel("Frame index")
    plt.ylabel("Compression Ratio")
    plt.title("Compression Ratio (per-frame)")
    plt.tight_layout()
    plt.savefig(str(fname_comp))
    plt.close()
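
For orientation: at 48 kHz with a 1024-sample hop, each frame spans 1024/48000, about 21.33 ms, so a frame carrying B bits contributes B * 46.875 bits/s. A standalone check of that arithmetic (the 3200-bit payload is hypothetical):

duration_per_frame = 1024 / 48000           # ~0.02133 s per long frame
bits_in_frame = 3200                        # hypothetical per-frame payload
print(bits_in_frame / duration_per_frame)   # 150000.0 bits/s
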

# -----------------------------------------------------------------------------
# Public Level 3 API (wrappers)
# -----------------------------------------------------------------------------
@@ -187,20 +268,21 @@ def demo_aac_3(
raise ValueError("Input sampling rate must be 48 kHz.") raise ValueError("Input sampling rate must be 48 kHz.")
# Encode / decode # Encode / decode
-    aac_seq_3 = aac_coder_3(filename_in, filename_aac_coded)
-    x_hat = i_aac_coder_3(aac_seq_3, filename_out)
+    global AAC_Seq_3  # keep the coder output for __main__
+    AAC_Seq_3 = aac_coder_3(filename_in, filename_aac_coded)
+    x_hat = i_aac_coder_3(AAC_Seq_3, filename_out)
    # Optional sanity: ensure output file exists and is readable
    _, fs_hat = sf.read(str(filename_out), always_2d=True)
    if int(fs_hat) != 48000:
        raise ValueError("Decoded output sampling rate must be 48 kHz.")
-    # Metrics
+    # Quality metrics
    s = snr_db(x_ref, x_hat)
    duration = _wav_duration_seconds(filename_in)
    bitrate_before = _bitrate_before_from_file(filename_in)
-    bitrate_after = _bitrate_after_from_aacseq(aac_seq_3, duration)
+    bitrate_after = _bitrate_after_from_aacseq(AAC_Seq_3, duration)
    compression = float("inf") if bitrate_after <= 0.0 else (bitrate_before / bitrate_after)

    return float(s), float(bitrate_after), float(compression)
@@ -218,20 +300,28 @@ if __name__ == "__main__":
    # python -m level_3 material/LicorDeCalandraca.wav LicorDeCalandraca_out_l3.wav
    # or
    # python -m level_3 material/LicorDeCalandraca.wav LicorDeCalandraca_out_l3.wav aac_seq_3.mat
    # or
    # python -m level_3 material/LicorDeCalandraca.wav LicorDeCalandraca_out_l3.wav aac_seq_3.mat bitrate.png compression.png
    import sys

-    if len(sys.argv) not in (3, 4):
-        raise SystemExit("Usage: python -m level_3 <input.wav> <output.wav> [aac_seq_3.mat]")
+    if len(sys.argv) not in (3, 4, 5, 6):
+        raise SystemExit(
+            "Usage: python -m level_3 <input.wav> <output.wav> [aac_seq_3.mat] [bitrate_fname] [compression_fname]"
+        )
    in_wav = Path(sys.argv[1])
    out_wav = Path(sys.argv[2])
    aac_mat = Path(sys.argv[3]) if len(sys.argv) >= 4 else None
    fname_bitrate = Path(sys.argv[4]) if len(sys.argv) >= 5 else "bitrate_per_frame.png"
    fname_comp = Path(sys.argv[5]) if len(sys.argv) == 6 else "compression_per_frame.png"
print(f"Encoding/Decoding {in_wav} to {out_wav}") print(f"Encoding/Decoding {in_wav} to {out_wav}")
if aac_mat is not None: if aac_mat is not None:
print(f"Storing coded sequence to {aac_mat}") print(f"Storing coded sequence to {aac_mat}")
snr, bitrate, compression = demo_aac_3(in_wav, out_wav, aac_mat) snr, bitrate, compression = demo_aac_3(in_wav, out_wav, aac_mat)
    # Plot per-frame bitrate / compression
    _plot_frame_bitrate_and_compression(AAC_Seq_3, in_wav, fname_bitrate, fname_comp)
print(f"SNR = {snr:.3f} dB") print(f"SNR = {snr:.3f} dB")
print(f"Bitrate (coded) = {bitrate:.2f} bits/s") print(f"Bitrate (coded) = {bitrate:.2f} bits/s")
print(f"Compression ratio = {compression:.4f}") print(f"Compression ratio = {compression:.4f}")
@@ -3,3 +3,4 @@ scipy
scipy-stubs
soundfile
pytest
matplotlib