@@ -72,7 +72,7 @@ impl ProcessorCreator for Phi4MMProcessor {
7272 audio_feat_stride : pre_processor_config
7373 . audio_feat_stride
7474 . expect ( "audio_feat_stride" ) ,
75- eightk_method : "fillzero" . to_string ( ) , // Default to fillzero like Python
75+ eightk_method : "fillzero" . to_string ( ) , // Default to fillzero
7676 } ) ,
7777 } )
7878 }
@@ -424,7 +424,7 @@ impl Phi4MMInputsProcessor {
424424 // Apply mel filterbank
425425 let mel_features = self . apply_mel_filterbank ( & spectrogram, sample_rate) ?;
426426
427- // Take log - match Python : clip to minimum 1.0 then log
427+ // Take log: clip to minimum 1.0 then log
428428 let log_features: Vec < Vec < f32 > > = mel_features
429429 . iter ( )
430430 . map ( |frame| frame. iter ( ) . map ( |& x| ( x. max ( 1.0 ) ) . ln ( ) ) . collect ( ) )
@@ -459,7 +459,7 @@ impl Phi4MMInputsProcessor {
459459 // Create Hamming window
460460 let window = self . create_hamming_window ( win_length) ;
461461
462- // Extract frames - match Python logic exactly
462+ // Extract frames
463463 let n_batch = ( wav. len ( ) - win_length) / hop_length + 1 ;
464464 let mut frames = Vec :: new ( ) ;
465465 for i in 0 ..n_batch {
@@ -470,7 +470,7 @@ impl Phi4MMInputsProcessor {
470470 }
471471 }
472472
473- // Apply preemphasis - FIXED to match Python
473+ // Apply preemphasis
474474 let preemphasis = 0.97 ;
475475 self . apply_preemphasis_frames ( & mut frames, preemphasis) ;
476476
@@ -479,7 +479,7 @@ impl Phi4MMInputsProcessor {
479479 let fft = planner. plan_fft_forward ( n_fft) ;
480480
481481 let mut spectrogram = Vec :: new ( ) ;
482- for ( frame_idx , frame) in frames. iter ( ) . enumerate ( ) {
482+ for ( _frame_idx , frame) in frames. iter ( ) . enumerate ( ) {
483483 // Apply window and convert to complex
484484 let mut windowed: Vec < Complex32 > = frame
485485 . iter ( )
@@ -499,7 +499,7 @@ impl Phi4MMInputsProcessor {
499499 . map ( |c| c. norm ( ) )
500500 . collect ( ) ;
501501
502- // Handle 8kHz case - FIXED to match Python padding logic
502+ // Handle 8kHz case
503503 if fs == 8000 && self . eightk_method == "fillzero" {
504504 // Remove nyquist bin and pad with zeros to match 16kHz structure
505505 magnitude. pop ( ) ; // Remove nyquist
@@ -513,7 +513,6 @@ impl Phi4MMInputsProcessor {
513513 Ok ( spectrogram)
514514 }
515515
516- // NEW: Fixed preemphasis to match Python frame-level processing
517516 fn apply_preemphasis_frames ( & self , frames : & mut [ Vec < f32 > ] , preemphasis : f32 ) {
518517 if frames. is_empty ( ) {
519518 return ;
@@ -599,7 +598,6 @@ impl Phi4MMInputsProcessor {
599598 . collect ( )
600599 }
601600
602- // FIXED: Apply mel filterbank with proper frequency range matching Python
603601 fn apply_mel_filterbank (
604602 & self ,
605603 spectrogram : & [ Vec < f32 > ] ,
@@ -635,7 +633,6 @@ impl Phi4MMInputsProcessor {
635633 Ok ( mel_features)
636634 }
637635
638- // FIXED: Mel filterbank creation to match Python SpeechLib logic
639636 fn create_mel_filterbank (
640637 & self ,
641638 n_mels : usize ,
@@ -646,15 +643,14 @@ impl Phi4MMInputsProcessor {
646643 let fmax = sample_rate / 2.0 ;
647644 let fmin = 0.0 ;
648645
649- // Mel scale conversion functions (matching Python)
646+ // Mel scale conversion functions
650647 let hz_to_mel = |f : f32 | 1127.0 * ( 1.0 + f / 700.0 ) . ln ( ) ;
651- let mel_to_hz = |mel : f32 | 700.0 * ( mel / 1127.0 ) . exp ( ) - 700.0 ;
648+ let _mel_to_hz = |mel : f32 | 700.0 * ( mel / 1127.0 ) . exp ( ) - 700.0 ;
652649 let bin_to_mel = |fft_bin : usize | {
653650 1127.0 * ( 1.0 + ( fft_bin as f32 * sample_rate) / ( n_fft as f32 * 700.0 ) ) . ln ( )
654651 } ;
655652 let f_to_bin = |f : f32 | ( ( f * n_fft as f32 / sample_rate) + 0.5 ) as usize ;
656653
657- // Match Python frequency range logic
658654 let klo = f_to_bin ( fmin) + 1 ; // Skip DC component
659655 let khi = f_to_bin ( fmax) . max ( klo) ;
660656
@@ -676,7 +672,7 @@ impl Phi4MMInputsProcessor {
676672 let center = mel_centers[ m + 1 ] ;
677673 let right = mel_centers[ m + 2 ] ;
678674
679- // Match Python frequency range: process from klo to khi
675+ // Process from klo to khi
680676 for fft_bin in klo..khi. min ( bank_width) {
681677 let mbin = bin_to_mel ( fft_bin) ;
682678 if left < mbin && mbin < right {
0 commit comments