1
1
use crate :: api:: FrameQueue ;
2
2
use crate :: cpu_features:: CpuFeatureLevel ;
3
+ use crate :: util:: { Aligned , AlignedBoxedSlice } ;
3
4
use crate :: EncoderStatus ;
4
5
use arrayvec:: ArrayVec ;
5
6
use cfg_if:: cfg_if;
@@ -296,13 +297,13 @@ where
296
297
) -> & mut ( R2cFftHandler < f32 > , FftHandler < f32 > , FftHandler < f32 > ) ;
297
298
298
299
fn do_filtering ( & mut self , src : & [ [ Plane < T > ; 3 ] ] , dest : & mut Frame < T > ) {
299
- let mut dftr = [ 0f32 ; BLOCK_VOLUME ] ;
300
- let mut dftc = [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ;
301
- let mut means = [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ;
300
+ let mut dftr = Aligned :: new ( [ 0f32 ; BLOCK_VOLUME ] ) ;
301
+ let mut dftc = Aligned :: new ( [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ) ;
302
+ let mut means = Aligned :: new ( [ Complex :: < f32 > :: default ( ) ; COMPLEX_COUNT ] ) ;
302
303
303
304
for p in 0 ..3 {
304
305
let ( pad_width, pad_height) = self . pad_dimensions ( p) ;
305
- let mut ebuff = vec ! [ 0f32 ; pad_width * pad_height] ;
306
+ let mut ebuff = AlignedBoxedSlice :: new ( pad_width * pad_height, 0f32 ) ;
306
307
let effective_height = self . effective_height ( p) ;
307
308
let src_stride = src[ 0 ] [ p] . cfg . stride ;
308
309
let ebuff_stride = pad_width;
@@ -324,23 +325,23 @@ where
324
325
self . proc0 (
325
326
src_planes[ z] . get_unchecked ( x..) ,
326
327
self . hw ( ) . get_unchecked ( ( BLOCK_AREA * z) ..) ,
327
- dftr. get_unchecked_mut ( ( BLOCK_AREA * z) ..) ,
328
+ dftr. data . get_unchecked_mut ( ( BLOCK_AREA * z) ..) ,
328
329
src_stride,
329
330
SB_SIZE ,
330
331
self . src_scale ( ) ,
331
332
) ;
332
333
}
333
334
334
- self . real_to_complex_3d ( & dftr, & mut dftc) ;
335
- self . remove_mean ( & mut dftc, self . dftgc ( ) , & mut means) ;
335
+ self . real_to_complex_3d ( & dftr. data , & mut dftc. data ) ;
336
+ self . remove_mean ( & mut dftc. data , self . dftgc ( ) , & mut means. data ) ;
336
337
337
- self . filter_coeffs ( & mut dftc) ;
338
+ self . filter_coeffs ( & mut dftc. data ) ;
338
339
339
- self . add_mean ( & mut dftc, & means) ;
340
- self . complex_to_real_3d ( & dftc, & mut dftr) ;
340
+ self . add_mean ( & mut dftc. data , & means. data ) ;
341
+ self . complex_to_real_3d ( & dftc. data , & mut dftr. data ) ;
341
342
342
343
self . proc1 (
343
- dftr. get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
344
+ dftr. data . get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
344
345
self . hw ( ) . get_unchecked ( ( TB_MIDPOINT * BLOCK_AREA ) ..) ,
345
346
ebuff. get_unchecked_mut ( ( y * ebuff_stride + x) ..) ,
346
347
SB_SIZE ,
@@ -405,7 +406,7 @@ where
405
406
let s0 = s0. add ( u * p0 + v) ;
406
407
let s1 = s1. add ( u * p0 + v) ;
407
408
let dest = dest. add ( u * p1 + v) ;
408
- dest. write ( dest . read ( ) + s0 . read ( ) * s1 . read ( ) ) ;
409
+ dest. write ( s0 . read ( ) . mul_add ( s1 . read ( ) , dest . read ( ) ) ) ;
409
410
}
410
411
}
411
412
}
@@ -693,10 +694,10 @@ where
693
694
pad_dimensions : ArrayVec < ( usize , usize ) , 3 > ,
694
695
effective_heights : ArrayVec < usize , 3 > ,
695
696
696
- hw : [ f32 ; BLOCK_VOLUME ] ,
697
- dftgc : [ Complex < f32 > ; COMPLEX_COUNT ] ,
697
+ hw : Aligned < [ f32 ; BLOCK_VOLUME ] > ,
698
+ dftgc : Aligned < [ Complex < f32 > ; COMPLEX_COUNT ] > ,
698
699
fft : ( R2cFftHandler < f32 > , FftHandler < f32 > , FftHandler < f32 > ) ,
699
- sigmas : [ f32 ; CCNT2 ] ,
700
+ sigmas : Aligned < [ f32 ; CCNT2 ] > ,
700
701
}
701
702
702
703
impl < T > DftDenoiserRust < T >
@@ -708,8 +709,8 @@ where
708
709
pad_dimensions : ArrayVec < ( usize , usize ) , 3 > ,
709
710
effective_heights : ArrayVec < usize , 3 > ,
710
711
) -> Self {
711
- let hw = create_window ( ) ;
712
- let mut dftgr = [ 0f32 ; BLOCK_VOLUME ] ;
712
+ let hw = Aligned :: new ( create_window ( ) ) ;
713
+ let mut dftgr = Aligned :: new ( [ 0f32 ; BLOCK_VOLUME ] ) ;
713
714
714
715
let fft = (
715
716
R2cFftHandler :: new ( SB_SIZE ) ,
@@ -719,15 +720,15 @@ where
719
720
720
721
let mut wscale = 0.0f32 ;
721
722
for k in 0 ..BLOCK_VOLUME {
722
- dftgr[ k] = 255.0 * hw[ k] ;
723
- wscale += hw[ k] . powi ( 2 ) ;
723
+ dftgr. data [ k] = 255.0 * hw. data [ k] ;
724
+ wscale += hw. data [ k] . powi ( 2 ) ;
724
725
}
725
726
let wscale = 1.0 / wscale;
726
727
727
- let mut sigmas = [ 0f32 ; CCNT2 ] ;
728
- sigmas. fill ( sigma / wscale) ;
728
+ let mut sigmas = Aligned :: new ( [ 0f32 ; CCNT2 ] ) ;
729
+ sigmas. data . fill ( sigma / wscale) ;
729
730
730
- let mut denoiser = DftDenoiserRust {
731
+ let mut denoiser = Self {
731
732
dest_scale,
732
733
src_scale,
733
734
peak,
@@ -736,11 +737,11 @@ where
736
737
hw,
737
738
fft,
738
739
sigmas,
739
- dftgc : [ Complex :: default ( ) ; COMPLEX_COUNT ] ,
740
+ dftgc : Aligned :: new ( [ Complex :: default ( ) ; COMPLEX_COUNT ] ) ,
740
741
} ;
741
742
742
- let mut dftgc = [ Complex :: default ( ) ; COMPLEX_COUNT ] ;
743
- denoiser. real_to_complex_3d ( & dftgr, & mut dftgc) ;
743
+ let mut dftgc = Aligned :: new ( [ Complex :: default ( ) ; COMPLEX_COUNT ] ) ;
744
+ denoiser. real_to_complex_3d ( & dftgr. data , & mut dftgc. data ) ;
744
745
denoiser. dftgc = dftgc;
745
746
746
747
denoiser
@@ -778,17 +779,17 @@ where
778
779
779
780
#[ inline( always) ]
780
781
fn hw ( & self ) -> & [ f32 ; BLOCK_VOLUME ] {
781
- & self . hw
782
+ & self . hw . data
782
783
}
783
784
784
785
#[ inline( always) ]
785
786
fn dftgc ( & self ) -> & [ Complex < f32 > ; COMPLEX_COUNT ] {
786
- & self . dftgc
787
+ & self . dftgc . data
787
788
}
788
789
789
790
#[ inline( always) ]
790
791
fn sigmas ( & self ) -> & [ f32 ; CCNT2 ] {
791
- & self . sigmas
792
+ & self . sigmas . data
792
793
}
793
794
794
795
#[ inline( always) ]
0 commit comments