@@ -13,11 +13,16 @@ use crate::encoder::FrameInvariants;
13
13
use crate :: frame:: * ;
14
14
use crate :: tiling:: * ;
15
15
use crate :: util:: { clamp, msb, CastFromPrimitive , Pixel } ;
16
- use rayon:: iter:: { IntoParallelRefIterator , ParallelIterator } ;
16
+ use rayon:: iter:: ParallelIterator ;
17
+ use rayon:: prelude:: * ;
17
18
use rust_hawktracer:: * ;
18
19
19
20
use crate :: cpu_features:: CpuFeatureLevel ;
20
- use std:: cmp;
21
+ use std:: {
22
+ cmp,
23
+ ops:: DerefMut ,
24
+ sync:: { Arc , Mutex } ,
25
+ } ;
21
26
22
27
cfg_if:: cfg_if! {
23
28
if #[ cfg( nasm_x86_64) ] {
@@ -600,31 +605,46 @@ pub fn cdef_filter_tile<T: Pixel>(
600
605
let fb_width = ( output. planes [ 0 ] . rect ( ) . width + 63 ) / 64 ;
601
606
let fb_height = ( output. planes [ 0 ] . rect ( ) . height + 63 ) / 64 ;
602
607
603
- // should parallelize this
604
- let mut queue: Vec < ( usize , usize , TileMut < ' _ , T > ) > = Vec :: new ( ) ;
608
+ let mut queue: Vec < ( usize , usize , Arc < Mutex < & mut TileMut < ' _ , T > > > ) > =
609
+ Vec :: new ( ) ;
610
+ let shared_output = Arc :: new ( Mutex :: new ( output) ) ;
605
611
606
612
for fby in 0 ..fb_height {
607
613
for fbx in 0 ..fb_width {
608
- queue. push ( ( fbx, fby, output ) ) ;
614
+ queue. push ( ( fbx, fby, shared_output . clone ( ) ) ) ;
609
615
}
610
616
}
611
617
612
- queue. par_iter ( ) . for_each ( |tpl| filter_tile ( tpl, fi, input, tb, output ) ) ;
618
+ queue. into_par_iter ( ) . for_each ( |tpl| filter_tile ( tpl, fi, input, tb) ) ;
613
619
}
614
620
615
621
#[ hawktracer( filter_tile) ]
616
622
pub fn filter_tile < T : Pixel > (
617
- tpl : & ( usize , usize , & mut TileMut < ' _ , T > ) , fi : & FrameInvariants < T > ,
618
- input : & Frame < T > , tb : & TileBlocks , output : & mut TileMut < ' _ , T > ,
623
+ tpl : ( usize , usize , Arc < Mutex < & mut TileMut < ' _ , T > > > ) ,
624
+ fi : & FrameInvariants < T > , input : & Frame < T > , tb : & TileBlocks ,
619
625
) {
620
626
// tile_sbo is treated as an offset into the Tiles' plane
621
627
// regions, not as an absolute offset in the visible frame. The
622
628
// Tile's own offset is added to this in order to address into
623
629
// the input Frame.
624
- let tile_sbo = TileSuperBlockOffset ( SuperBlockOffset { x : tpl. 0 , y : tpl. 1 } ) ;
630
+ let ( fbx, fby, shared_output) = tpl;
631
+ let tile_sbo = TileSuperBlockOffset ( SuperBlockOffset { x : fbx, y : fby } ) ;
625
632
let cdef_index = tb. get_cdef ( tile_sbo) ;
626
633
let cdef_dirs = cdef_analyze_superblock ( fi, input, tb, tile_sbo) ;
634
+ loop {
635
+ if shared_output. try_lock ( ) . is_ok ( ) {
636
+ break ;
637
+ }
638
+ }
639
+ let mut output = shared_output. lock ( ) . unwrap ( ) ;
640
+
627
641
cdef_filter_superblock (
628
- fi, input, output, tb, tile_sbo, cdef_index, & cdef_dirs,
642
+ fi,
643
+ input,
644
+ output. deref_mut ( ) ,
645
+ tb,
646
+ tile_sbo,
647
+ cdef_index,
648
+ & cdef_dirs,
629
649
) ;
630
650
}
0 commit comments