Skip to content

Commit 54bddec

Browse files
committed
CDEF parallelization wip/mvp
1 parent 7bb810f · commit 54bddec

File tree

1 file changed

+30
-10
lines changed

1 file changed

+30
-10
lines changed

src/cdef.rs

+30 -10
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,16 @@ use crate::encoder::FrameInvariants;
1313
use crate::frame::*;
1414
use crate::tiling::*;
1515
use crate::util::{clamp, msb, CastFromPrimitive, Pixel};
16-
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
16+
use rayon::iter::ParallelIterator;
17+
use rayon::prelude::*;
1718
use rust_hawktracer::*;
1819

1920
use crate::cpu_features::CpuFeatureLevel;
20-
use std::cmp;
21+
use std::{
22+
cmp,
23+
ops::DerefMut,
24+
sync::{Arc, Mutex},
25+
};
2126

2227
cfg_if::cfg_if! {
2328
if #[cfg(nasm_x86_64)] {
@@ -600,31 +605,46 @@ pub fn cdef_filter_tile<T: Pixel>(
600605
let fb_width = (output.planes[0].rect().width + 63) / 64;
601606
let fb_height = (output.planes[0].rect().height + 63) / 64;
602607

603-
// should parallelize this
604-
let mut queue: Vec<(usize, usize, TileMut<'_, T>)> = Vec::new();
608+
let mut queue: Vec<(usize, usize, Arc<Mutex<&mut TileMut<'_, T>>>)> =
609+
Vec::new();
610+
let shared_output = Arc::new(Mutex::new(output));
605611

606612
for fby in 0..fb_height {
607613
for fbx in 0..fb_width {
608-
queue.push((fbx, fby, output));
614+
queue.push((fbx, fby, shared_output.clone()));
609615
}
610616
}
611617

612-
queue.par_iter().for_each(|tpl| filter_tile(tpl, fi, input, tb, output));
618+
queue.into_par_iter().for_each(|tpl| filter_tile(tpl, fi, input, tb));
613619
}
614620

615621
#[hawktracer(filter_tile)]
616622
pub fn filter_tile<T: Pixel>(
617-
tpl: &(usize, usize, &mut TileMut<'_, T>), fi: &FrameInvariants<T>,
618-
input: &Frame<T>, tb: &TileBlocks, output: &mut TileMut<'_, T>,
623+
tpl: (usize, usize, Arc<Mutex<&mut TileMut<'_, T>>>),
624+
fi: &FrameInvariants<T>, input: &Frame<T>, tb: &TileBlocks,
619625
) {
620626
// tile_sbo is treated as an offset into the Tiles' plane
621627
// regions, not as an absolute offset in the visible frame. The
622628
// Tile's own offset is added to this in order to address into
623629
// the input Frame.
624-
let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: tpl.0, y: tpl.1 });
630+
let (fbx, fby, shared_output) = tpl;
631+
let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: fbx, y: fby });
625632
let cdef_index = tb.get_cdef(tile_sbo);
626633
let cdef_dirs = cdef_analyze_superblock(fi, input, tb, tile_sbo);
634+
loop {
635+
if shared_output.try_lock().is_ok() {
636+
break;
637+
}
638+
}
639+
let mut output = shared_output.lock().unwrap();
640+
627641
cdef_filter_superblock(
628-
fi, input, output, tb, tile_sbo, cdef_index, &cdef_dirs,
642+
fi,
643+
input,
644+
output.deref_mut(),
645+
tb,
646+
tile_sbo,
647+
cdef_index,
648+
&cdef_dirs,
629649
);
630650
}

0 commit comments

Comments
 (0)