Skip to content

Commit fc03f98

Browse files
maj160 authored and shssoichiro committed
Use region dimensions in SAD and ME
This avoids having to re-check bounds every time we perform SAD, as the region knows its own size. It may also save two `usize` values being passed around during ME. To enforce this, we also remove the `w` and `h` parameters everywhere. This is part of a series of commits authored by @maj160 to improve the performance of rav1e.
1 parent 0e2c74c commit fc03f98

File tree

6 files changed

+118
-119
lines changed

6 files changed

+118
-119
lines changed

src/api/internal.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -902,6 +902,8 @@ impl<T: Pixel> ContextInner<T> {
902902
bsize: BlockSize, len: usize,
903903
reference_frame_block_importances: &mut [f32],
904904
) {
905+
debug_assert!(bsize.width() == IMPORTANCE_BLOCK_SIZE);
906+
debug_assert!(bsize.height() == IMPORTANCE_BLOCK_SIZE);
905907
let coded_data = fi.coded_frame_data.as_ref().unwrap();
906908
let plane_org = &frame.planes[0];
907909
let plane_ref = &reference_frame.planes[0];
@@ -945,8 +947,6 @@ impl<T: Pixel> ContextInner<T> {
945947
let inter_cost = get_satd(
946948
&region_org,
947949
&region_ref,
948-
bsize.width(),
949-
bsize.height(),
950950
bit_depth,
951951
fi.cpu_feature_level,
952952
) as f32;

src/api/lookahead.rs

+3-14
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,6 @@ pub(crate) fn estimate_intra_costs<T: Pixel>(
107107
let intra_cost = get_satd(
108108
&plane_org,
109109
&plane_after_prediction_region,
110-
bsize.width(),
111-
bsize.height(),
112110
bit_depth,
113111
cpu_feature_level,
114112
);
@@ -223,10 +221,6 @@ pub(crate) fn estimate_inter_costs<T: Pixel>(
223221
let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE;
224222
let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE;
225223
let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0];
226-
let bsize = BlockSize::from_width_and_height(
227-
IMPORTANCE_BLOCK_SIZE,
228-
IMPORTANCE_BLOCK_SIZE,
229-
);
230224

231225
let mut inter_costs = 0;
232226
(0..h_in_imp_b).for_each(|y| {
@@ -252,14 +246,9 @@ pub(crate) fn estimate_inter_costs<T: Pixel>(
252246
height: IMPORTANCE_BLOCK_SIZE,
253247
});
254248

255-
inter_costs += get_satd(
256-
&region_org,
257-
&region_ref,
258-
bsize.width(),
259-
bsize.height(),
260-
bit_depth,
261-
fi.cpu_feature_level,
262-
) as u64;
249+
inter_costs +=
250+
get_satd(&region_org, &region_ref, bit_depth, fi.cpu_feature_level)
251+
as u64;
263252
});
264253
});
265254
inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64

src/asm/x86/dist/mod.rs

+14-11
Original file line number | Diff line number | Diff line change
@@ -166,12 +166,13 @@ pub(crate) const fn to_index(bsize: BlockSize) -> usize {
166166
#[inline(always)]
167167
#[allow(clippy::let_and_return)]
168168
pub fn get_sad<T: Pixel>(
169-
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
170-
bit_depth: usize, cpu: CpuFeatureLevel,
169+
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
170+
cpu: CpuFeatureLevel,
171171
) -> u32 {
172-
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);
172+
let bsize_opt =
173+
BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height);
173174

174-
let call_rust = || -> u32 { rust::get_sad(dst, src, w, h, bit_depth, cpu) };
175+
let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) };
175176

176177
#[cfg(feature = "check_asm")]
177178
let ref_dist = call_rust();
@@ -220,12 +221,14 @@ pub fn get_sad<T: Pixel>(
220221
#[inline(always)]
221222
#[allow(clippy::let_and_return)]
222223
pub fn get_satd<T: Pixel>(
223-
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
224-
bit_depth: usize, cpu: CpuFeatureLevel,
224+
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
225+
cpu: CpuFeatureLevel,
225226
) -> u32 {
227+
let w = src.rect().width;
228+
let h = src.rect().height;
226229
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);
227230

228-
let call_rust = || -> u32 { rust::get_satd(dst, src, w, h, bit_depth, cpu) };
231+
let call_rust = || -> u32 { rust::get_satd(dst, src, bit_depth, cpu) };
229232

230233
#[cfg(feature = "check_asm")]
231234
let ref_dist = call_rust();
@@ -565,8 +568,8 @@ mod test {
565568
*s = random::<u8>() as u16 * $BD / 8;
566569
*d = random::<u8>() as u16 * $BD / 8;
567570
}
568-
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
569-
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
571+
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
572+
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);
570573

571574
assert_eq!(rust_result, result);
572575
} else {
@@ -578,8 +581,8 @@ mod test {
578581
*s = random::<u8>();
579582
*d = random::<u8>();
580583
}
581-
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
582-
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
584+
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
585+
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);
583586

584587
assert_eq!(rust_result, result);
585588
}

src/dist.rs

+12-19
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,12 @@ pub(crate) mod rust {
3131
/// Compute the sum of absolute differences over a block.
3232
/// w and h can be at most 128, the size of the largest block.
3333
pub fn get_sad<T: Pixel>(
34-
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize,
35-
h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel,
34+
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
35+
_bit_depth: usize, _cpu: CpuFeatureLevel,
3636
) -> u32 {
37-
debug_assert!(w <= 128 && h <= 128);
38-
let plane_org =
39-
plane_org.subregion(Area::Rect { x: 0, y: 0, width: w, height: h });
40-
let plane_ref =
41-
plane_ref.subregion(Area::Rect { x: 0, y: 0, width: w, height: h });
37+
debug_assert!(
38+
plane_org.rect().width <= 128 && plane_org.rect().height <= 128
39+
);
4240

4341
plane_org
4442
.rows_iter()
@@ -156,11 +154,12 @@ pub(crate) mod rust {
156154
/// revert to sad on edges when these transforms do not fit into w and h.
157155
/// 4x4 transforms instead of 8x8 transforms when width or height < 8.
158156
pub fn get_satd<T: Pixel>(
159-
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize,
160-
h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel,
157+
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
158+
_bit_depth: usize, _cpu: CpuFeatureLevel,
161159
) -> u32 {
160+
let w = plane_org.rect().width;
161+
let h = plane_org.rect().height;
162162
assert!(w <= 128 && h <= 128);
163-
assert!(plane_org.rect().width >= w && plane_org.rect().height >= h);
164163
assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h);
165164

166165
// Size of hadamard transform should be 4x4 or 8x8
@@ -186,9 +185,7 @@ pub(crate) mod rust {
186185

187186
// Revert to sad on edge blocks (frame edges)
188187
if chunk_w != size || chunk_h != size {
189-
sum += get_sad(
190-
&chunk_org, &chunk_ref, chunk_w, chunk_h, _bit_depth, _cpu,
191-
) as u64;
188+
sum += get_sad(&chunk_org, &chunk_ref, _bit_depth, _cpu) as u64;
192189
continue;
193190
}
194191

@@ -443,7 +440,7 @@ pub mod test {
443440
let (input_plane, rec_plane) = setup_planes::<T>();
444441

445442
for (w, h, distortion) in blocks {
446-
let area = Area::StartingAt { x: 32, y: 40 };
443+
let area = Area::Rect { x: 32, y: 40, width: w, height: h };
447444

448445
let input_region = input_plane.region(area);
449446
let rec_region = rec_plane.region(area);
@@ -453,8 +450,6 @@ pub mod test {
453450
get_sad(
454451
&input_region,
455452
&rec_region,
456-
w,
457-
h,
458453
bit_depth,
459454
CpuFeatureLevel::default()
460455
)
@@ -502,7 +497,7 @@ pub mod test {
502497
let (input_plane, rec_plane) = setup_planes::<T>();
503498

504499
for (w, h, distortion) in blocks {
505-
let area = Area::StartingAt { x: 32, y: 40 };
500+
let area = Area::Rect { x: 32, y: 40, width: w, height: h };
506501

507502
let input_region = input_plane.region(area);
508503
let rec_region = rec_plane.region(area);
@@ -512,8 +507,6 @@ pub mod test {
512507
get_satd(
513508
&input_region,
514509
&rec_region,
515-
w,
516-
h,
517510
bit_depth,
518511
CpuFeatureLevel::default()
519512
)

0 commit comments

Comments
 (0)