Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: xiph/rav1e
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: p20231017
Choose a base ref
...
head repository: xiph/rav1e
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: p20231024
Choose a head ref
  • 14 commits
  • 18 files changed
  • 2 contributors

Commits on Oct 18, 2023

  1. Copy the full SHA
    1c87320 View commit details

Commits on Oct 19, 2023

  1. Copy the full SHA
    4f73201 View commit details
  2. Copy the full SHA
    26b753d View commit details
  3. Copy the full SHA
    47a9d1b View commit details

Commits on Oct 20, 2023

  1. Don't use out params

    kornelski authored and barrbrain committed Oct 20, 2023
    Copy the full SHA
    8c7ca21 View commit details
  2. Reduce use of uninitialized arrays

    kornelski authored and barrbrain committed Oct 20, 2023
    Copy the full SHA
    de56196 View commit details
  3. Eliminate panic path from diff()

    kornelski authored and barrbrain committed Oct 20, 2023
    Copy the full SHA
    94b1179 View commit details
  4. Copy the full SHA
    ff2df3d View commit details
  5. Copy the full SHA
    620d541 View commit details

Commits on Oct 23, 2023

  1. Copy the full SHA
    bfa98f5 View commit details
  2. Copy the full SHA
    163aed9 View commit details
  3. Copy the full SHA
    d7c9a18 View commit details
  4. Copy the full SHA
    021183f View commit details
  5. Copy the full SHA
    7f01f3c View commit details
16 changes: 8 additions & 8 deletions benches/mc.rs
Original file line number Diff line number Diff line change
@@ -263,7 +263,7 @@ fn bench_prep_8tap_top_left_lbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u8>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -294,7 +294,7 @@ fn bench_prep_8tap_top_lbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u8>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -325,7 +325,7 @@ fn bench_prep_8tap_left_lbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u8>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -356,7 +356,7 @@ fn bench_prep_8tap_center_lbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u8>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -387,7 +387,7 @@ fn bench_prep_8tap_top_left_hbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u16>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -418,7 +418,7 @@ fn bench_prep_8tap_top_hbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u16>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -449,7 +449,7 @@ fn bench_prep_8tap_left_hbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u16>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
@@ -480,7 +480,7 @@ fn bench_prep_8tap_center_hbd(c: &mut Criterion) {
let w = 640;
let h = 480;
let input_plane = new_plane::<u16>(&mut ra, w, h);
let mut dst = unsafe { Aligned::<[i16; 128 * 128]>::uninitialized() };
let mut dst = Aligned::<[i16; 128 * 128]>::from_fn(|_| 0);

let (row_frac, col_frac, src) = get_params(
&input_plane,
3 changes: 2 additions & 1 deletion benches/transform.rs
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@ use rav1e::bench::transform;
use rav1e::bench::transform::{
forward_transform, get_valid_txfm_types, TxSize,
};
use std::mem::MaybeUninit;

fn init_buffers(size: usize) -> (Vec<i32>, Vec<i32>) {
let mut ra = ChaChaRng::from_seed([0; 32]);
@@ -96,7 +97,7 @@ pub fn bench_forward_transforms(c: &mut Criterion) {

let input: Vec<i16> =
(0..area).map(|_| rng.gen_range(-255..256)).collect();
let mut output = vec![0i16; area];
let mut output = vec![MaybeUninit::new(0i16); area];

for &tx_type in get_valid_txfm_types(tx_size) {
group.bench_function(
44 changes: 43 additions & 1 deletion src/asm/aarch64/transform/inverse.rs
Original file line number Diff line number Diff line change
@@ -20,6 +20,22 @@ pub fn inverse_transform_add<T: Pixel>(
input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>, eob: usize,
tx_size: TxSize, tx_type: TxType, bd: usize, cpu: CpuFeatureLevel,
) {
if tx_type == TxType::WHT_WHT {
debug_assert!(tx_size == TxSize::TX_4X4);
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_WHT_FN[cpu.as_index()] {
return call_inverse_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 if bd == 10 => {
if let Some(func) = INV_TXFM_WHT_HBD_FN[cpu.as_index()] {
return call_inverse_hbd_func(func, input, output, eob, 4, 4, bd);
}
}
PixelType::U16 => {}
}
}
match T::type_enum() {
PixelType::U8 => {
if let Some(func) = INV_TXFM_FNS[cpu.as_index()]
@@ -57,6 +73,32 @@ pub fn inverse_transform_add<T: Pixel>(
rust::inverse_transform_add(input, output, eob, tx_size, tx_type, bd, cpu);
}

extern {
fn rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon(
dst: *mut u8, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32,
);
fn rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon(
dst: *mut u16, dst_stride: libc::ptrdiff_t, coeff: *mut i16, eob: i32,
bitdepth_max: i32,
);
}
const INV_TXFM_WHT_FN_NEON: Option<InvTxfmFunc> =
Some(rav1e_inv_txfm_add_wht_wht_4x4_8bpc_neon as _);
const INV_TXFM_WHT_HBD_FN_NEON: Option<InvTxfmHBDFunc> =
Some(rav1e_inv_txfm_add_wht_wht_4x4_16bpc_neon as _);

cpu_function_lookup_table!(
INV_TXFM_WHT_FN: [Option<InvTxfmFunc>],
default: None,
[NEON]
);

cpu_function_lookup_table!(
INV_TXFM_WHT_HBD_FN: [Option<InvTxfmHBDFunc>],
default: None,
[NEON]
);

macro_rules! decl_itx_fns {
// Takes a 2d list of tx types for W and H
([$([$(($ENUM:expr, $TYPE1:ident, $TYPE2:ident)),*]),*], $W:expr, $H:expr,
@@ -100,7 +142,7 @@ macro_rules! decl_itx_hbd_fns {
// Note: type1 and type2 are flipped
fn [<rav1e_inv_txfm_add_ $TYPE2 _$TYPE1 _$W x $H _16bpc_$OPT_LOWER>](
dst: *mut u16, dst_stride: libc::ptrdiff_t, coeff: *mut i16,
eob: i32,
eob: i32, bitdepth_max: i32,
);
}
)*
16 changes: 4 additions & 12 deletions src/asm/shared/predict.rs
Original file line number Diff line number Diff line change
@@ -39,18 +39,10 @@ mod test {

fn pred_matches_inner<T: Pixel>(cpu: CpuFeatureLevel, bit_depth: usize) {
let tx_size = TxSize::TX_4X4;
// SAFETY: We write to the array below before reading from it.
let mut ac: Aligned<[i16; 32 * 32]> = unsafe { Aligned::uninitialized() };
for i in 0..ac.data.len() {
ac.data[i] = i as i16 - 16 * 32;
}
// SAFETY: We write to the array below before reading from it.
let mut edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> =
unsafe { Aligned::uninitialized() };
for i in 0..edge_buf.data.len() {
edge_buf.data[i] =
T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE));
}
let ac: Aligned<[i16; 32 * 32]> = Aligned::from_fn(|i| i as i16 - 16 * 32);
let edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> = Aligned::from_fn(|i| {
T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE))
});

let ief_params_all = [
None,
46 changes: 31 additions & 15 deletions src/asm/shared/transform/inverse.rs
Original file line number Diff line number Diff line change
@@ -9,13 +9,14 @@

use crate::tiling::PlaneRegionMut;
use crate::util::*;
use std::mem::MaybeUninit;

// Note: Input coeffs are mutable since the assembly uses them as a scratchpad
pub type InvTxfmFunc =
unsafe extern fn(*mut u8, libc::ptrdiff_t, *mut i16, i32);

pub type InvTxfmHBDFunc =
unsafe extern fn(*mut u16, libc::ptrdiff_t, *mut i16, i32);
unsafe extern fn(*mut u16, libc::ptrdiff_t, *mut i16, i32, i32);

pub fn call_inverse_func<T: Pixel>(
func: InvTxfmFunc, input: &[T::Coeff], output: &mut PlaneRegionMut<'_, T>,
@@ -27,13 +28,13 @@ pub fn call_inverse_func<T: Pixel>(
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];

// SAFETY: We write to the array below before reading from it.
let mut copied: Aligned<[T::Coeff; 32 * 32]> =
let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };

// Convert input to 16-bits.
// TODO: Remove by changing inverse assembly to not overwrite its input
for (a, b) in copied.data.iter_mut().zip(input) {
*a = *b;
a.write(*b);
}

// perform the inverse transform
@@ -51,19 +52,19 @@ pub fn call_inverse_func<T: Pixel>(
pub fn call_inverse_hbd_func<T: Pixel>(
func: InvTxfmHBDFunc, input: &[T::Coeff],
output: &mut PlaneRegionMut<'_, T>, eob: usize, width: usize, height: usize,
_bd: usize,
bd: usize,
) {
// Only use at most 32 columns and 32 rows of input coefficients.
let input: &[T::Coeff] = &input[..width.min(32) * height.min(32)];

// SAFETY: We write to the array below before reading from it.
let mut copied: Aligned<[T::Coeff; 32 * 32]> =
let mut copied: Aligned<[MaybeUninit<T::Coeff>; 32 * 32]> =
unsafe { Aligned::uninitialized() };

// Convert input to 16-bits.
// TODO: Remove by changing inverse assembly to not overwrite its input
for (a, b) in copied.data.iter_mut().zip(input) {
*a = *b;
a.write(*b);
}

// perform the inverse transform
@@ -74,6 +75,7 @@ pub fn call_inverse_hbd_func<T: Pixel>(
T::to_asm_stride(output.plane_cfg.stride),
copied.data.as_mut_ptr() as *mut _,
eob as i32 - 1,
(1 << bd) - 1,
);
}
}
@@ -88,6 +90,7 @@ pub mod test {
use crate::transform::TxSize::*;
use crate::transform::*;
use rand::{random, thread_rng, Rng};
use std::mem::MaybeUninit;

pub fn pick_eob<T: Coefficient>(
coeffs: &mut [T], tx_size: TxSize, tx_type: TxType, sub_h: usize,
@@ -105,7 +108,8 @@ pub mod test {
let mut eob = 0;
let mut exit = 0;

let scan = av1_scan_orders[tx_size as usize][tx_type as usize].scan;
// Wrap WHT_WHT (16) to DCT_DCT (0) scan table
let scan = av1_scan_orders[tx_size as usize][(tx_type as usize) & 15].scan;

for (i, &pos) in scan.iter().enumerate() {
exit = i;
@@ -145,22 +149,26 @@ pub mod test {
for sub_h in 0..sub_h_iterations {
let mut src_storage = [T::zero(); 64 * 64];
let src = &mut src_storage[..tx_size.area()];
let mut dst = Plane::from_slice(&[T::zero(); 64 * 64], 64);
// SAFETY: We write to the array below before reading from it.
let mut res_storage: Aligned<[i16; 64 * 64]> =
let mut dst = Plane::from_slice(
&[T::zero(); 64 * 64][..tx_size.area()],
tx_size.width(),
);
let mut res_storage: Aligned<[MaybeUninit<i16>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let res = &mut res_storage.data[..tx_size.area()];
// SAFETY: We write to the array below before reading from it.
let mut freq_storage: Aligned<[T::Coeff; 64 * 64]> =
let mut freq_storage: Aligned<[MaybeUninit<T::Coeff>; 64 * 64]> =
unsafe { Aligned::uninitialized() };
let freq = &mut freq_storage.data[..tx_size.area()];
for ((r, s), d) in
res.iter_mut().zip(src.iter_mut()).zip(dst.data.iter_mut())
{
*s = T::cast_from(random::<u16>() >> (16 - bit_depth));
*d = T::cast_from(random::<u16>() >> (16 - bit_depth));
*r = i16::cast_from(*s) - i16::cast_from(*d);
r.write(i16::cast_from(*s) - i16::cast_from(*d));
}
// SAFETY: The loop just initialized res, and all three slices have the same length
let res = unsafe { slice_assume_init_mut(res) };
forward_transform(
res,
freq,
@@ -170,6 +178,8 @@ pub mod test {
bit_depth,
CpuFeatureLevel::RUST,
);
// SAFETY: forward_transform initialized freq
let freq = unsafe { slice_assume_init_mut(freq) };

let eob: usize = pick_eob(freq, tx_size, tx_type, sub_h);
let mut rust_dst = dst.clone();
@@ -223,13 +233,16 @@ pub mod test {
};

($TYPES64:tt, $DIMS64:tt, $TYPES32:tt, $DIMS32:tt, $TYPES16:tt, $DIMS16:tt,
$TYPES84:tt, $DIMS84:tt) => {
$TYPES84:tt, $DIMS84:tt, $TYPES4:tt, $DIMS4:tt) => {
test_itx_fns!([$TYPES64], $DIMS64);
test_itx_fns!([$TYPES64, $TYPES32], $DIMS32);
test_itx_fns!([$TYPES64, $TYPES32, $TYPES16], $DIMS16);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84], $DIMS84
);
test_itx_fns!(
[$TYPES64, $TYPES32, $TYPES16, $TYPES84, $TYPES4], $DIMS4
);
};
}

@@ -254,13 +267,16 @@ pub mod test {
(TxType::FLIPADST_FLIPADST, flipadst, flipadst)
],
[(16, 16)],
// 8x, 4x and 16x (minus 16x16)
// 8x, 4x and 16x (minus 16x16 and 4x4)
[
(TxType::V_ADST, adst, identity),
(TxType::H_ADST, identity, adst),
(TxType::V_FLIPADST, flipadst, identity),
(TxType::H_FLIPADST, identity, flipadst)
],
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8), (4, 4)]
[(16, 8), (8, 16), (16, 4), (4, 16), (8, 8), (8, 4), (4, 8)],
// 4x4
[(TxType::WHT_WHT, wht, wht)],
[(4, 4)]
);
}
20 changes: 12 additions & 8 deletions src/asm/x86/quantize.rs
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@ use crate::cpu_features::CpuFeatureLevel;
use crate::quantize::*;
use crate::transform::TxSize;
use crate::util::*;
use std::mem::MaybeUninit;

type DequantizeFn = unsafe fn(
qindex: u8,
@@ -37,21 +38,21 @@ cpu_function_lookup_table!(

#[inline(always)]
pub fn dequantize<T: Coefficient>(
qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [T], tx_size: TxSize,
bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8, cpu: CpuFeatureLevel,
qindex: u8, coeffs: &[T], eob: usize, rcoeffs: &mut [MaybeUninit<T>],
tx_size: TxSize, bit_depth: usize, dc_delta_q: i8, ac_delta_q: i8,
cpu: CpuFeatureLevel,
) {
let call_rust = |rcoeffs: &mut [T]| {
let call_rust = |rcoeffs: &mut [MaybeUninit<T>]| {
crate::quantize::rust::dequantize(
qindex, coeffs, eob, rcoeffs, tx_size, bit_depth, dc_delta_q,
ac_delta_q, cpu,
);
};

#[cfg(any(feature = "check_asm", test))]
let ref_rcoeffs = {
let mut ref_rcoeffs = {
let area = av1_get_coded_tx_size(tx_size).area();
let mut copy = vec![T::cast_from(0); area];
copy[..].copy_from_slice(&rcoeffs[..area]);
let mut copy = vec![MaybeUninit::new(T::cast_from(0)); area];
call_rust(&mut copy);
copy
};
@@ -82,7 +83,9 @@ pub fn dequantize<T: Coefficient>(
#[cfg(any(feature = "check_asm", test))]
{
let area = av1_get_coded_tx_size(tx_size).area();
assert_eq!(&rcoeffs[..area], &ref_rcoeffs[..]);
let rcoeffs = unsafe { assume_slice_init_mut(&mut rcoeffs[..area]) };
let ref_rcoeffs = unsafe { assume_slice_init_mut(&mut ref_rcoeffs[..]) };
assert_eq!(rcoeffs, ref_rcoeffs);
}
}

@@ -157,6 +160,7 @@ mod test {
use super::*;
use rand::distributions::{Distribution, Uniform};
use rand::{thread_rng, Rng};
use std::mem::MaybeUninit;

#[test]
fn dequantize_test() {
@@ -190,7 +194,7 @@ mod test {

for &eob in &eobs {
let mut qcoeffs = Aligned::new([0i16; 32 * 32]);
let mut rcoeffs = Aligned::new([0i16; 32 * 32]);
let mut rcoeffs = Aligned::new([MaybeUninit::new(0i16); 32 * 32]);

// Generate quantized coefficients up to the eob
let between = Uniform::from(-i16::MAX..=i16::MAX);
Loading