Skip to content

Commit db7ff31

Browse files
committed
Integrate CfL AC x86 assembly functions
1 parent 0cd5a3b commit db7ff31

File tree

3 files changed

+95
-2
lines changed

3 files changed

+95
-2
lines changed

src/asm/aarch64/predict.rs

+2
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@ use crate::util::Aligned;
1818
use crate::{Pixel, PixelType};
1919
use libc;
2020

21+
pub(crate) use crate::predict::rust::pred_cfl_ac;
22+
2123
macro_rules! decl_angular_ipred_fn {
2224
($($f:ident),+) => {
2325
extern {

src/asm/x86/predict.rs

+93 -1
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,11 @@
99

1010
use crate::context::MAX_TX_SIZE;
1111
use crate::cpu_features::CpuFeatureLevel;
12+
use crate::partition::BlockSize;
1213
use crate::predict::{
1314
rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
1415
};
15-
use crate::tiling::PlaneRegionMut;
16+
use crate::tiling::{PlaneRegion, PlaneRegionMut};
1617
use crate::transform::TxSize;
1718
use crate::util::Aligned;
1819
use crate::Pixel;
@@ -103,6 +104,49 @@ extern {
103104
);
104105
}
105106

107+
/// Declares, inside a single `extern` block, one foreign function per listed
/// identifier. Every declared function shares the same signature: an `i16`
/// output pointer, a `u8` source pointer with its stride, the two padding
/// amounts, and the block width and height.
///
/// The list is comma-separated and must not end with a trailing comma.
macro_rules! decl_cfl_ac_fn {
  ($($name:ident),+) => {
    extern {
      $(
        fn $name(
          ac: *mut i16,
          src: *const u8,
          stride: libc::ptrdiff_t,
          w_pad: libc::c_int,
          h_pad: libc::c_int,
          width: libc::c_int,
          height: libc::c_int,
        );
      )+
    }
  };
}
120+
121+
decl_cfl_ac_fn! {
122+
rav1e_ipred_cfl_ac_420_8bpc_avx2,
123+
rav1e_ipred_cfl_ac_420_8bpc_ssse3,
124+
rav1e_ipred_cfl_ac_422_8bpc_avx2,
125+
rav1e_ipred_cfl_ac_422_8bpc_ssse3,
126+
rav1e_ipred_cfl_ac_444_8bpc_avx2,
127+
rav1e_ipred_cfl_ac_444_8bpc_ssse3
128+
}
129+
130+
macro_rules! decl_cfl_ac_hbd_fn {
131+
($($f:ident),+) => {
132+
extern {
133+
$(
134+
fn $f(
135+
ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t,
136+
w_pad: libc::c_int, h_pad: libc::c_int,
137+
width: libc::c_int, height: libc::c_int,
138+
);
139+
)*
140+
}
141+
};
142+
}
143+
144+
decl_cfl_ac_hbd_fn! {
145+
rav1e_ipred_cfl_ac_420_16bpc_avx2,
146+
rav1e_ipred_cfl_ac_422_16bpc_avx2,
147+
rav1e_ipred_cfl_ac_444_16bpc_avx2
148+
}
149+
106150
macro_rules! decl_cfl_pred_fn {
107151
($($f:ident),+) => {
108152
extern {
@@ -427,3 +471,51 @@ pub fn dispatch_predict_intra<T: Pixel>(
427471
}
428472
}
429473
}
474+
475+
#[inline(always)]
476+
pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>(
477+
ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize,
478+
h_pad: usize, cpu: CpuFeatureLevel,
479+
) {
480+
let call_rust = |ac: &mut [i16]| {
481+
rust::pred_cfl_ac::<T, XDEC, YDEC>(ac, luma, bsize, w_pad, h_pad, cpu);
482+
};
483+
484+
let stride = T::to_asm_stride(luma.plane_cfg.stride) as libc::ptrdiff_t;
485+
let w = bsize.width() as libc::c_int;
486+
let h = bsize.height() as libc::c_int;
487+
let w_pad = w_pad as libc::c_int;
488+
let h_pad = h_pad as libc::c_int;
489+
490+
// SAFETY: Calls Assembly code.
491+
unsafe {
492+
let ac_ptr = ac.as_mut_ptr();
493+
match T::type_enum() {
494+
PixelType::U8 if cpu >= CpuFeatureLevel::SSSE3 => {
495+
let luma_ptr = luma.data_ptr() as *const u8;
496+
(if cpu >= CpuFeatureLevel::AVX2 {
497+
match (XDEC, YDEC) {
498+
(0, 0) => rav1e_ipred_cfl_ac_444_8bpc_avx2,
499+
(1, 0) => rav1e_ipred_cfl_ac_422_8bpc_avx2,
500+
_ => rav1e_ipred_cfl_ac_420_8bpc_avx2,
501+
}
502+
} else {
503+
match (XDEC, YDEC) {
504+
(0, 0) => rav1e_ipred_cfl_ac_444_8bpc_ssse3,
505+
(1, 0) => rav1e_ipred_cfl_ac_422_8bpc_ssse3,
506+
_ => rav1e_ipred_cfl_ac_420_8bpc_ssse3,
507+
}
508+
})(ac_ptr, luma_ptr, stride, w_pad, h_pad, w, h)
509+
}
510+
PixelType::U16 if cpu >= CpuFeatureLevel::AVX2 => {
511+
let luma_ptr = luma.data_ptr() as *const u16;
512+
(match (XDEC, YDEC) {
513+
(0, 0) => rav1e_ipred_cfl_ac_444_16bpc_avx2,
514+
(1, 0) => rav1e_ipred_cfl_ac_422_16bpc_avx2,
515+
_ => rav1e_ipred_cfl_ac_420_16bpc_avx2,
516+
})(ac_ptr, luma_ptr, stride, w_pad, h_pad, w, h)
517+
}
518+
_ => call_rust(ac),
519+
}
520+
}
521+
}

src/predict.rs

-1
Original file line number | Diff line number | Diff line change
@@ -680,7 +680,6 @@ pub fn luma_ac<T: Pixel>(
680680
let h_pad = (bsize.height() - max_luma_h) >> (2 + ydec);
681681
let cpu = fi.cpu_feature_level;
682682

683-
use crate::predict::rust::pred_cfl_ac;
684683
match (xdec, ydec) {
685684
(0, 0) => pred_cfl_ac::<T, 0, 0>(ac, luma, plane_bsize, w_pad, h_pad, cpu),
686685
(1, 0) => pred_cfl_ac::<T, 1, 0>(ac, luma, plane_bsize, w_pad, h_pad, cpu),

0 commit comments

Comments
 (0)