|
9 | 9 |
|
10 | 10 | use crate::context::MAX_TX_SIZE;
|
11 | 11 | use crate::cpu_features::CpuFeatureLevel;
|
| 12 | +use crate::partition::BlockSize; |
12 | 13 | use crate::predict::{
|
13 | 14 | rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
|
14 | 15 | };
|
15 |
| -use crate::tiling::PlaneRegionMut; |
| 16 | +use crate::tiling::{PlaneRegion, PlaneRegionMut}; |
16 | 17 | use crate::transform::TxSize;
|
17 | 18 | use crate::util::Aligned;
|
18 | 19 | use crate::Pixel;
|
@@ -103,6 +104,49 @@ extern {
|
103 | 104 | );
|
104 | 105 | }
|
105 | 106 |
|
| 107 | +macro_rules! decl_cfl_ac_fn { |
| 108 | + ($($f:ident),+) => { |
| 109 | + extern { |
| 110 | + $( |
| 111 | + fn $f( |
| 112 | + ac: *mut i16, src: *const u8, stride: libc::ptrdiff_t, |
| 113 | + w_pad: libc::c_int, h_pad: libc::c_int, |
| 114 | + width: libc::c_int, height: libc::c_int, |
| 115 | + ); |
| 116 | + )* |
| 117 | + } |
| 118 | + }; |
| 119 | +} |
| 120 | + |
| 121 | +decl_cfl_ac_fn! { |
| 122 | + rav1e_ipred_cfl_ac_420_8bpc_avx2, |
| 123 | + rav1e_ipred_cfl_ac_420_8bpc_ssse3, |
| 124 | + rav1e_ipred_cfl_ac_422_8bpc_avx2, |
| 125 | + rav1e_ipred_cfl_ac_422_8bpc_ssse3, |
| 126 | + rav1e_ipred_cfl_ac_444_8bpc_avx2, |
| 127 | + rav1e_ipred_cfl_ac_444_8bpc_ssse3 |
| 128 | +} |
| 129 | + |
| 130 | +macro_rules! decl_cfl_ac_hbd_fn { |
| 131 | + ($($f:ident),+) => { |
| 132 | + extern { |
| 133 | + $( |
| 134 | + fn $f( |
| 135 | + ac: *mut i16, src: *const u16, stride: libc::ptrdiff_t, |
| 136 | + w_pad: libc::c_int, h_pad: libc::c_int, |
| 137 | + width: libc::c_int, height: libc::c_int, |
| 138 | + ); |
| 139 | + )* |
| 140 | + } |
| 141 | + }; |
| 142 | +} |
| 143 | + |
| 144 | +decl_cfl_ac_hbd_fn! { |
| 145 | + rav1e_ipred_cfl_ac_420_16bpc_avx2, |
| 146 | + rav1e_ipred_cfl_ac_422_16bpc_avx2, |
| 147 | + rav1e_ipred_cfl_ac_444_16bpc_avx2 |
| 148 | +} |
| 149 | + |
106 | 150 | macro_rules! decl_cfl_pred_fn {
|
107 | 151 | ($($f:ident),+) => {
|
108 | 152 | extern {
|
@@ -427,3 +471,51 @@ pub fn dispatch_predict_intra<T: Pixel>(
|
427 | 471 | }
|
428 | 472 | }
|
429 | 473 | }
|
| 474 | + |
| 475 | +#[inline(always)] |
| 476 | +pub(crate) fn pred_cfl_ac<T: Pixel, const XDEC: usize, const YDEC: usize>( |
| 477 | + ac: &mut [i16], luma: &PlaneRegion<'_, T>, bsize: BlockSize, w_pad: usize, |
| 478 | + h_pad: usize, cpu: CpuFeatureLevel, |
| 479 | +) { |
| 480 | + let call_rust = |ac: &mut [i16]| { |
| 481 | + rust::pred_cfl_ac::<T, XDEC, YDEC>(ac, luma, bsize, w_pad, h_pad, cpu); |
| 482 | + }; |
| 483 | + |
| 484 | + let stride = T::to_asm_stride(luma.plane_cfg.stride) as libc::ptrdiff_t; |
| 485 | + let w = bsize.width() as libc::c_int; |
| 486 | + let h = bsize.height() as libc::c_int; |
| 487 | + let w_pad = w_pad as libc::c_int; |
| 488 | + let h_pad = h_pad as libc::c_int; |
| 489 | + |
| 490 | + // SAFETY: Calls Assembly code. |
| 491 | + unsafe { |
| 492 | + let ac_ptr = ac.as_mut_ptr(); |
| 493 | + match T::type_enum() { |
| 494 | + PixelType::U8 if cpu >= CpuFeatureLevel::SSSE3 => { |
| 495 | + let luma_ptr = luma.data_ptr() as *const u8; |
| 496 | + (if cpu >= CpuFeatureLevel::AVX2 { |
| 497 | + match (XDEC, YDEC) { |
| 498 | + (0, 0) => rav1e_ipred_cfl_ac_444_8bpc_avx2, |
| 499 | + (1, 0) => rav1e_ipred_cfl_ac_422_8bpc_avx2, |
| 500 | + _ => rav1e_ipred_cfl_ac_420_8bpc_avx2, |
| 501 | + } |
| 502 | + } else { |
| 503 | + match (XDEC, YDEC) { |
| 504 | + (0, 0) => rav1e_ipred_cfl_ac_444_8bpc_ssse3, |
| 505 | + (1, 0) => rav1e_ipred_cfl_ac_422_8bpc_ssse3, |
| 506 | + _ => rav1e_ipred_cfl_ac_420_8bpc_ssse3, |
| 507 | + } |
| 508 | + })(ac_ptr, luma_ptr, stride, w_pad, h_pad, w, h) |
| 509 | + } |
| 510 | + PixelType::U16 if cpu >= CpuFeatureLevel::AVX2 => { |
| 511 | + let luma_ptr = luma.data_ptr() as *const u16; |
| 512 | + (match (XDEC, YDEC) { |
| 513 | + (0, 0) => rav1e_ipred_cfl_ac_444_16bpc_avx2, |
| 514 | + (1, 0) => rav1e_ipred_cfl_ac_422_16bpc_avx2, |
| 515 | + _ => rav1e_ipred_cfl_ac_420_16bpc_avx2, |
| 516 | + })(ac_ptr, luma_ptr, stride, w_pad, h_pad, w, h) |
| 517 | + } |
| 518 | + _ => call_rust(ac), |
| 519 | + } |
| 520 | + } |
| 521 | +} |
0 commit comments