Skip to content

Commit 590d521

Browse files
authored
Integrate SATD ASM for SSSE3 and SSE4.1 (#1813)
This also involves adding a new CpuFeatureLevel for SSE4.1.
1 parent 0282b20 commit 590d521

File tree

4 files changed

+44
-13
lines changed

4 files changed

+44
-13
lines changed

src/asm/x86/dist.rs

+32
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ declare_asm_dist_fn![
4545
// SSSE3
4646
(rav1e_sad_4x4_hbd_ssse3, u16),
4747
(rav1e_sad_16x16_hbd_ssse3, u16),
48+
(rav1e_satd_8x8_ssse3, u8),
4849
// SSE2
4950
(rav1e_sad4x4_sse2, u8),
5051
(rav1e_sad4x8_sse2, u8),
@@ -57,6 +58,8 @@ declare_asm_dist_fn![
5758
(rav1e_sad32x32_sse2, u8),
5859
(rav1e_sad64x64_sse2, u8),
5960
(rav1e_sad128x128_sse2, u8),
61+
// SSE4
62+
(rav1e_satd_4x4_sse4, u8),
6063
// AVX
6164
(rav1e_sad16x4_avx2, u8),
6265
(rav1e_sad16x8_avx2, u8),
@@ -255,6 +258,7 @@ pub static SAD_FNS: [[Option<SadFn>; DIST_FNS_LENGTH];
255258

256259
out[CpuFeatureLevel::SSE2 as usize] = SAD_FNS_SSE2;
257260
out[CpuFeatureLevel::SSSE3 as usize] = SAD_FNS_SSE2;
261+
out[CpuFeatureLevel::SSE4_1 as usize] = SAD_FNS_SSE2;
258262
out[CpuFeatureLevel::AVX2 as usize] = SAD_FNS_AVX2;
259263

260264
out
@@ -276,11 +280,33 @@ pub(crate) static SAD_HBD_FNS: [[Option<SadHBDFn>; DIST_FNS_LENGTH];
276280
let mut out = [[None; DIST_FNS_LENGTH]; CpuFeatureLevel::len()];
277281

278282
out[CpuFeatureLevel::SSSE3 as usize] = SAD_HBD_FNS_SSSE3;
283+
out[CpuFeatureLevel::SSE4_1 as usize] = SAD_HBD_FNS_SSSE3;
279284
out[CpuFeatureLevel::AVX2 as usize] = SAD_HBD_FNS_SSSE3;
280285

281286
out
282287
};
283288

289+
static SATD_FNS_SSSE3: [Option<SatdFn>; DIST_FNS_LENGTH] = {
290+
let mut out: [Option<SatdFn>; DIST_FNS_LENGTH] = [None; DIST_FNS_LENGTH];
291+
292+
use BlockSize::*;
293+
294+
out[BLOCK_8X8 as usize] = Some(rav1e_satd_8x8_ssse3);
295+
296+
out
297+
};
298+
299+
static SATD_FNS_SSE4: [Option<SatdFn>; DIST_FNS_LENGTH] = {
300+
let mut out: [Option<SatdFn>; DIST_FNS_LENGTH] = [None; DIST_FNS_LENGTH];
301+
302+
use BlockSize::*;
303+
304+
out[BLOCK_4X4 as usize] = Some(rav1e_satd_4x4_sse4);
305+
out[BLOCK_8X8 as usize] = Some(rav1e_satd_8x8_ssse3);
306+
307+
out
308+
};
309+
284310
static SATD_FNS_AVX2: [Option<SatdFn>; DIST_FNS_LENGTH] = {
285311
let mut out: [Option<SatdFn>; DIST_FNS_LENGTH] = [None; DIST_FNS_LENGTH];
286312

@@ -318,6 +344,8 @@ pub(crate) static SATD_FNS: [[Option<SatdFn>; DIST_FNS_LENGTH];
318344
CpuFeatureLevel::len()] = {
319345
let mut out = [[None; DIST_FNS_LENGTH]; CpuFeatureLevel::len()];
320346

347+
out[CpuFeatureLevel::SSSE3 as usize] = SATD_FNS_SSSE3;
348+
out[CpuFeatureLevel::SSE4_1 as usize] = SATD_FNS_SSE4;
321349
out[CpuFeatureLevel::AVX2 as usize] = SATD_FNS_AVX2;
322350

323351
out
@@ -416,6 +444,10 @@ mod test {
416444
"avx2"
417445
);
418446

447+
test_dist_fns!((8, 8), satd, 8, ssse3, "ssse3");
448+
449+
test_dist_fns!((4, 4), satd, 8, sse4, "sse4.1");
450+
419451
test_dist_fns!(
420452
(4, 4),
421453
(8, 8),

src/asm/x86/mc.rs

+3
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ decl_mc_fns!(
313313
pub(crate) static PUT_FNS: [[Option<PutFn>; 16]; CpuFeatureLevel::len()] = {
314314
let mut out = [[None; 16]; CpuFeatureLevel::len()];
315315
out[CpuFeatureLevel::SSSE3 as usize] = PUT_FNS_SSSE3;
316+
out[CpuFeatureLevel::SSE4_1 as usize] = PUT_FNS_SSSE3;
316317
out[CpuFeatureLevel::AVX2 as usize] = PUT_FNS_AVX2;
317318
out
318319
};
@@ -372,6 +373,7 @@ decl_mct_fns!(
372373
pub(crate) static PREP_FNS: [[Option<PrepFn>; 16]; CpuFeatureLevel::len()] = {
373374
let mut out = [[None; 16]; CpuFeatureLevel::len()];
374375
out[CpuFeatureLevel::SSSE3 as usize] = PREP_FNS_SSSE3;
376+
out[CpuFeatureLevel::SSE4_1 as usize] = PREP_FNS_SSSE3;
375377
out[CpuFeatureLevel::AVX2 as usize] = PREP_FNS_AVX2;
376378
out
377379
};
@@ -395,6 +397,7 @@ pub(crate) static AVG_FNS: [Option<AvgFn>; CpuFeatureLevel::len()] = {
395397
let mut out: [Option<AvgFn>; CpuFeatureLevel::len()] =
396398
[None; CpuFeatureLevel::len()];
397399
out[CpuFeatureLevel::SSSE3 as usize] = Some(rav1e_avg_ssse3);
400+
out[CpuFeatureLevel::SSE4_1 as usize] = Some(rav1e_avg_ssse3);
398401
out[CpuFeatureLevel::AVX2 as usize] = Some(rav1e_avg_avx2);
399402
out
400403
};

src/cpu_features/aarch64.rs

+1 -3
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@ impl CpuFeatureLevel {
2323

2424
#[inline(always)]
2525
pub fn as_index(self) -> usize {
26-
const LEN: usize = CpuFeatureLevel::len();
27-
assert_eq!(LEN & (LEN - 1), 0);
28-
self as usize & (LEN - 1)
26+
self as usize
2927
}
3028
}
3129

src/cpu_features/x86.rs

+8 -10
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@
99

1010
use arg_enum_proc_macro::ArgEnum;
1111
use std::env;
12+
use std::str::FromStr;
1213

1314
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, ArgEnum)]
1415
pub enum CpuFeatureLevel {
16+
#[arg_enum(alias = "rust")]
1517
NATIVE,
1618
SSE2,
1719
SSSE3,
20+
#[arg_enum(alias = "sse4.1")]
21+
SSE4_1,
1822
AVX2,
1923
}
2024

@@ -25,16 +29,16 @@ impl CpuFeatureLevel {
2529

2630
#[inline(always)]
2731
pub fn as_index(self) -> usize {
28-
const LEN: usize = CpuFeatureLevel::len();
29-
assert_eq!(LEN & (LEN - 1), 0);
30-
self as usize & (LEN - 1)
32+
self as usize
3133
}
3234
}
3335

3436
impl Default for CpuFeatureLevel {
3537
fn default() -> CpuFeatureLevel {
3638
let detected: CpuFeatureLevel = if is_x86_feature_detected!("avx2") {
3739
CpuFeatureLevel::AVX2
40+
} else if is_x86_feature_detected!("sse4.1") {
41+
CpuFeatureLevel::SSE4_1
3842
} else if is_x86_feature_detected!("ssse3") {
3943
CpuFeatureLevel::SSSE3
4044
} else if is_x86_feature_detected!("sse2") {
@@ -43,13 +47,7 @@ impl Default for CpuFeatureLevel {
4347
CpuFeatureLevel::NATIVE
4448
};
4549
let manual: CpuFeatureLevel = match env::var("RAV1E_CPU_TARGET") {
46-
Ok(feature) => match feature.as_ref() {
47-
"rust" => CpuFeatureLevel::NATIVE,
48-
"avx2" => CpuFeatureLevel::AVX2,
49-
"ssse3" => CpuFeatureLevel::SSSE3,
50-
"sse2" => CpuFeatureLevel::SSE2,
51-
_ => detected,
52-
},
50+
Ok(feature) => CpuFeatureLevel::from_str(&feature).unwrap_or(detected),
5351
Err(_e) => detected,
5452
};
5553
if manual > detected {

0 commit comments

Comments
 (0)