@@ -45,6 +45,7 @@ declare_asm_dist_fn![
45
45
// SSSE3
46
46
( rav1e_sad_4x4_hbd_ssse3, u16 ) ,
47
47
( rav1e_sad_16x16_hbd_ssse3, u16 ) ,
48
+ ( rav1e_satd_8x8_ssse3, u8 ) ,
48
49
// SSE2
49
50
( rav1e_sad4x4_sse2, u8 ) ,
50
51
( rav1e_sad4x8_sse2, u8 ) ,
@@ -57,6 +58,8 @@ declare_asm_dist_fn![
57
58
( rav1e_sad32x32_sse2, u8 ) ,
58
59
( rav1e_sad64x64_sse2, u8 ) ,
59
60
( rav1e_sad128x128_sse2, u8 ) ,
61
+ // SSE4
62
+ ( rav1e_satd_4x4_sse4, u8 ) ,
60
63
// AVX
61
64
( rav1e_sad16x4_avx2, u8 ) ,
62
65
( rav1e_sad16x8_avx2, u8 ) ,
@@ -255,6 +258,7 @@ pub static SAD_FNS: [[Option<SadFn>; DIST_FNS_LENGTH];
255
258
256
259
out[ CpuFeatureLevel :: SSE2 as usize ] = SAD_FNS_SSE2 ;
257
260
out[ CpuFeatureLevel :: SSSE3 as usize ] = SAD_FNS_SSE2 ;
261
+ out[ CpuFeatureLevel :: SSE4_1 as usize ] = SAD_FNS_SSE2 ;
258
262
out[ CpuFeatureLevel :: AVX2 as usize ] = SAD_FNS_AVX2 ;
259
263
260
264
out
@@ -276,11 +280,33 @@ pub(crate) static SAD_HBD_FNS: [[Option<SadHBDFn>; DIST_FNS_LENGTH];
276
280
let mut out = [ [ None ; DIST_FNS_LENGTH ] ; CpuFeatureLevel :: len ( ) ] ;
277
281
278
282
out[ CpuFeatureLevel :: SSSE3 as usize ] = SAD_HBD_FNS_SSSE3 ;
283
+ out[ CpuFeatureLevel :: SSE4_1 as usize ] = SAD_HBD_FNS_SSSE3 ;
279
284
out[ CpuFeatureLevel :: AVX2 as usize ] = SAD_HBD_FNS_SSSE3 ;
280
285
281
286
out
282
287
} ;
283
288
289
// SATD kernel table indexed by `BlockSize as usize`. Every slot starts as
// `None`; only the BLOCK_8X8 slot is populated below. This table is installed
// as the `CpuFeatureLevel::SSSE3` row of `SATD_FNS`.
+ static SATD_FNS_SSSE3 : [ Option < SatdFn > ; DIST_FNS_LENGTH ] = {
290
+ let mut out: [ Option < SatdFn > ; DIST_FNS_LENGTH ] = [ None ; DIST_FNS_LENGTH ] ;
291
+
292
+ use BlockSize :: * ;
293
+
294
+ out[ BLOCK_8X8 as usize ] = Some ( rav1e_satd_8x8_ssse3) ; // listed with `u8` in declare_asm_dist_fn!
295
+
296
+ out
297
+ } ;
298
+
299
// SATD kernel table indexed by `BlockSize as usize`. Every slot starts as
// `None`; the BLOCK_4X4 and BLOCK_8X8 slots are populated below. This table is
// installed as the `CpuFeatureLevel::SSE4_1` row of `SATD_FNS`.
+ static SATD_FNS_SSE4 : [ Option < SatdFn > ; DIST_FNS_LENGTH ] = {
300
+ let mut out: [ Option < SatdFn > ; DIST_FNS_LENGTH ] = [ None ; DIST_FNS_LENGTH ] ;
301
+
302
+ use BlockSize :: * ;
303
+
304
+ out[ BLOCK_4X4 as usize ] = Some ( rav1e_satd_4x4_sse4) ;
305
+ out[ BLOCK_8X8 as usize ] = Some ( rav1e_satd_8x8_ssse3) ; // same kernel as the SSSE3 table: each SATD_FNS row is assigned whole, so it is repeated here
306
+
307
+ out
308
+ } ;
309
+
284
310
static SATD_FNS_AVX2 : [ Option < SatdFn > ; DIST_FNS_LENGTH ] = {
285
311
let mut out: [ Option < SatdFn > ; DIST_FNS_LENGTH ] = [ None ; DIST_FNS_LENGTH ] ;
286
312
@@ -318,6 +344,8 @@ pub(crate) static SATD_FNS: [[Option<SatdFn>; DIST_FNS_LENGTH];
318
344
CpuFeatureLevel :: len ( ) ] = {
319
345
let mut out = [ [ None ; DIST_FNS_LENGTH ] ; CpuFeatureLevel :: len ( ) ] ;
320
346
347
+ out[ CpuFeatureLevel :: SSSE3 as usize ] = SATD_FNS_SSSE3 ;
348
+ out[ CpuFeatureLevel :: SSE4_1 as usize ] = SATD_FNS_SSE4 ;
321
349
out[ CpuFeatureLevel :: AVX2 as usize ] = SATD_FNS_AVX2 ;
322
350
323
351
out
@@ -416,6 +444,10 @@ mod test {
416
444
"avx2"
417
445
) ;
418
446
447
+ test_dist_fns ! ( ( 8 , 8 ) , satd, 8 , ssse3, "ssse3" ) ;
448
+
449
+ test_dist_fns ! ( ( 4 , 4 ) , satd, 8 , sse4, "sse4.1" ) ;
450
+
419
451
test_dist_fns ! (
420
452
( 4 , 4 ) ,
421
453
( 8 , 8 ) ,
0 commit comments