@@ -139,7 +139,6 @@ const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
139
139
mod nasm {
140
140
use super :: * ;
141
141
use crate :: plane:: * ;
142
- use crate :: util:: * ;
143
142
144
143
use std:: mem;
145
144
@@ -255,44 +254,23 @@ mod nasm {
255
254
height : usize , col_frac : i32 , row_frac : i32 , mode_x : FilterMode ,
256
255
mode_y : FilterMode , bit_depth : usize
257
256
) {
258
- #[ cfg( all( target_arch = "x86_64" , not( windows) , feature = "nasm" ) ) ]
259
- {
260
- if is_x86_feature_detected ! ( "avx2" ) && bit_depth == 8 {
261
- let mut dst8: AlignedArray < [ u8 ; 128 * 128 ] > =
262
- UninitializedAlignedArray ( ) ;
263
- let mut src8: [ u8 ; ( 128 + 7 ) * ( 128 + 7 ) ] =
264
- unsafe { mem:: uninitialized ( ) } ;
265
- unsafe {
266
- convert_slice_2d (
267
- src8. as_mut_ptr ( ) ,
268
- width + 7 ,
269
- src. go_left ( 3 ) . go_up ( 3 ) . as_ptr ( ) ,
270
- src. plane . cfg . stride ,
271
- width + 7 ,
272
- height + 7
273
- ) ;
274
- select_put_fn_avx2 ( mode_x, mode_y) (
275
- dst8. array . as_mut_ptr ( ) ,
276
- width as isize ,
277
- src8[ ( width + 7 ) * 3 + 3 ..] . as_ptr ( ) ,
278
- ( width + 7 ) as isize ,
279
- width as i32 ,
280
- height as i32 ,
281
- col_frac,
282
- row_frac
283
- ) ;
284
- let dst_stride = dst. plane . cfg . stride ;
285
- convert_slice_2d (
286
- dst. as_mut_ptr ( ) ,
287
- dst_stride,
288
- dst8. array . as_ptr ( ) ,
289
- width,
290
- width,
291
- height
292
- ) ;
293
- }
294
- return ;
257
+ if mem:: size_of :: < T > ( ) == 1 && is_x86_feature_detected ! ( "avx2" ) {
258
+ debug_assert ! ( bit_depth == 8 ) ;
259
+ let dst_stride = dst. plane . cfg . stride as isize ;
260
+ let src_stride = src. plane . cfg . stride as isize ;
261
+ unsafe {
262
+ select_put_fn_avx2 ( mode_x, mode_y) (
263
+ dst. as_mut_ptr ( ) as * mut _ ,
264
+ dst_stride,
265
+ src. as_ptr ( ) as * const _ ,
266
+ src_stride,
267
+ width as i32 ,
268
+ height as i32 ,
269
+ col_frac,
270
+ row_frac
271
+ ) ;
295
272
}
273
+ return ;
296
274
}
297
275
super :: native:: put_8tap (
298
276
dst, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth,
@@ -304,66 +282,47 @@ mod nasm {
304
282
col_frac : i32 , row_frac : i32 , mode_x : FilterMode , mode_y : FilterMode ,
305
283
bit_depth : usize
306
284
) {
307
- if is_x86_feature_detected ! ( "avx2" ) && bit_depth == 8 {
308
- let mut src8 : [ u8 ; ( 128 + 7 ) * ( 128 + 7 ) ] =
309
- unsafe { mem :: uninitialized ( ) } ;
285
+ if mem :: size_of :: < T > ( ) == 1 && is_x86_feature_detected ! ( "avx2" ) {
286
+ debug_assert ! ( bit_depth == 8 ) ;
287
+ let src_stride = src . plane . cfg . stride as isize ;
310
288
unsafe {
311
- convert_slice_2d (
312
- src8. as_mut_ptr ( ) ,
313
- width + 7 ,
314
- src. go_left ( 3 ) . go_up ( 3 ) . as_ptr ( ) ,
315
- src. plane . cfg . stride ,
316
- width + 7 ,
317
- height + 7
318
- ) ;
319
289
select_prep_fn_avx2 ( mode_x, mode_y) (
320
290
tmp. as_mut_ptr ( ) ,
321
- src8 [ ( width + 7 ) * 3 + 3 .. ] . as_ptr ( ) ,
322
- ( width + 7 ) as isize ,
291
+ src . as_ptr ( ) as * const _ ,
292
+ src_stride ,
323
293
width as i32 ,
324
294
height as i32 ,
325
295
col_frac,
326
296
row_frac
327
297
) ;
328
298
}
329
- } else {
330
- super :: native:: prep_8tap (
331
- tmp, src, width, height, col_frac, row_frac, mode_x, mode_y,
332
- bit_depth,
333
- ) ;
299
+ return ;
334
300
}
301
+ super :: native:: prep_8tap (
302
+ tmp, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth
303
+ ) ;
335
304
}
336
305
337
306
pub fn mc_avg < T : Pixel > (
338
307
dst : & mut PlaneMutSlice < ' _ , T > , tmp1 : & [ i16 ] , tmp2 : & [ i16 ] , width : usize ,
339
308
height : usize , bit_depth : usize
340
309
) {
341
- if is_x86_feature_detected ! ( "avx2" ) && bit_depth == 8 {
342
- let mut dst8 : AlignedArray < [ u8 ; 128 * 128 ] > =
343
- UninitializedAlignedArray ( ) ;
310
+ if mem :: size_of :: < T > ( ) == 1 && is_x86_feature_detected ! ( "avx2" ) {
311
+ debug_assert ! ( bit_depth == 8 ) ;
312
+ let dst_stride = dst . plane . cfg . stride as isize ;
344
313
unsafe {
345
314
rav1e_avg_avx2 (
346
- dst8 . array . as_mut_ptr ( ) ,
347
- width as isize ,
315
+ dst . as_mut_ptr ( ) as * mut _ ,
316
+ dst_stride ,
348
317
tmp1. as_ptr ( ) ,
349
318
tmp2. as_ptr ( ) ,
350
319
width as i32 ,
351
320
height as i32
352
321
) ;
353
- let dst_stride = dst. plane . cfg . stride ;
354
- convert_slice_2d (
355
- dst. as_mut_ptr ( ) ,
356
- dst_stride,
357
- dst8. array . as_ptr ( ) ,
358
- width,
359
- width,
360
- height
361
- ) ;
362
322
}
363
323
return ;
364
- } else {
365
- super :: native:: mc_avg ( dst, tmp1, tmp2, width, height, bit_depth) ;
366
324
}
325
+ super :: native:: mc_avg ( dst, tmp1, tmp2, width, height, bit_depth) ;
367
326
}
368
327
}
369
328
0 commit comments