Skip to content

Commit 0a35174

Browse files
committed
Remove obsolete 8-bit buffers for bitdepth of 8
1 parent 9baf1d6 commit 0a35174

File tree

2 files changed

+40
-103
lines changed

2 files changed

+40
-103
lines changed

src/mc.rs

+31-72
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,6 @@ const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
139139
mod nasm {
140140
use super::*;
141141
use crate::plane::*;
142-
use crate::util::*;
143142

144143
use std::mem;
145144

@@ -255,44 +254,23 @@ mod nasm {
255254
height: usize, col_frac: i32, row_frac: i32, mode_x: FilterMode,
256255
mode_y: FilterMode, bit_depth: usize
257256
) {
258-
#[cfg(all(target_arch = "x86_64", not(windows), feature = "nasm"))]
259-
{
260-
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
261-
let mut dst8: AlignedArray<[u8; 128 * 128]> =
262-
UninitializedAlignedArray();
263-
let mut src8: [u8; (128 + 7) * (128 + 7)] =
264-
unsafe { mem::uninitialized() };
265-
unsafe {
266-
convert_slice_2d(
267-
src8.as_mut_ptr(),
268-
width + 7,
269-
src.go_left(3).go_up(3).as_ptr(),
270-
src.plane.cfg.stride,
271-
width + 7,
272-
height + 7
273-
);
274-
select_put_fn_avx2(mode_x, mode_y)(
275-
dst8.array.as_mut_ptr(),
276-
width as isize,
277-
src8[(width + 7) * 3 + 3..].as_ptr(),
278-
(width + 7) as isize,
279-
width as i32,
280-
height as i32,
281-
col_frac,
282-
row_frac
283-
);
284-
let dst_stride = dst.plane.cfg.stride;
285-
convert_slice_2d(
286-
dst.as_mut_ptr(),
287-
dst_stride,
288-
dst8.array.as_ptr(),
289-
width,
290-
width,
291-
height
292-
);
293-
}
294-
return;
257+
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
258+
debug_assert!(bit_depth == 8);
259+
let dst_stride = dst.plane.cfg.stride as isize;
260+
let src_stride = src.plane.cfg.stride as isize;
261+
unsafe {
262+
select_put_fn_avx2(mode_x, mode_y)(
263+
dst.as_mut_ptr() as *mut _,
264+
dst_stride,
265+
src.as_ptr() as *const _,
266+
src_stride,
267+
width as i32,
268+
height as i32,
269+
col_frac,
270+
row_frac
271+
);
295272
}
273+
return;
296274
}
297275
super::native::put_8tap(
298276
dst, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth,
@@ -304,66 +282,47 @@ mod nasm {
304282
col_frac: i32, row_frac: i32, mode_x: FilterMode, mode_y: FilterMode,
305283
bit_depth: usize
306284
) {
307-
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
308-
let mut src8: [u8; (128 + 7) * (128 + 7)] =
309-
unsafe { mem::uninitialized() };
285+
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
286+
debug_assert!(bit_depth == 8);
287+
let src_stride = src.plane.cfg.stride as isize;
310288
unsafe {
311-
convert_slice_2d(
312-
src8.as_mut_ptr(),
313-
width + 7,
314-
src.go_left(3).go_up(3).as_ptr(),
315-
src.plane.cfg.stride,
316-
width + 7,
317-
height + 7
318-
);
319289
select_prep_fn_avx2(mode_x, mode_y)(
320290
tmp.as_mut_ptr(),
321-
src8[(width + 7) * 3 + 3..].as_ptr(),
322-
(width + 7) as isize,
291+
src.as_ptr() as *const _,
292+
src_stride,
323293
width as i32,
324294
height as i32,
325295
col_frac,
326296
row_frac
327297
);
328298
}
329-
} else {
330-
super::native::prep_8tap(
331-
tmp, src, width, height, col_frac, row_frac, mode_x, mode_y,
332-
bit_depth,
333-
);
299+
return;
334300
}
301+
super::native::prep_8tap(
302+
tmp, src, width, height, col_frac, row_frac, mode_x, mode_y, bit_depth
303+
);
335304
}
336305

337306
pub fn mc_avg<T: Pixel>(
338307
dst: &mut PlaneMutSlice<'_, T>, tmp1: &[i16], tmp2: &[i16], width: usize,
339308
height: usize, bit_depth: usize
340309
) {
341-
if is_x86_feature_detected!("avx2") && bit_depth == 8 {
342-
let mut dst8: AlignedArray<[u8; 128 * 128]> =
343-
UninitializedAlignedArray();
310+
if mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
311+
debug_assert!(bit_depth == 8);
312+
let dst_stride = dst.plane.cfg.stride as isize;
344313
unsafe {
345314
rav1e_avg_avx2(
346-
dst8.array.as_mut_ptr(),
347-
width as isize,
315+
dst.as_mut_ptr() as *mut _,
316+
dst_stride,
348317
tmp1.as_ptr(),
349318
tmp2.as_ptr(),
350319
width as i32,
351320
height as i32
352321
);
353-
let dst_stride = dst.plane.cfg.stride;
354-
convert_slice_2d(
355-
dst.as_mut_ptr(),
356-
dst_stride,
357-
dst8.array.as_ptr(),
358-
width,
359-
width,
360-
height
361-
);
362322
}
363323
return;
364-
} else {
365-
super::native::mc_avg(dst, tmp1, tmp2, width, height, bit_depth);
366324
}
325+
super::native::mc_avg(dst, tmp1, tmp2, width, height, bit_depth);
367326
}
368327
}
369328

src/transform/inverse.rs

+9-31
Original file line number | Diff line number | Diff line change
@@ -1523,14 +1523,12 @@ mod nasm {
15231523
) where
15241524
T: Pixel,
15251525
{
1526-
let stride = output.plane.cfg.stride;
1526+
if std::mem::size_of::<T>() == 1 && is_x86_feature_detected!("avx2") {
1527+
debug_assert!(bd == 8);
15271528

1528-
if is_x86_feature_detected!("avx2") && bd == 8 {
15291529
// 64x only uses 32 coeffs
15301530
let coeff_w = Self::W.min(32);
15311531
let coeff_h = Self::H.min(32);
1532-
let mut dst8: AlignedArray<[u8; 64 * 64]> =
1533-
UninitializedAlignedArray();
15341532
let mut coeff16: AlignedArray<[i16; 32 * 32]> =
15351533
UninitializedAlignedArray();
15361534

@@ -1542,41 +1540,21 @@ mod nasm {
15421540
}
15431541
}
15441542

1543+
let stride = output.plane.cfg.stride as isize;
15451544
unsafe {
1546-
// copy output to dst8 so that the results of the inverse transform
1547-
// can be added to it
1548-
convert_slice_2d(
1549-
dst8.array.as_mut_ptr(),
1550-
Self::W,
1551-
output.as_ptr(),
1552-
stride,
1553-
Self::W,
1554-
Self::H
1555-
);
1556-
15571545
// perform the inverse transform
15581546
Self::match_tx_type(tx_type)(
1559-
dst8.array.as_mut_ptr(),
1560-
Self::W as isize,
1547+
output.as_mut_ptr() as *mut _,
1548+
stride,
15611549
coeff16.array.as_ptr(),
15621550
(coeff_w * coeff_h) as i32
15631551
);
1564-
1565-
// copy back to output
1566-
convert_slice_2d(
1567-
output.as_mut_ptr(),
1568-
stride,
1569-
dst8.array.as_ptr(),
1570-
Self::W,
1571-
Self::W,
1572-
Self::H
1573-
);
15741552
}
1575-
} else {
1576-
<Self as super::native::InvTxfm2D>::inv_txfm2d_add(
1577-
input, output, tx_type, bd,
1578-
);
1553+
return;
15791554
}
1555+
<Self as super::native::InvTxfm2D>::inv_txfm2d_add(
1556+
input, output, tx_type, bd,
1557+
);
15801558
}
15811559
}
15821560

0 commit comments

Comments
 (0)