Skip to content

Commit ea608b4

Browse files
committed
Ease auto vectorization of tx distortion computation
The compiler was not autovectorizing this section of code. This switches the code from using repeat and chain to doing two separate iterations over the input coefficients. The first iteration processes the coefficients that have stored reconstructed coefficients, and the second processes the remaining coefficients, whose reconstructed counterparts can be assumed to be zero. This isn't used by default, but Thomas is working on a project that uses it.
1 parent bdef061 commit ea608b4

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

src/encoder.rs

+16-8
Original file line numberDiff line numberDiff line change
@@ -1279,18 +1279,26 @@ pub fn encode_tx_block<T: Pixel>(
12791279

12801280
let tx_dist = if rdo_type.needs_tx_dist() {
12811281
// Store tx-domain distortion of this block
1282+
// rcoeffs above 32 rows/cols aren't held in the array, because they are
1283+
// always 0. The first 32x32 is stored first in coeffs so we can iterate
1284+
// over coeffs and rcoeffs for the first 32 rows/cols. For the
1285+
// coefficients above 32 rows/cols, we iterate over the rest of coeffs
1286+
// with the assumption that rcoeff coefficients are zero.
12821287
let mut raw_tx_dist = coeffs
12831288
.iter()
1284-
.zip(
1285-
// rcoeffs above 32 rows/cols are always 0. The first 32x32 is stored
1286-
// first in coeffs, so just chain repeating zeroes to rcoeff.
1287-
rcoeffs.iter().chain(std::iter::repeat(&T::Coeff::cast_from(0))),
1288-
)
1289-
.map(|(a, b)| {
1290-
let c = i32::cast_from(*a) - i32::cast_from(*b);
1289+
.zip(rcoeffs.iter())
1290+
.map(|(&a, &b)| {
1291+
let c = i32::cast_from(a) - i32::cast_from(b);
12911292
(c * c) as u64
12921293
})
1293-
.sum::<u64>();
1294+
.sum::<u64>()
1295+
+ coeffs[rcoeffs.len()..]
1296+
.iter()
1297+
.map(|&a| {
1298+
let c = i32::cast_from(a);
1299+
(c * c) as u64
1300+
})
1301+
.sum::<u64>();
12941302

12951303
let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size));
12961304
let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1);

0 commit comments

Comments
 (0)