Skip to content

Commit 32e0b52

Browse files
shssoichiro authored and lu-zero committed
Avoid a very expensive clone in compute_block_importances
This clone was responsible for 40% of allocations within rav1e. Although counterintuitive, removing the frame data from the map and reinserting it at the end of the iteration is better because it avoids this clone.
1 parent 75559ef commit 32e0b52

File tree

1 file changed

+16
-10
lines changed

1 file changed

+16
-10
lines changed

src/api/internal.rs

+16-10
Original file line number | Diff line number | Diff line change
@@ -879,20 +879,24 @@ impl<T: Pixel> ContextInner<T> {
879879
);
880880

881881
for &output_frameno in output_framenos.iter().skip(1).rev() {
882-
// Copy fi from the map to avoid the borrow checker complaining
883-
// when we mutably borrow another fi.
884-
let fi = self
885-
.frame_data
886-
.get(&output_frameno)
887-
.map(|data| data.fi.clone())
888-
.unwrap();
889-
890882
// TODO: see comment above about key frames not having references.
891-
if fi.frame_type == FrameType::KEY {
892-
// No need to update the existing frame invariants alone
883+
if self.frame_data.get(&output_frameno).unwrap().fi.frame_type
884+
== FrameType::KEY
885+
{
893886
continue;
894887
}
895888

889+
// Remove fi from the map temporarily and put it back at the end of
890+
// the iteration. This is required because we need to mutably borrow
891+
// referenced fis from the map, and that wouldn't be possible if this was
892+
// an active borrow.
893+
//
894+
// Performance note: Contrary to intuition,
895+
// removing the data and re-inserting it at the end
896+
// is more performant because it avoids a very expensive clone.
897+
let output_frame_data = self.frame_data.remove(&output_frameno).unwrap();
898+
let fi = &output_frame_data.fi;
899+
896900
let frame = self.frame_q[&fi.input_frameno].as_ref().unwrap();
897901

898902
// There can be at most 3 of these.
@@ -1098,6 +1102,8 @@ impl<T: Pixel> ContextInner<T> {
10981102
}
10991103
}
11001104
});
1105+
1106+
self.frame_data.insert(output_frameno, output_frame_data);
11011107
}
11021108

11031109
// Get the final block importance values for the current output frame.

0 commit comments

Comments
 (0)