Skip to content

Commit 59a15dd

Browse files
committed
Compute frame pyramid sizes and frame depths
1 parent 4f7bf50 commit 59a15dd

File tree

3 files changed

+170
-43
lines changed

3 files changed

+170
-43
lines changed

src/api/internal.rs

+162-40
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ use crate::stats::EncoderStats;
2828
use crate::tiling::Area;
2929
use crate::util::Pixel;
3030
use arrayvec::ArrayVec;
31+
use debug_unreachable::debug_unreachable;
32+
use itertools::Itertools;
3133
use rust_hawktracer::*;
3234
use std::cmp;
3335
use std::collections::{BTreeMap, BTreeSet};
@@ -234,11 +236,10 @@ pub(crate) struct ContextInner<T: Pixel> {
234236
pub(super) frame_q: FrameQueue<T>,
235237
/// Maps *output_frameno* to frame data
236238
pub(super) frame_data: FrameDataQueue<T>,
237-
/// A list of the input_frameno for keyframes in this encode.
238-
/// Needed so that we don't need to keep all of the frame_invariants in
239-
/// memory for the whole life of the encode.
240-
// TODO: Is this needed at all?
241-
keyframes: BTreeSet<u64>,
239+
/// A list of the precomputed frame types and pyramid depth for frames within the lookahead.
240+
/// This allows us to have dynamic pyramid depths and widths by computing them before
241+
/// creating the frame invariants.
242+
frame_depths: BTreeMap<u64, FrameDepth>,
242243
// TODO: Is this needed at all?
243244
keyframes_forced: BTreeSet<u64>,
244245
/// A storage space for reordered frames.
@@ -266,8 +267,8 @@ impl<T: Pixel> ContextInner<T> {
266267
pub fn new(enc: &EncoderConfig) -> Self {
267268
// initialize with temporal delimiter
268269
let packet_data = TEMPORAL_DELIMITER.to_vec();
269-
let mut keyframes = BTreeSet::new();
270-
keyframes.insert(0);
270+
let mut frame_depths = BTreeMap::new();
271+
frame_depths.insert(0, FrameDepth::Intra);
271272

272273
let maybe_ac_qi_max =
273274
if enc.quantizer < 255 { Some(enc.quantizer as u8) } else { None };
@@ -284,7 +285,7 @@ impl<T: Pixel> ContextInner<T> {
284285
frames_processed: 0,
285286
frame_q: BTreeMap::new(),
286287
frame_data: BTreeMap::new(),
287-
keyframes,
288+
frame_depths,
288289
keyframes_forced: BTreeSet::new(),
289290
packet_data,
290291
gop_output_frameno_start: BTreeMap::new(),
@@ -362,8 +363,8 @@ impl<T: Pixel> ContextInner<T> {
362363
let lookahead_frames = self
363364
.frame_q
364365
.range(self.next_lookahead_frame - 1..)
365-
.filter_map(|(&_input_frameno, frame)| frame.as_ref())
366-
.collect::<Vec<&Arc<Frame<T>>>>();
366+
.filter_map(|(&_input_frameno, frame)| frame.as_ref().map(Arc::clone))
367+
.collect::<Vec<Arc<Frame<T>>>>();
367368

368369
if is_flushing {
369370
// This is the last time send_frame is called, process all the
@@ -376,22 +377,10 @@ impl<T: Pixel> ContextInner<T> {
376377
break;
377378
}
378379

379-
Self::compute_keyframe_placement(
380-
cur_lookahead_frames,
381-
&self.keyframes_forced,
382-
&mut self.keyframe_detector,
383-
&mut self.next_lookahead_frame,
384-
&mut self.keyframes,
385-
);
380+
self.compute_frame_placement(cur_lookahead_frames);
386381
}
387382
} else {
388-
Self::compute_keyframe_placement(
389-
&lookahead_frames,
390-
&self.keyframes_forced,
391-
&mut self.keyframe_detector,
392-
&mut self.next_lookahead_frame,
393-
&mut self.keyframes,
394-
);
383+
self.compute_frame_placement(&lookahead_frames);
395384
}
396385
}
397386

@@ -439,10 +428,13 @@ impl<T: Pixel> ContextInner<T> {
439428
&self, gop_input_frameno_start: u64, ignore_limit: bool,
440429
) -> u64 {
441430
let next_detected = self
442-
.keyframes
431+
.frame_depths
443432
.iter()
444-
.find(|&&input_frameno| input_frameno > gop_input_frameno_start)
445-
.cloned();
433+
.find(|&(&input_frameno, frame_depth)| {
434+
frame_depth == &FrameDepth::Intra
435+
&& input_frameno > gop_input_frameno_start
436+
})
437+
.map(|(input_frameno, _)| *input_frameno);
446438
let mut next_limit =
447439
gop_input_frameno_start + self.config.max_key_frame_interval;
448440
if !ignore_limit && self.limit.is_some() {
@@ -577,7 +569,8 @@ impl<T: Pixel> ContextInner<T> {
577569
}
578570

579571
// Now that we know the input_frameno, look up the correct frame type
580-
let frame_type = if self.keyframes.contains(&input_frameno) {
572+
let frame_type = if self.frame_depths[&input_frameno] == FrameDepth::Intra
573+
{
581574
FrameType::KEY
582575
} else {
583576
FrameType::INTER
@@ -862,22 +855,145 @@ impl<T: Pixel> ContextInner<T> {
862855
}
863856

864857
#[hawktracer(compute_keyframe_placement)]
865-
pub fn compute_keyframe_placement(
866-
lookahead_frames: &[&Arc<Frame<T>>], keyframes_forced: &BTreeSet<u64>,
867-
keyframe_detector: &mut SceneChangeDetector<T>,
868-
next_lookahead_frame: &mut u64, keyframes: &mut BTreeSet<u64>,
858+
pub fn compute_frame_placement(
859+
&mut self, lookahead_frames: &[Arc<Frame<T>>],
869860
) {
870-
if keyframes_forced.contains(next_lookahead_frame)
871-
|| keyframe_detector.analyze_next_frame(
861+
if self.keyframes_forced.contains(&self.next_lookahead_frame) {
862+
self.frame_depths.insert(self.next_lookahead_frame, FrameDepth::Intra);
863+
} else {
864+
let is_keyframe = self.keyframe_detector.analyze_next_frame(
872865
lookahead_frames,
873-
*next_lookahead_frame,
874-
*keyframes.iter().last().unwrap(),
875-
)
876-
{
877-
keyframes.insert(*next_lookahead_frame);
866+
self.next_lookahead_frame,
867+
*self.frame_depths.iter().last().unwrap().0,
868+
);
869+
if is_keyframe {
870+
self.keyframe_detector.inter_costs.remove(&self.next_lookahead_frame);
871+
self.frame_depths.insert(self.next_lookahead_frame, FrameDepth::Intra);
872+
} else if self.frame_depths[&(self.next_lookahead_frame - 1)]
873+
== FrameDepth::Intra
874+
{
875+
// The last frame is a keyframe, so this one must start a new mini-GOP
876+
self.keyframe_detector.inter_costs.remove(&self.next_lookahead_frame);
877+
self
878+
.frame_depths
879+
.insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 });
880+
} else {
881+
self.compute_current_minigop_cost();
882+
};
878883
}
879884

880-
*next_lookahead_frame += 1;
885+
self.next_lookahead_frame += 1;
886+
}
887+
888+
fn compute_current_minigop_cost(&mut self) {
889+
let minigop_start_frame = *self
890+
.frame_depths
891+
.iter()
892+
.rev()
893+
.find(|(_, d)| **d == FrameDepth::Inter { depth: 0 })
894+
.unwrap()
895+
.0;
896+
897+
let current_width =
898+
(self.next_lookahead_frame - minigop_start_frame) as u8;
899+
let max_pyramid_width =
900+
self.config.speed_settings.rdo_lookahead_frames.min(32) as u8;
901+
902+
let mut need_new_minigop = false;
903+
if current_width == max_pyramid_width {
904+
// Since we hit the max width, we must start a new mini-GOP.
905+
need_new_minigop = true;
906+
} else {
907+
let current_minigop_cost = self
908+
.keyframe_detector
909+
.inter_costs
910+
.range(minigop_start_frame..=self.next_lookahead_frame)
911+
.map(|cost| {
912+
// Adjust the inter cost down to 8-bit scaling
913+
*cost.1 / (1 << (self.config.bit_depth - 8)) as f64
914+
})
915+
.sum::<f64>();
916+
let allowance = match current_width + 1 {
917+
// Depth 0
918+
1..=2 => 18000.0,
919+
// Depth 1
920+
3 => 20000.0,
921+
// Depth 2
922+
4 => 20000.0,
923+
// Depth 3
924+
5..=8 => 18000.0,
925+
// Depth 4
926+
9..=16 => 12000.0,
927+
// Depth 5
928+
17..=32 => 10000.0,
929+
// SAFETY: 32 is the max mini-GOP width
930+
_ => unsafe { debug_unreachable!() },
931+
};
932+
if current_minigop_cost > allowance {
933+
need_new_minigop = true;
934+
}
935+
}
936+
937+
if need_new_minigop {
938+
self.compute_minigop_frame_order(
939+
minigop_start_frame,
940+
self.next_lookahead_frame - 1,
941+
);
942+
self
943+
.frame_depths
944+
.insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 });
945+
for frameno in minigop_start_frame..=self.next_lookahead_frame {
946+
self.keyframe_detector.inter_costs.remove(&frameno);
947+
}
948+
}
949+
}
950+
951+
// Start and end frame are inclusive
952+
fn compute_minigop_frame_order(&mut self, start_frame: u64, end_frame: u64) {
953+
// By this point, `start_frame` should already be inserted at depth 0
954+
if start_frame == end_frame {
955+
return;
956+
}
957+
958+
let mut frames = ((start_frame + 1)..=end_frame).collect::<BTreeSet<_>>();
959+
let mut current_depth = 0;
960+
while !frames.is_empty() {
961+
if current_depth == 0 {
962+
// Special case for depth 0, we generally want the last frame at this depth
963+
self
964+
.frame_depths
965+
.insert(frames.pop_last().unwrap(), FrameDepth::Inter { depth: 0 });
966+
current_depth = 1;
967+
} else {
968+
let max_frames_in_level = 1 << (current_depth - 1);
969+
if frames.len() <= max_frames_in_level {
970+
for frameno in frames.into_iter() {
971+
self
972+
.frame_depths
973+
.insert(frameno, FrameDepth::Inter { depth: current_depth });
974+
}
975+
break;
976+
} else {
977+
let mut breakpoints = vec![*frames.first().unwrap()];
978+
let mut prev_val = *frames.first().unwrap();
979+
for frameno in &frames {
980+
if *frameno > prev_val + 1 {
981+
breakpoints.push(*frameno);
982+
}
983+
prev_val = *frameno;
984+
}
985+
breakpoints.push(*frames.last().unwrap());
986+
for (start, end) in breakpoints.into_iter().tuple_windows() {
987+
let midpoint = (end - start + 1) / 2;
988+
frames.remove(&midpoint);
989+
self
990+
.frame_depths
991+
.insert(midpoint, FrameDepth::Inter { depth: current_depth });
992+
}
993+
current_depth += 1;
994+
}
995+
}
996+
}
881997
}
882998

883999
#[hawktracer(compute_frame_invariants)]
@@ -1718,3 +1834,9 @@ impl<T: Pixel> ContextInner<T> {
17181834
}
17191835
}
17201836
}
1837+
1838+
/// Placement of a frame as decided by the lookahead, stored per input frame
/// number in `ContextInner::frame_depths`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum FrameDepth {
  /// The frame is a keyframe.
  Intra,
  /// The frame is an inter frame inside a mini-GOP.
  Inter {
    /// Pyramid depth of the frame. The first and the last frame of a
    /// mini-GOP sit at depth 0.
    depth: u8,
  },
}

src/scenechange/mod.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,12 @@ pub struct SceneChangeDetector<T: Pixel> {
9292
cpu_feature_level: CpuFeatureLevel,
9393
encoder_config: EncoderConfig,
9494
sequence: Arc<Sequence>,
95-
/// Calculated intra costs for each input frame.
95+
/// Calculated block-level intra costs for each input frame.
9696
/// These are cached for reuse later in rav1e.
9797
pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>,
98+
/// Calculated frame-level inter costs for each input frame compared to its previous frame.
99+
/// These are cached for reuse later in rav1e.
100+
pub(crate) inter_costs: BTreeMap<u64, f64>,
98101
/// Temporary buffer used by estimate_intra_costs.
99102
pub(crate) temp_plane: Option<Plane<T>>,
100103
}
@@ -152,6 +155,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
152155
encoder_config,
153156
sequence,
154157
intra_costs: BTreeMap::new(),
158+
inter_costs: BTreeMap::new(),
155159
temp_plane: None,
156160
}
157161
}
@@ -166,7 +170,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
166170
/// This will gracefully handle the first frame in the video as well.
167171
#[hawktracer(analyze_next_frame)]
168172
pub fn analyze_next_frame(
169-
&mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
173+
&mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
170174
previous_keyframe: u64,
171175
) -> bool {
172176
// Use score deque for adaptive threshold for scene cut
@@ -254,7 +258,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
254258

255259
// Initially fill score deque with frame scores
256260
fn initialize_score_deque(
257-
&mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
261+
&mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
258262
init_len: usize,
259263
) {
260264
for x in 0..init_len {

src/scenechange/standard.rs

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
7575
self.sequence.clone(),
7676
buffer,
7777
);
78+
self.inter_costs.insert(input_frameno, mv_inter_cost);
7879
});
7980
s.spawn(|_| {
8081
imp_block_cost =

0 commit comments

Comments
 (0)