Skip to content

Commit 0828d1f

Browse files
committed
wip
1 parent 8de70e8 commit 0828d1f

File tree

2 files changed

+111
-61
lines changed

2 files changed

+111
-61
lines changed

src/api/internal.rs

+2
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ impl<T: Pixel> ContextInner<T> {
271271

272272
let seq = Arc::new(Sequence::new(enc));
273273
let inter_cfg = InterConfig::new(enc);
274+
let lookahead_distance = inter_cfg.keyframe_lookahead_distance() as usize;
274275

275276
ContextInner {
276277
frame_count: 0,
@@ -288,6 +289,7 @@ impl<T: Pixel> ContextInner<T> {
288289
keyframe_detector: SceneChangeDetector::new(
289290
*enc,
290291
CpuFeatureLevel::default(),
292+
lookahead_distance,
291293
seq.clone(),
292294
),
293295
config: Arc::new(*enc),

src/scenechange/mod.rs

+109-61
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ pub struct SceneChangeDetector<T: Pixel> {
2727
scale_factor: usize,
2828
// Frame buffer for scaled frames
2929
frame_buffer: Vec<Plane<T>>,
30+
// Initial deque offset derived from the lookahead (5 when lookahead >= 5, otherwise 0)
31+
lookahead_offset: usize,
32+
// Current deque offset; starts at the lookahead offset and is reduced when fewer frames remain
33+
deque_offset: usize,
3034
// Scenechange results for adaptive threshold
31-
score_deque: Vec<f64>,
35+
score_deque: Vec<(f64, f64)>,
3236
/// Number of pixels in scaled frame for fast mode
3337
pixels: usize,
3438
/// The bit depth of the video.
@@ -42,7 +46,7 @@ pub struct SceneChangeDetector<T: Pixel> {
4246
impl<T: Pixel> SceneChangeDetector<T> {
4347
pub fn new(
4448
encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
45-
sequence: Arc<Sequence>,
49+
lookahead_distance: usize, sequence: Arc<Sequence>,
4650
) -> Self {
4751
// This implementation is based on a Python implementation at
4852
// https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/.
@@ -55,6 +59,9 @@ impl<T: Pixel> SceneChangeDetector<T> {
5559
// This may be adjusted later.
5660
//
5761
// This threshold is only used for the fast scenecut implementation.
62+
//
63+
// Testing has shown that the default threshold of 12 overallocates keyframes by almost double,
64+
// compared to other scene change implementations
5865
const BASE_THRESHOLD: usize = 25;
5966
let bit_depth = encoder_config.bit_depth;
6067
let fast_mode = encoder_config.speed_settings.fast_scene_detection
@@ -64,23 +71,30 @@ impl<T: Pixel> SceneChangeDetector<T> {
6471
let scale_factor =
6572
if fast_mode { detect_scale_factor(&sequence) } else { 1_usize };
6673

67-
let score_deque = Vec::with_capacity(5);
68-
// Pixel count for fast scenedetect
74+
// Set lookahead offset to 5 if normal lookahead available
75+
let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
76+
let deque_offset = lookahead_offset;
77+
78+
let score_deque = Vec::with_capacity(5 + lookahead_distance);
6979

80+
// Pixel count for fast scenedetect
7081
let pixels = if fast_mode {
7182
(sequence.max_frame_height as usize / scale_factor)
7283
* (sequence.max_frame_width as usize / scale_factor)
7384
} else {
7485
1
7586
};
7687

77-
let frame_buffer = Vec::with_capacity(2);
88+
let frame_buffer =
89+
if fast_mode { Vec::with_capacity(2) } else { Vec::new() };
7890

7991
Self {
8092
threshold: BASE_THRESHOLD * bit_depth / 8,
8193
fast_mode,
8294
scale_factor,
8395
frame_buffer,
96+
lookahead_offset,
97+
deque_offset,
8498
score_deque,
8599
pixels,
86100
bit_depth,
@@ -103,6 +117,9 @@ impl<T: Pixel> SceneChangeDetector<T> {
103117
&mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
104118
previous_keyframe: u64,
105119
) -> bool {
120+
// Use score deque for adaptive threshold for scene cut
121+
// Declare score_deque offset based on lookahead for scene change scores
122+
106123
// Find the distance to the previous keyframe.
107124
let distance = input_frameno - previous_keyframe;
108125

@@ -118,56 +135,111 @@ impl<T: Pixel> SceneChangeDetector<T> {
118135
return false;
119136
}
120137

121-
// Set our scenecut method
138+
// Decrease deque offset if there are no more frames
139+
if self.deque_offset > frame_set.len() && self.lookahead_offset > 0 {
140+
self.deque_offset = frame_set.len();
141+
}
142+
143+
// Initially fill score deque with forward frames
144+
if self.deque_offset > 0 && self.score_deque.len() == 0 {
145+
for x in 0..self.lookahead_offset {
146+
// Filling score deque with forward frames
147+
let result = if self.fast_mode {
148+
self.fast_scenecut(frame_set[x].clone(), frame_set[x + 1].clone())
149+
} else {
150+
self.cost_scenecut(
151+
frame_set[x].clone(),
152+
frame_set[x + 1].clone(),
153+
input_frameno,
154+
previous_keyframe,
155+
)
156+
};
157+
self.score_deque.push((result.inter_cost, result.intra_cost));
158+
}
159+
debug!("{:.0?}", self.score_deque)
160+
}
161+
162+
// Running single frame comparison and adding it to deque
122163
let result = if self.fast_mode {
123-
self.fast_scenecut(frame_set[0].clone(), frame_set[1].clone())
164+
self.fast_scenecut(
165+
frame_set[0 + self.deque_offset].clone(),
166+
frame_set[1 + self.deque_offset].clone(),
167+
)
124168
} else {
125169
self.cost_scenecut(
126-
frame_set[0].clone(),
127-
frame_set[1].clone(),
170+
frame_set[0 + self.deque_offset].clone(),
171+
frame_set[1 + self.deque_offset].clone(),
128172
input_frameno,
129173
previous_keyframe,
130174
)
131175
};
176+
self
177+
.score_deque
178+
.push((result.inter_cost as f64, result.intra_cost as f64));
179+
180+
// Adaptive scenecut check;
181+
let scenecut = self.adaptive_scenecut();
132182

133183
debug!(
134-
"[SC-Detect] Frame {}: T={:.1} P={:.1} {}",
184+
"[SC-Detect] Frame {}: Cost={:.0} Threshold= {:.0} {}",
135185
input_frameno,
136-
result.threshold,
137-
result.inter_cost,
138-
if result.has_scenecut { "Scenecut" } else { "No cut" }
186+
self.score_deque[self.score_deque.len() - self.deque_offset].0,
187+
self.score_deque[self.score_deque.len() - self.deque_offset].1,
188+
if scenecut { "Scenecut" } else { "No cut" }
139189
);
140-
result.has_scenecut
190+
191+
if scenecut {
192+
// Reset lookahead offset
193+
self.deque_offset = self.lookahead_offset;
194+
195+
// Clear buffers and deque
196+
self.frame_buffer.clear();
197+
self.score_deque.clear();
198+
} else {
199+
// Keep score deque 5 + lookahead_size frames
200+
self
201+
.score_deque
202+
.push((result.inter_cost as f64, result.intra_cost as f64));
203+
if self.score_deque.len() > 5 + self.deque_offset {
204+
self.score_deque.remove(0);
205+
}
206+
}
207+
208+
scenecut
141209
}
142210

143211
/// Compares current scene score to adapted threshold based on previous scores
212+
/// The value of the current frame is offset by the lookahead, if lookahead >= 5
144213
/// Returns true if current scene score is higher than adapted threshold
145-
fn adaptive_scenecut(&mut self, scene_score: f64) -> bool {
146-
if self.score_deque.is_empty() {
147-
true // we skip high delta on first frame comparision as it's probably inside flashing or high motion scene
214+
fn adaptive_scenecut(&mut self) -> bool {
215+
// Max cost of all available frames
216+
let max_of_deque: f64 = self
217+
.score_deque
218+
.iter()
219+
.cloned()
220+
.map(|(_, b)| b)
221+
.fold(-1. / 0. /* -inf */, f64::max);
222+
223+
// Scenecut check
224+
let threshold = if self.fast_mode {
225+
self.threshold as f64 + max_of_deque
148226
} else {
149-
let max_of_deque: f64 = self
150-
.score_deque
151-
.iter()
152-
.cloned()
153-
.fold(-1. / 0. /* -inf */, f64::max); // max of last n(5) frames
154-
155-
//
156-
let scenecut = scene_score > self.threshold as f64 + max_of_deque;
157-
debug!(
158-
"[SC-Detect] P: {:.1} {:.1?} Cut: {}",
159-
scene_score, self.score_deque, scenecut
160-
);
161-
scenecut
162-
}
227+
max_of_deque
228+
};
229+
230+
let scene_score =
231+
self.score_deque[self.score_deque.len() - self.deque_offset].0;
232+
233+
let scenecut = scene_score >= threshold;
234+
scenecut
163235
}
164236

165237
/// The fast algorithm detects fast cuts using a raw difference
166238
/// in pixel values between the scaled frames.
167239
#[hawktracer(fast_scenecut)]
168240
fn fast_scenecut(
169241
&mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
170-
) -> ScenecutResult {
242+
) -> ScenecutData {
171243
// Downscaling both frames for comparison
172244
// Moving scaled frames to buffer
173245
if self.frame_buffer.is_empty() {
@@ -184,27 +256,10 @@ impl<T: Pixel> SceneChangeDetector<T> {
184256
let delta =
185257
self.delta_in_planes(&self.frame_buffer[0], &self.frame_buffer[1]);
186258

187-
// Adaptive scenecut check;
188-
let scenecut =
189-
delta >= self.threshold as f64 && self.adaptive_scenecut(delta);
190-
191-
if scenecut {
192-
// Clear buffers
193-
self.frame_buffer.clear();
194-
self.score_deque.clear();
195-
} else {
196-
// Keep score deque 5 frames
197-
self.score_deque.push(delta as f64);
198-
if self.score_deque.len() > 5 {
199-
self.score_deque.remove(0);
200-
}
201-
}
202-
203-
ScenecutResult {
259+
ScenecutData {
204260
intra_cost: self.threshold as f64,
205261
threshold: self.threshold as f64,
206262
inter_cost: delta as f64,
207-
has_scenecut: scenecut,
208263
}
209264
}
210265

@@ -217,7 +272,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
217272
fn cost_scenecut(
218273
&self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, frameno: u64,
219274
previous_keyframe: u64,
220-
) -> ScenecutResult {
275+
) -> ScenecutData {
221276
let frame2_ref2 = Arc::clone(&frame2);
222277
let (intra_cost, inter_cost) = crate::rayon::join(
223278
move || {
@@ -269,12 +324,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
269324
};
270325
let threshold = intra_cost * (1.0 - bias);
271326

272-
ScenecutResult {
273-
intra_cost,
274-
threshold,
275-
inter_cost,
276-
has_scenecut: inter_cost > threshold,
277-
}
327+
ScenecutData { intra_cost, inter_cost, threshold }
278328
}
279329

280330
/// Calculates delta between 2 planes
@@ -299,7 +349,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
299349
}
300350
}
301351

302-
/// Scaling factor for frame in scenedetection
352+
/// Scaling factor for frame in scene detection
303353
fn detect_scale_factor(sequence: &Arc<Sequence>) -> usize {
304354
let small_edge =
305355
cmp::min(sequence.max_frame_height, sequence.max_frame_width) as usize;
@@ -324,11 +374,9 @@ fn detect_scale_factor(sequence: &Arc<Sequence>) -> usize {
324374
}
325375

326376
/// This struct primarily exists for returning metrics to the caller
327-
/// for logging debug information.
328377
#[derive(Debug, Clone, Copy)]
329-
struct ScenecutResult {
378+
struct ScenecutData {
330379
intra_cost: f64,
331380
inter_cost: f64,
332381
threshold: f64,
333-
has_scenecut: bool,
334382
}

0 commit comments

Comments
 (0)