@@ -27,8 +27,12 @@ pub struct SceneChangeDetector<T: Pixel> {
27
27
scale_factor : usize ,
28
28
// Frame buffer for scaled frames
29
29
frame_buffer : Vec < Plane < T > > ,
30
+ // Deque offset for current
31
+ lookahead_offset : usize ,
32
+ // Start deque offset based on lookahead
33
+ deque_offset : usize ,
30
34
// Scenechange results for adaptive threshold
31
- score_deque : Vec < f64 > ,
35
+ score_deque : Vec < ( f64 , f64 ) > ,
32
36
/// Number of pixels in scaled frame for fast mode
33
37
pixels : usize ,
34
38
/// The bit depth of the video.
@@ -42,7 +46,7 @@ pub struct SceneChangeDetector<T: Pixel> {
42
46
impl < T : Pixel > SceneChangeDetector < T > {
43
47
pub fn new (
44
48
encoder_config : EncoderConfig , cpu_feature_level : CpuFeatureLevel ,
45
- sequence : Arc < Sequence > ,
49
+ lookahead_distance : usize , sequence : Arc < Sequence > ,
46
50
) -> Self {
47
51
// This implementation is based on a Python implementation at
48
52
// https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/.
@@ -55,6 +59,9 @@ impl<T: Pixel> SceneChangeDetector<T> {
55
59
// This may be adjusted later.
56
60
//
57
61
// This threshold is only used for the fast scenecut implementation.
62
+ //
63
+ // Testing has shown that the default threshold of 12 overallocates keyframes by almost double,
64
+ // compared to other scene change implementations
58
65
const BASE_THRESHOLD : usize = 25 ;
59
66
let bit_depth = encoder_config. bit_depth ;
60
67
let fast_mode = encoder_config. speed_settings . fast_scene_detection
@@ -64,23 +71,30 @@ impl<T: Pixel> SceneChangeDetector<T> {
64
71
let scale_factor =
65
72
if fast_mode { detect_scale_factor ( & sequence) } else { 1_usize } ;
66
73
67
- let score_deque = Vec :: with_capacity ( 5 ) ;
68
- // Pixel count for fast scenedetect
74
+ // Set lookahead offset to 5 if normal lookahead available
75
+ let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 } ;
76
+ let deque_offset = lookahead_offset;
77
+
78
+ let score_deque = Vec :: with_capacity ( 5 + lookahead_distance) ;
69
79
80
+ // Pixel count for fast scenedetect
70
81
let pixels = if fast_mode {
71
82
( sequence. max_frame_height as usize / scale_factor)
72
83
* ( sequence. max_frame_width as usize / scale_factor)
73
84
} else {
74
85
1
75
86
} ;
76
87
77
- let frame_buffer = Vec :: with_capacity ( 2 ) ;
88
+ let frame_buffer =
89
+ if fast_mode { Vec :: with_capacity ( 2 ) } else { Vec :: new ( ) } ;
78
90
79
91
Self {
80
92
threshold : BASE_THRESHOLD * bit_depth / 8 ,
81
93
fast_mode,
82
94
scale_factor,
83
95
frame_buffer,
96
+ lookahead_offset,
97
+ deque_offset,
84
98
score_deque,
85
99
pixels,
86
100
bit_depth,
@@ -103,6 +117,9 @@ impl<T: Pixel> SceneChangeDetector<T> {
103
117
& mut self , frame_set : & [ Arc < Frame < T > > ] , input_frameno : u64 ,
104
118
previous_keyframe : u64 ,
105
119
) -> bool {
120
+ // Use score deque for adaptive threshold for scene cut
121
+ // Declare score_deque offset based on lookahead for scene change scores
122
+
106
123
// Find the distance to the previous keyframe.
107
124
let distance = input_frameno - previous_keyframe;
108
125
@@ -118,56 +135,111 @@ impl<T: Pixel> SceneChangeDetector<T> {
118
135
return false ;
119
136
}
120
137
121
- // Set our scenecut method
138
+ // Decrease deque offset if there are no more frames
139
+ if self . deque_offset > frame_set. len ( ) && self . lookahead_offset > 0 {
140
+ self . deque_offset = frame_set. len ( ) ;
141
+ }
142
+
143
+ // Initially fill score deque with forward frames
144
+ if self . deque_offset > 0 && self . score_deque . len ( ) == 0 {
145
+ for x in 0 ..self . lookahead_offset {
146
+ // Filling score deque with forward frames
147
+ let result = if self . fast_mode {
148
+ self . fast_scenecut ( frame_set[ x] . clone ( ) , frame_set[ x + 1 ] . clone ( ) )
149
+ } else {
150
+ self . cost_scenecut (
151
+ frame_set[ x] . clone ( ) ,
152
+ frame_set[ x + 1 ] . clone ( ) ,
153
+ input_frameno,
154
+ previous_keyframe,
155
+ )
156
+ } ;
157
+ self . score_deque . push ( ( result. inter_cost , result. intra_cost ) ) ;
158
+ }
159
+ debug ! ( "{:.0?}" , self . score_deque)
160
+ }
161
+
162
+ // Running single frame comparison and adding it to deque
122
163
let result = if self . fast_mode {
123
- self . fast_scenecut ( frame_set[ 0 ] . clone ( ) , frame_set[ 1 ] . clone ( ) )
164
+ self . fast_scenecut (
165
+ frame_set[ 0 + self . deque_offset ] . clone ( ) ,
166
+ frame_set[ 1 + self . deque_offset ] . clone ( ) ,
167
+ )
124
168
} else {
125
169
self . cost_scenecut (
126
- frame_set[ 0 ] . clone ( ) ,
127
- frame_set[ 1 ] . clone ( ) ,
170
+ frame_set[ 0 + self . deque_offset ] . clone ( ) ,
171
+ frame_set[ 1 + self . deque_offset ] . clone ( ) ,
128
172
input_frameno,
129
173
previous_keyframe,
130
174
)
131
175
} ;
176
+ self
177
+ . score_deque
178
+ . push ( ( result. inter_cost as f64 , result. intra_cost as f64 ) ) ;
179
+
180
+ // Adaptive scenecut check;
181
+ let scenecut = self . adaptive_scenecut ( ) ;
132
182
133
183
debug ! (
134
- "[SC-Detect] Frame {}: T ={:.1} P= {:.1 } {}" ,
184
+ "[SC-Detect] Frame {}: Cost ={:.0} Threshold= {:.0 } {}" ,
135
185
input_frameno,
136
- result . threshold ,
137
- result . inter_cost ,
138
- if result . has_scenecut { "Scenecut" } else { "No cut" }
186
+ self . score_deque [ self . score_deque . len ( ) - self . deque_offset ] . 0 ,
187
+ self . score_deque [ self . score_deque . len ( ) - self . deque_offset ] . 1 ,
188
+ if scenecut { "Scenecut" } else { "No cut" }
139
189
) ;
140
- result. has_scenecut
190
+
191
+ if scenecut {
192
+ // Reset lookahead offset
193
+ self . deque_offset = self . lookahead_offset ;
194
+
195
+ // Clear buffers and deque
196
+ self . frame_buffer . clear ( ) ;
197
+ self . score_deque . clear ( ) ;
198
+ } else {
199
+ // Keep score deque 5 + lookahead_size frames
200
+ self
201
+ . score_deque
202
+ . push ( ( result. inter_cost as f64 , result. intra_cost as f64 ) ) ;
203
+ if self . score_deque . len ( ) > 5 + self . deque_offset {
204
+ self . score_deque . remove ( 0 ) ;
205
+ }
206
+ }
207
+
208
+ scenecut
141
209
}
142
210
143
211
/// Compares current scene score to adapted threshold based on previous scores
212
+ /// Value of current frame is offset by lookahead, if lookahead >=5
144
213
/// Returns true if current scene score is higher than adapted threshold
145
- fn adaptive_scenecut ( & mut self , scene_score : f64 ) -> bool {
146
- if self . score_deque . is_empty ( ) {
147
- true // we skip high delta on first frame comparision as it's probably inside flashing or high motion scene
214
+ fn adaptive_scenecut ( & mut self ) -> bool {
215
+ // Max cost of all available frames
216
+ let max_of_deque: f64 = self
217
+ . score_deque
218
+ . iter ( )
219
+ . cloned ( )
220
+ . map ( |( _, b) | b)
221
+ . fold ( -1. / 0. /* -inf */ , f64:: max) ;
222
+
223
+ // Scenecut check
224
+ let threshold = if self . fast_mode {
225
+ self . threshold as f64 + max_of_deque
148
226
} else {
149
- let max_of_deque: f64 = self
150
- . score_deque
151
- . iter ( )
152
- . cloned ( )
153
- . fold ( -1. / 0. /* -inf */ , f64:: max) ; // max of last n(5) frames
154
-
155
- //
156
- let scenecut = scene_score > self . threshold as f64 + max_of_deque;
157
- debug ! (
158
- "[SC-Detect] P: {:.1} {:.1?} Cut: {}" ,
159
- scene_score, self . score_deque, scenecut
160
- ) ;
161
- scenecut
162
- }
227
+ max_of_deque
228
+ } ;
229
+
230
+ let scene_score =
231
+ self . score_deque [ self . score_deque . len ( ) - self . deque_offset ] . 0 ;
232
+
233
+ let scenecut = scene_score >= threshold;
234
+ scenecut
163
235
}
164
236
165
237
/// The fast algorithm detects fast cuts using a raw difference
166
238
/// in pixel values between the scaled frames.
167
239
#[ hawktracer( fast_scenecut) ]
168
240
fn fast_scenecut (
169
241
& mut self , frame1 : Arc < Frame < T > > , frame2 : Arc < Frame < T > > ,
170
- ) -> ScenecutResult {
242
+ ) -> ScenecutData {
171
243
// Downscaling both frames for comparison
172
244
// Moving scaled frames to buffer
173
245
if self . frame_buffer . is_empty ( ) {
@@ -184,27 +256,10 @@ impl<T: Pixel> SceneChangeDetector<T> {
184
256
let delta =
185
257
self . delta_in_planes ( & self . frame_buffer [ 0 ] , & self . frame_buffer [ 1 ] ) ;
186
258
187
- // Adaptive scenecut check;
188
- let scenecut =
189
- delta >= self . threshold as f64 && self . adaptive_scenecut ( delta) ;
190
-
191
- if scenecut {
192
- // Clear buffers
193
- self . frame_buffer . clear ( ) ;
194
- self . score_deque . clear ( ) ;
195
- } else {
196
- // Keep score deque 5 frames
197
- self . score_deque . push ( delta as f64 ) ;
198
- if self . score_deque . len ( ) > 5 {
199
- self . score_deque . remove ( 0 ) ;
200
- }
201
- }
202
-
203
- ScenecutResult {
259
+ ScenecutData {
204
260
intra_cost : self . threshold as f64 ,
205
261
threshold : self . threshold as f64 ,
206
262
inter_cost : delta as f64 ,
207
- has_scenecut : scenecut,
208
263
}
209
264
}
210
265
@@ -217,7 +272,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
217
272
fn cost_scenecut (
218
273
& self , frame1 : Arc < Frame < T > > , frame2 : Arc < Frame < T > > , frameno : u64 ,
219
274
previous_keyframe : u64 ,
220
- ) -> ScenecutResult {
275
+ ) -> ScenecutData {
221
276
let frame2_ref2 = Arc :: clone ( & frame2) ;
222
277
let ( intra_cost, inter_cost) = crate :: rayon:: join (
223
278
move || {
@@ -269,12 +324,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
269
324
} ;
270
325
let threshold = intra_cost * ( 1.0 - bias) ;
271
326
272
- ScenecutResult {
273
- intra_cost,
274
- threshold,
275
- inter_cost,
276
- has_scenecut : inter_cost > threshold,
277
- }
327
+ ScenecutData { intra_cost, inter_cost, threshold }
278
328
}
279
329
280
330
/// Calculates delta between 2 planes
@@ -299,7 +349,7 @@ impl<T: Pixel> SceneChangeDetector<T> {
299
349
}
300
350
}
301
351
302
- /// Scaling factor for frame in scenedetection
352
+ /// Scaling factor for frame in scene detection
303
353
fn detect_scale_factor ( sequence : & Arc < Sequence > ) -> usize {
304
354
let small_edge =
305
355
cmp:: min ( sequence. max_frame_height , sequence. max_frame_width ) as usize ;
@@ -324,11 +374,9 @@ fn detect_scale_factor(sequence: &Arc<Sequence>) -> usize {
324
374
}
325
375
326
376
/// Cost metrics produced by a single frame-to-frame scenecut comparison and
/// handed back to the caller.
#[derive(Clone, Copy, Debug)]
struct ScenecutData {
  /// Intra cost of the comparison; the fast path fills this with the
  /// detector's base threshold instead of a measured cost.
  intra_cost: f64,
  /// Inter cost of the comparison; the fast path stores the plane delta here.
  inter_cost: f64,
  /// Threshold that was computed alongside the two costs.
  threshold: f64,
}
0 commit comments