blob: 03c1c0f3f321880ddabb8db14095cafee139e21b
1 | /* |
2 | * Copyright (c) 2011 Stefano Sabatini |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | /** |
22 | * @file |
23 | * filter for selecting which frame passes in the filterchain |
24 | */ |
25 | |
26 | #include "libavutil/avstring.h" |
27 | #include "libavutil/eval.h" |
28 | #include "libavutil/fifo.h" |
29 | #include "libavutil/internal.h" |
30 | #include "libavutil/opt.h" |
31 | #include "libavutil/pixelutils.h" |
32 | #include "avfilter.h" |
33 | #include "audio.h" |
34 | #include "formats.h" |
35 | #include "internal.h" |
36 | #include "video.h" |
37 | |
/* Names of the variables usable in the select expression.
 * NOTE: the order of this table must match enum var_name below,
 * since entries are addressed by index into var_values[]. */
static const char *const var_names[] = {
    "TB",                ///< timebase

    "pts",               ///< original pts in the file of the frame
    "start_pts",         ///< first PTS in the stream, expressed in TB units
    "prev_pts",          ///< previous frame PTS
    "prev_selected_pts", ///< previous selected frame PTS

    "t",                 ///< timestamp expressed in seconds
    "start_t",           ///< first PTS in the stream, expressed in seconds
    "prev_t",            ///< previous frame time
    "prev_selected_t",   ///< previously selected time

    "pict_type",         ///< the type of picture in the movie
    "I",                 ///< picture-type constants, usable on the rhs of comparisons
    "P",
    "B",
    "S",
    "SI",
    "SP",
    "BI",
    "PICT_TYPE_I",       ///< long-form aliases of the constants above
    "PICT_TYPE_P",
    "PICT_TYPE_B",
    "PICT_TYPE_S",
    "PICT_TYPE_SI",
    "PICT_TYPE_SP",
    "PICT_TYPE_BI",

    "interlace_type",    ///< the frame interlace type
    "PROGRESSIVE",       ///< interlace-type constants (see INTERLACE_TYPE_* below)
    "TOPFIRST",
    "BOTTOMFIRST",

    "consumed_samples_n",///< number of samples consumed by the filter (only audio)
    "samples_n",         ///< number of samples in the current frame (only audio)
    "sample_rate",       ///< sample rate (only audio)

    "n",                 ///< frame number (starting from zero)
    "selected_n",        ///< selected frame number (starting from zero)
    "prev_selected_n",   ///< number of the last selected frame

    "key",               ///< tell if the frame is a key frame
    "pos",               ///< original position in the file of the frame

    "scene",             ///< scene-change score in [0,1] (video with scene detection only)

    "concatdec_select",  ///< frame is within the interval set by the concat demuxer

    NULL
};
89 | |
/* Indices into SelectContext.var_values[]; must stay in the exact same
 * order as var_names[] above. */
enum var_name {
    VAR_TB,

    VAR_PTS,
    VAR_START_PTS,
    VAR_PREV_PTS,
    VAR_PREV_SELECTED_PTS,

    VAR_T,
    VAR_START_T,
    VAR_PREV_T,
    VAR_PREV_SELECTED_T,

    VAR_PICT_TYPE,
    VAR_I,
    VAR_P,
    VAR_B,
    VAR_S,
    VAR_SI,
    VAR_SP,
    VAR_BI,
    VAR_PICT_TYPE_I,
    VAR_PICT_TYPE_P,
    VAR_PICT_TYPE_B,
    VAR_PICT_TYPE_S,
    VAR_PICT_TYPE_SI,
    VAR_PICT_TYPE_SP,
    VAR_PICT_TYPE_BI,

    VAR_INTERLACE_TYPE,
    VAR_INTERLACE_TYPE_P,
    VAR_INTERLACE_TYPE_T,
    VAR_INTERLACE_TYPE_B,

    VAR_CONSUMED_SAMPLES_N,
    VAR_SAMPLES_N,
    VAR_SAMPLE_RATE,

    VAR_N,
    VAR_SELECTED_N,
    VAR_PREV_SELECTED_N,

    VAR_KEY,
    VAR_POS,

    VAR_SCENE,

    VAR_CONCATDEC_SELECT,

    VAR_VARS_NB          ///< number of variables, sizes var_values[]
};
141 | |
typedef struct SelectContext {
    const AVClass *class;
    char *expr_str;           ///< expression string, set via the "expr"/"e" option
    AVExpr *expr;             ///< parsed form of expr_str
    double var_values[VAR_VARS_NB]; ///< current variable values, indexed by enum var_name
    int do_scene_detect;      ///< 1 if the expression requires scene detection variables, 0 otherwise
    av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function (scene detect only)
    double prev_mafd;         ///< previous MAFD (scene detect only)
    AVFrame *prev_picref;     ///< previous frame (scene detect only)
    double select;            ///< result of evaluating the expression on the last frame
    int select_out;           ///< mark the selected output pad index
    int nb_outputs;           ///< number of output pads, set via the "outputs"/"n" option
} SelectContext;
155 | |
#define OFFSET(x) offsetof(SelectContext, x)
/* Build the AVOption table for a filter instance (select or aselect);
 * FLAGS carries the media-type specific option flags. */
#define DEFINE_OPTIONS(filt_name, FLAGS)                           \
static const AVOption filt_name##_options[] = {                    \
    { "expr", "set an expression to use for selecting frames", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS }, \
    { "e",    "set an expression to use for selecting frames", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "1" }, .flags=FLAGS }, \
    { "outputs", "set the number of outputs", OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
    { "n",       "set the number of outputs", OFFSET(nb_outputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, .flags=FLAGS }, \
    { NULL } \
}
165 | |
166 | static int request_frame(AVFilterLink *outlink); |
167 | |
168 | static av_cold int init(AVFilterContext *ctx) |
169 | { |
170 | SelectContext *select = ctx->priv; |
171 | int i, ret; |
172 | |
173 | if ((ret = av_expr_parse(&select->expr, select->expr_str, |
174 | var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) { |
175 | av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n", |
176 | select->expr_str); |
177 | return ret; |
178 | } |
179 | select->do_scene_detect = !!strstr(select->expr_str, "scene"); |
180 | |
181 | for (i = 0; i < select->nb_outputs; i++) { |
182 | AVFilterPad pad = { 0 }; |
183 | |
184 | pad.name = av_asprintf("output%d", i); |
185 | if (!pad.name) |
186 | return AVERROR(ENOMEM); |
187 | pad.type = ctx->filter->inputs[0].type; |
188 | pad.request_frame = request_frame; |
189 | ff_insert_outpad(ctx, i, &pad); |
190 | } |
191 | |
192 | return 0; |
193 | } |
194 | |
/* Values exposed through the PROGRESSIVE/TOPFIRST/BOTTOMFIRST variables. */
#define INTERLACE_TYPE_P 0
#define INTERLACE_TYPE_T 1
#define INTERLACE_TYPE_B 2
198 | |
199 | static int config_input(AVFilterLink *inlink) |
200 | { |
201 | SelectContext *select = inlink->dst->priv; |
202 | |
203 | select->var_values[VAR_N] = 0.0; |
204 | select->var_values[VAR_SELECTED_N] = 0.0; |
205 | |
206 | select->var_values[VAR_TB] = av_q2d(inlink->time_base); |
207 | |
208 | select->var_values[VAR_PREV_PTS] = NAN; |
209 | select->var_values[VAR_PREV_SELECTED_PTS] = NAN; |
210 | select->var_values[VAR_PREV_SELECTED_T] = NAN; |
211 | select->var_values[VAR_PREV_T] = NAN; |
212 | select->var_values[VAR_START_PTS] = NAN; |
213 | select->var_values[VAR_START_T] = NAN; |
214 | |
215 | select->var_values[VAR_I] = AV_PICTURE_TYPE_I; |
216 | select->var_values[VAR_P] = AV_PICTURE_TYPE_P; |
217 | select->var_values[VAR_B] = AV_PICTURE_TYPE_B; |
218 | select->var_values[VAR_SI] = AV_PICTURE_TYPE_SI; |
219 | select->var_values[VAR_SP] = AV_PICTURE_TYPE_SP; |
220 | select->var_values[VAR_BI] = AV_PICTURE_TYPE_BI; |
221 | select->var_values[VAR_PICT_TYPE_I] = AV_PICTURE_TYPE_I; |
222 | select->var_values[VAR_PICT_TYPE_P] = AV_PICTURE_TYPE_P; |
223 | select->var_values[VAR_PICT_TYPE_B] = AV_PICTURE_TYPE_B; |
224 | select->var_values[VAR_PICT_TYPE_SI] = AV_PICTURE_TYPE_SI; |
225 | select->var_values[VAR_PICT_TYPE_SP] = AV_PICTURE_TYPE_SP; |
226 | select->var_values[VAR_PICT_TYPE_BI] = AV_PICTURE_TYPE_BI; |
227 | |
228 | select->var_values[VAR_INTERLACE_TYPE_P] = INTERLACE_TYPE_P; |
229 | select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T; |
230 | select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B; |
231 | |
232 | select->var_values[VAR_PICT_TYPE] = NAN; |
233 | select->var_values[VAR_INTERLACE_TYPE] = NAN; |
234 | select->var_values[VAR_SCENE] = NAN; |
235 | select->var_values[VAR_CONSUMED_SAMPLES_N] = NAN; |
236 | select->var_values[VAR_SAMPLES_N] = NAN; |
237 | |
238 | select->var_values[VAR_SAMPLE_RATE] = |
239 | inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN; |
240 | |
241 | if (select->do_scene_detect) { |
242 | select->sad = av_pixelutils_get_sad_fn(3, 3, 2, select); // 8x8 both sources aligned |
243 | if (!select->sad) |
244 | return AVERROR(EINVAL); |
245 | } |
246 | return 0; |
247 | } |
248 | |
249 | static double get_scene_score(AVFilterContext *ctx, AVFrame *frame) |
250 | { |
251 | double ret = 0; |
252 | SelectContext *select = ctx->priv; |
253 | AVFrame *prev_picref = select->prev_picref; |
254 | |
255 | if (prev_picref && |
256 | frame->height == prev_picref->height && |
257 | frame->width == prev_picref->width) { |
258 | int x, y, nb_sad = 0; |
259 | int64_t sad = 0; |
260 | double mafd, diff; |
261 | uint8_t *p1 = frame->data[0]; |
262 | uint8_t *p2 = prev_picref->data[0]; |
263 | const int p1_linesize = frame->linesize[0]; |
264 | const int p2_linesize = prev_picref->linesize[0]; |
265 | |
266 | for (y = 0; y < frame->height - 7; y += 8) { |
267 | for (x = 0; x < frame->width*3 - 7; x += 8) { |
268 | sad += select->sad(p1 + x, p1_linesize, p2 + x, p2_linesize); |
269 | nb_sad += 8 * 8; |
270 | } |
271 | p1 += 8 * p1_linesize; |
272 | p2 += 8 * p2_linesize; |
273 | } |
274 | emms_c(); |
275 | mafd = nb_sad ? (double)sad / nb_sad : 0; |
276 | diff = fabs(mafd - select->prev_mafd); |
277 | ret = av_clipf(FFMIN(mafd, diff) / 100., 0, 1); |
278 | select->prev_mafd = mafd; |
279 | av_frame_free(&prev_picref); |
280 | } |
281 | select->prev_picref = av_frame_clone(frame); |
282 | return ret; |
283 | } |
284 | |
285 | static double get_concatdec_select(AVFrame *frame, int64_t pts) |
286 | { |
287 | AVDictionary *metadata = av_frame_get_metadata(frame); |
288 | AVDictionaryEntry *start_time_entry = av_dict_get(metadata, "lavf.concatdec.start_time", NULL, 0); |
289 | AVDictionaryEntry *duration_entry = av_dict_get(metadata, "lavf.concatdec.duration", NULL, 0); |
290 | if (start_time_entry) { |
291 | int64_t start_time = strtoll(start_time_entry->value, NULL, 10); |
292 | if (pts >= start_time) { |
293 | if (duration_entry) { |
294 | int64_t duration = strtoll(duration_entry->value, NULL, 10); |
295 | if (pts < start_time + duration) |
296 | return -1; |
297 | else |
298 | return 0; |
299 | } |
300 | return -1; |
301 | } |
302 | return 0; |
303 | } |
304 | return NAN; |
305 | } |
306 | |
/* Convert between double variable values and integer timestamps,
 * mapping NAN <-> AV_NOPTS_VALUE. */
#define D2TS(d)  (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d))
#define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))
309 | |
/**
 * Evaluate the select expression for @p frame and record the decision in
 * select->select (raw expression result) and select->select_out (output
 * pad index, or -1 to drop). Also updates all per-frame and
 * "previous frame"/"previously selected" bookkeeping variables, so the
 * statement order below is significant.
 */
static void select_frame(AVFilterContext *ctx, AVFrame *frame)
{
    SelectContext *select = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    double res;

    /* Latch the first timestamp ever seen as start_pts/start_t. */
    if (isnan(select->var_values[VAR_START_PTS]))
        select->var_values[VAR_START_PTS] = TS2D(frame->pts);
    if (isnan(select->var_values[VAR_START_T]))
        select->var_values[VAR_START_T] = TS2D(frame->pts) * av_q2d(inlink->time_base);

    select->var_values[VAR_N  ] = inlink->frame_count_out;
    select->var_values[VAR_PTS] = TS2D(frame->pts);
    select->var_values[VAR_T  ] = TS2D(frame->pts) * av_q2d(inlink->time_base);
    select->var_values[VAR_POS] = av_frame_get_pkt_pos(frame) == -1 ? NAN : av_frame_get_pkt_pos(frame);
    select->var_values[VAR_KEY] = frame->key_frame;
    /* concatdec metadata is expressed in AV_TIME_BASE (microsecond) units. */
    select->var_values[VAR_CONCATDEC_SELECT] = get_concatdec_select(frame, av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q));

    switch (inlink->type) {
    case AVMEDIA_TYPE_AUDIO:
        select->var_values[VAR_SAMPLES_N] = frame->nb_samples;
        break;

    case AVMEDIA_TYPE_VIDEO:
        select->var_values[VAR_INTERLACE_TYPE] =
            !frame->interlaced_frame ? INTERLACE_TYPE_P :
             frame->top_field_first  ? INTERLACE_TYPE_T : INTERLACE_TYPE_B;
        select->var_values[VAR_PICT_TYPE] = frame->pict_type;
        if (select->do_scene_detect) {
            char buf[32];
            select->var_values[VAR_SCENE] = get_scene_score(ctx, frame);
            // TODO: document metadata
            snprintf(buf, sizeof(buf), "%f", select->var_values[VAR_SCENE]);
            av_dict_set(avpriv_frame_get_metadatap(frame), "lavfi.scene_score", buf, 0);
        }
        break;
    }

    select->select = res = av_expr_eval(select->expr, select->var_values, NULL);
    av_log(inlink->dst, AV_LOG_DEBUG,
           "n:%f pts:%f t:%f key:%d",
           select->var_values[VAR_N],
           select->var_values[VAR_PTS],
           select->var_values[VAR_T],
           frame->key_frame);

    switch (inlink->type) {
    case AVMEDIA_TYPE_VIDEO:
        av_log(inlink->dst, AV_LOG_DEBUG, " interlace_type:%c pict_type:%c scene:%f",
               (!frame->interlaced_frame) ? 'P' :
               frame->top_field_first     ? 'T' : 'B',
               av_get_picture_type_char(frame->pict_type),
               select->var_values[VAR_SCENE]);
        break;
    case AVMEDIA_TYPE_AUDIO:
        av_log(inlink->dst, AV_LOG_DEBUG, " samples_n:%d consumed_samples_n:%f",
               frame->nb_samples,
               select->var_values[VAR_CONSUMED_SAMPLES_N]);
        break;
    }

    /* Map the expression result to an output pad:
     * 0 drops the frame, NAN/negative routes to pad 0, positive values
     * are rounded up and clipped into [0, nb_outputs-1]. */
    if (res == 0) {
        select->select_out = -1; /* drop */
    } else if (isnan(res) || res < 0) {
        select->select_out = 0; /* first output */
    } else {
        select->select_out = FFMIN(ceilf(res)-1, select->nb_outputs-1); /* other outputs */
    }

    av_log(inlink->dst, AV_LOG_DEBUG, " -> select:%f select_out:%d\n", res, select->select_out);

    /* Any non-zero (including NAN) result counts as "selected". */
    if (res) {
        select->var_values[VAR_PREV_SELECTED_N]   = select->var_values[VAR_N];
        select->var_values[VAR_PREV_SELECTED_PTS] = select->var_values[VAR_PTS];
        select->var_values[VAR_PREV_SELECTED_T]   = select->var_values[VAR_T];
        select->var_values[VAR_SELECTED_N]        += 1.0;
        if (inlink->type == AVMEDIA_TYPE_AUDIO)
            select->var_values[VAR_CONSUMED_SAMPLES_N] += frame->nb_samples;
    }

    select->var_values[VAR_PREV_PTS] = select->var_values[VAR_PTS];
    select->var_values[VAR_PREV_T]   = select->var_values[VAR_T];
}
393 | |
394 | static int filter_frame(AVFilterLink *inlink, AVFrame *frame) |
395 | { |
396 | AVFilterContext *ctx = inlink->dst; |
397 | SelectContext *select = ctx->priv; |
398 | |
399 | select_frame(ctx, frame); |
400 | if (select->select) |
401 | return ff_filter_frame(ctx->outputs[select->select_out], frame); |
402 | |
403 | av_frame_free(&frame); |
404 | return 0; |
405 | } |
406 | |
407 | static int request_frame(AVFilterLink *outlink) |
408 | { |
409 | AVFilterLink *inlink = outlink->src->inputs[0]; |
410 | int ret = ff_request_frame(inlink); |
411 | return ret; |
412 | } |
413 | |
414 | static av_cold void uninit(AVFilterContext *ctx) |
415 | { |
416 | SelectContext *select = ctx->priv; |
417 | int i; |
418 | |
419 | av_expr_free(select->expr); |
420 | select->expr = NULL; |
421 | |
422 | for (i = 0; i < ctx->nb_outputs; i++) |
423 | av_freep(&ctx->output_pads[i].name); |
424 | |
425 | if (select->do_scene_detect) { |
426 | av_frame_free(&select->prev_picref); |
427 | } |
428 | } |
429 | |
430 | static int query_formats(AVFilterContext *ctx) |
431 | { |
432 | SelectContext *select = ctx->priv; |
433 | |
434 | if (!select->do_scene_detect) { |
435 | return ff_default_query_formats(ctx); |
436 | } else { |
437 | int ret; |
438 | static const enum AVPixelFormat pix_fmts[] = { |
439 | AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, |
440 | AV_PIX_FMT_NONE |
441 | }; |
442 | AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); |
443 | |
444 | if (!fmts_list) |
445 | return AVERROR(ENOMEM); |
446 | ret = ff_set_common_formats(ctx, fmts_list); |
447 | if (ret < 0) |
448 | return ret; |
449 | } |
450 | return 0; |
451 | } |
452 | |
453 | #if CONFIG_ASELECT_FILTER |
454 | |
455 | DEFINE_OPTIONS(aselect, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM); |
456 | AVFILTER_DEFINE_CLASS(aselect); |
457 | |
458 | static av_cold int aselect_init(AVFilterContext *ctx) |
459 | { |
460 | SelectContext *select = ctx->priv; |
461 | int ret; |
462 | |
463 | if ((ret = init(ctx)) < 0) |
464 | return ret; |
465 | |
466 | if (select->do_scene_detect) { |
467 | av_log(ctx, AV_LOG_ERROR, "Scene detection is ignored in aselect filter\n"); |
468 | return AVERROR(EINVAL); |
469 | } |
470 | |
471 | return 0; |
472 | } |
473 | |
/* Single audio input; outputs are created dynamically in init(). */
static const AVFilterPad avfilter_af_aselect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};
483 | |
/* aselect: audio variant of the select filter. */
AVFilter ff_af_aselect = {
    .name        = "aselect",
    .description = NULL_IF_CONFIG_SMALL("Select audio frames to pass in output."),
    .init        = aselect_init,
    .uninit      = uninit,
    .priv_size   = sizeof(SelectContext),
    .inputs      = avfilter_af_aselect_inputs,
    .priv_class  = &aselect_class,
    .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
};
494 | #endif /* CONFIG_ASELECT_FILTER */ |
495 | |
496 | #if CONFIG_SELECT_FILTER |
497 | |
498 | DEFINE_OPTIONS(select, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM); |
499 | AVFILTER_DEFINE_CLASS(select); |
500 | |
501 | static av_cold int select_init(AVFilterContext *ctx) |
502 | { |
503 | int ret; |
504 | |
505 | if ((ret = init(ctx)) < 0) |
506 | return ret; |
507 | |
508 | return 0; |
509 | } |
510 | |
/* Single video input; outputs are created dynamically in init(). */
static const AVFilterPad avfilter_vf_select_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};
520 | |
/* select: video variant, with optional scene-change detection. */
AVFilter ff_vf_select = {
    .name          = "select",
    .description   = NULL_IF_CONFIG_SMALL("Select video frames to pass in output."),
    .init          = select_init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(SelectContext),
    .priv_class    = &select_class,
    .inputs        = avfilter_vf_select_inputs,
    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
};
532 | #endif /* CONFIG_SELECT_FILTER */ |
533 |