blob: bd780cc379d5ce39afef672d3e51bcd695bdc99b
1 | /* |
2 | * This file is part of FFmpeg. |
3 | * |
4 | * FFmpeg is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * FFmpeg is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with FFmpeg; if not, write to the Free Software |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | /** |
20 | * @file |
21 | * Audio join filter |
22 | * |
23 | * Join multiple audio inputs as different channels in |
24 | * a single output |
25 | */ |
26 | |
27 | #include "libavutil/avassert.h" |
28 | #include "libavutil/channel_layout.h" |
29 | #include "libavutil/common.h" |
30 | #include "libavutil/opt.h" |
31 | |
32 | #include "audio.h" |
33 | #include "avfilter.h" |
34 | #include "formats.h" |
35 | #include "internal.h" |
36 | |
/**
 * Source description for one channel of the joined output.
 *
 * join_init() sets input to -1 for every entry; a negative input therefore
 * means "no source chosen yet" until parse_maps() or the guessing code in
 * join_config_output() fills it in.
 */
typedef struct ChannelMap {
    /** input stream index */
    int input;

    /** index of in_channel in the input stream data */
    int in_channel_idx;

    /** layout describing the input channel */
    uint64_t in_channel;

    /** layout describing the output channel */
    uint64_t out_channel;
} ChannelMap;
43 | |
44 | typedef struct JoinContext { |
45 | const AVClass *class; |
46 | |
47 | int inputs; |
48 | char *map; |
49 | char *channel_layout_str; |
50 | uint64_t channel_layout; |
51 | |
52 | int nb_channels; |
53 | ChannelMap *channels; |
54 | |
55 | /** |
56 | * Temporary storage for input frames, until we get one on each input. |
57 | */ |
58 | AVFrame **input_frames; |
59 | |
60 | /** |
61 | * Temporary storage for buffer references, for assembling the output frame. |
62 | */ |
63 | AVBufferRef **buffers; |
64 | } JoinContext; |
65 | |
66 | #define OFFSET(x) offsetof(JoinContext, x) |
67 | #define A AV_OPT_FLAG_AUDIO_PARAM |
68 | #define F AV_OPT_FLAG_FILTERING_PARAM |
69 | static const AVOption join_options[] = { |
70 | { "inputs", "Number of input streams.", OFFSET(inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, INT_MAX, A|F }, |
71 | { "channel_layout", "Channel layout of the " |
72 | "output stream.", OFFSET(channel_layout_str), AV_OPT_TYPE_STRING, {.str = "stereo"}, 0, 0, A|F }, |
73 | { "map", "A comma-separated list of channels maps in the format " |
74 | "'input_stream.input_channel-output_channel.", |
75 | OFFSET(map), AV_OPT_TYPE_STRING, .flags = A|F }, |
76 | { NULL } |
77 | }; |
78 | |
79 | AVFILTER_DEFINE_CLASS(join); |
80 | |
81 | static int try_push_frame(AVFilterContext *ctx); |
82 | |
83 | static int filter_frame(AVFilterLink *link, AVFrame *frame) |
84 | { |
85 | AVFilterContext *ctx = link->dst; |
86 | JoinContext *s = ctx->priv; |
87 | int i, j; |
88 | |
89 | for (i = 0; i < ctx->nb_inputs; i++) |
90 | if (link == ctx->inputs[i]) |
91 | break; |
92 | av_assert0(i < ctx->nb_inputs); |
93 | av_assert0(!s->input_frames[i]); |
94 | s->input_frames[i] = frame; |
95 | |
96 | /* request the same number of samples on all inputs */ |
97 | /* FIXME that means a frame arriving asynchronously on a different input |
98 | will not have the requested number of samples */ |
99 | if (i == 0) { |
100 | int nb_samples = s->input_frames[0]->nb_samples; |
101 | |
102 | for (j = 1; !i && j < ctx->nb_inputs; j++) |
103 | ctx->inputs[j]->request_samples = nb_samples; |
104 | } |
105 | |
106 | return try_push_frame(ctx); |
107 | } |
108 | |
109 | static int parse_maps(AVFilterContext *ctx) |
110 | { |
111 | JoinContext *s = ctx->priv; |
112 | char separator = '|'; |
113 | char *cur = s->map; |
114 | |
115 | #if FF_API_OLD_FILTER_OPTS |
116 | if (cur && strchr(cur, ',')) { |
117 | av_log(ctx, AV_LOG_WARNING, "This syntax is deprecated, use '|' to " |
118 | "separate the mappings.\n"); |
119 | separator = ','; |
120 | } |
121 | #endif |
122 | |
123 | while (cur && *cur) { |
124 | char *sep, *next, *p; |
125 | uint64_t in_channel = 0, out_channel = 0; |
126 | int input_idx, out_ch_idx, in_ch_idx; |
127 | |
128 | next = strchr(cur, separator); |
129 | if (next) |
130 | *next++ = 0; |
131 | |
132 | /* split the map into input and output parts */ |
133 | if (!(sep = strchr(cur, '-'))) { |
134 | av_log(ctx, AV_LOG_ERROR, "Missing separator '-' in channel " |
135 | "map '%s'\n", cur); |
136 | return AVERROR(EINVAL); |
137 | } |
138 | *sep++ = 0; |
139 | |
140 | #define PARSE_CHANNEL(str, var, inout) \ |
141 | if (!(var = av_get_channel_layout(str))) { \ |
142 | av_log(ctx, AV_LOG_ERROR, "Invalid " inout " channel: %s.\n", str);\ |
143 | return AVERROR(EINVAL); \ |
144 | } \ |
145 | if (av_get_channel_layout_nb_channels(var) != 1) { \ |
146 | av_log(ctx, AV_LOG_ERROR, "Channel map describes more than one " \ |
147 | inout " channel.\n"); \ |
148 | return AVERROR(EINVAL); \ |
149 | } |
150 | |
151 | /* parse output channel */ |
152 | PARSE_CHANNEL(sep, out_channel, "output"); |
153 | if (!(out_channel & s->channel_layout)) { |
154 | av_log(ctx, AV_LOG_ERROR, "Output channel '%s' is not present in " |
155 | "requested channel layout.\n", sep); |
156 | return AVERROR(EINVAL); |
157 | } |
158 | |
159 | out_ch_idx = av_get_channel_layout_channel_index(s->channel_layout, |
160 | out_channel); |
161 | if (s->channels[out_ch_idx].input >= 0) { |
162 | av_log(ctx, AV_LOG_ERROR, "Multiple maps for output channel " |
163 | "'%s'.\n", sep); |
164 | return AVERROR(EINVAL); |
165 | } |
166 | |
167 | /* parse input channel */ |
168 | input_idx = strtol(cur, &cur, 0); |
169 | if (input_idx < 0 || input_idx >= s->inputs) { |
170 | av_log(ctx, AV_LOG_ERROR, "Invalid input stream index: %d.\n", |
171 | input_idx); |
172 | return AVERROR(EINVAL); |
173 | } |
174 | |
175 | if (*cur) |
176 | cur++; |
177 | |
178 | in_ch_idx = strtol(cur, &p, 0); |
179 | if (p == cur) { |
180 | /* channel specifier is not a number, |
181 | * try to parse as channel name */ |
182 | PARSE_CHANNEL(cur, in_channel, "input"); |
183 | } |
184 | |
185 | s->channels[out_ch_idx].input = input_idx; |
186 | if (in_channel) |
187 | s->channels[out_ch_idx].in_channel = in_channel; |
188 | else |
189 | s->channels[out_ch_idx].in_channel_idx = in_ch_idx; |
190 | |
191 | cur = next; |
192 | } |
193 | return 0; |
194 | } |
195 | |
196 | static av_cold int join_init(AVFilterContext *ctx) |
197 | { |
198 | JoinContext *s = ctx->priv; |
199 | int ret, i; |
200 | |
201 | if (!(s->channel_layout = av_get_channel_layout(s->channel_layout_str))) { |
202 | av_log(ctx, AV_LOG_ERROR, "Error parsing channel layout '%s'.\n", |
203 | s->channel_layout_str); |
204 | return AVERROR(EINVAL); |
205 | } |
206 | |
207 | s->nb_channels = av_get_channel_layout_nb_channels(s->channel_layout); |
208 | s->channels = av_mallocz_array(s->nb_channels, sizeof(*s->channels)); |
209 | s->buffers = av_mallocz_array(s->nb_channels, sizeof(*s->buffers)); |
210 | s->input_frames = av_mallocz_array(s->inputs, sizeof(*s->input_frames)); |
211 | if (!s->channels || !s->buffers|| !s->input_frames) |
212 | return AVERROR(ENOMEM); |
213 | |
214 | for (i = 0; i < s->nb_channels; i++) { |
215 | s->channels[i].out_channel = av_channel_layout_extract_channel(s->channel_layout, i); |
216 | s->channels[i].input = -1; |
217 | } |
218 | |
219 | if ((ret = parse_maps(ctx)) < 0) |
220 | return ret; |
221 | |
222 | for (i = 0; i < s->inputs; i++) { |
223 | char name[32]; |
224 | AVFilterPad pad = { 0 }; |
225 | |
226 | snprintf(name, sizeof(name), "input%d", i); |
227 | pad.type = AVMEDIA_TYPE_AUDIO; |
228 | pad.name = av_strdup(name); |
229 | if (!pad.name) |
230 | return AVERROR(ENOMEM); |
231 | pad.filter_frame = filter_frame; |
232 | |
233 | pad.needs_fifo = 1; |
234 | |
235 | ff_insert_inpad(ctx, i, &pad); |
236 | } |
237 | |
238 | return 0; |
239 | } |
240 | |
241 | static av_cold void join_uninit(AVFilterContext *ctx) |
242 | { |
243 | JoinContext *s = ctx->priv; |
244 | int i; |
245 | |
246 | for (i = 0; i < ctx->nb_inputs; i++) { |
247 | av_freep(&ctx->input_pads[i].name); |
248 | av_frame_free(&s->input_frames[i]); |
249 | } |
250 | |
251 | av_freep(&s->channels); |
252 | av_freep(&s->buffers); |
253 | av_freep(&s->input_frames); |
254 | } |
255 | |
256 | static int join_query_formats(AVFilterContext *ctx) |
257 | { |
258 | JoinContext *s = ctx->priv; |
259 | AVFilterChannelLayouts *layouts = NULL; |
260 | int i, ret; |
261 | |
262 | if ((ret = ff_add_channel_layout(&layouts, s->channel_layout)) < 0 || |
263 | (ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0) |
264 | return ret; |
265 | |
266 | for (i = 0; i < ctx->nb_inputs; i++) { |
267 | layouts = ff_all_channel_layouts(); |
268 | if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0) |
269 | return ret; |
270 | } |
271 | |
272 | if ((ret = ff_set_common_formats(ctx, ff_planar_sample_fmts())) < 0 || |
273 | (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0) |
274 | return ret; |
275 | |
276 | return 0; |
277 | } |
278 | |
279 | static void guess_map_matching(AVFilterContext *ctx, ChannelMap *ch, |
280 | uint64_t *inputs) |
281 | { |
282 | int i; |
283 | |
284 | for (i = 0; i < ctx->nb_inputs; i++) { |
285 | AVFilterLink *link = ctx->inputs[i]; |
286 | |
287 | if (ch->out_channel & link->channel_layout && |
288 | !(ch->out_channel & inputs[i])) { |
289 | ch->input = i; |
290 | ch->in_channel = ch->out_channel; |
291 | inputs[i] |= ch->out_channel; |
292 | return; |
293 | } |
294 | } |
295 | } |
296 | |
297 | static void guess_map_any(AVFilterContext *ctx, ChannelMap *ch, |
298 | uint64_t *inputs) |
299 | { |
300 | int i; |
301 | |
302 | for (i = 0; i < ctx->nb_inputs; i++) { |
303 | AVFilterLink *link = ctx->inputs[i]; |
304 | |
305 | if ((inputs[i] & link->channel_layout) != link->channel_layout) { |
306 | uint64_t unused = link->channel_layout & ~inputs[i]; |
307 | |
308 | ch->input = i; |
309 | ch->in_channel = av_channel_layout_extract_channel(unused, 0); |
310 | inputs[i] |= ch->in_channel; |
311 | return; |
312 | } |
313 | } |
314 | } |
315 | |
316 | static int join_config_output(AVFilterLink *outlink) |
317 | { |
318 | AVFilterContext *ctx = outlink->src; |
319 | JoinContext *s = ctx->priv; |
320 | uint64_t *inputs; // nth element tracks which channels are used from nth input |
321 | int i, ret = 0; |
322 | |
323 | /* initialize inputs to user-specified mappings */ |
324 | if (!(inputs = av_mallocz_array(ctx->nb_inputs, sizeof(*inputs)))) |
325 | return AVERROR(ENOMEM); |
326 | for (i = 0; i < s->nb_channels; i++) { |
327 | ChannelMap *ch = &s->channels[i]; |
328 | AVFilterLink *inlink; |
329 | |
330 | if (ch->input < 0) |
331 | continue; |
332 | |
333 | inlink = ctx->inputs[ch->input]; |
334 | |
335 | if (!ch->in_channel) |
336 | ch->in_channel = av_channel_layout_extract_channel(inlink->channel_layout, |
337 | ch->in_channel_idx); |
338 | |
339 | if (!(ch->in_channel & inlink->channel_layout)) { |
340 | av_log(ctx, AV_LOG_ERROR, "Requested channel %s is not present in " |
341 | "input stream #%d.\n", av_get_channel_name(ch->in_channel), |
342 | ch->input); |
343 | ret = AVERROR(EINVAL); |
344 | goto fail; |
345 | } |
346 | |
347 | inputs[ch->input] |= ch->in_channel; |
348 | } |
349 | |
350 | /* guess channel maps when not explicitly defined */ |
351 | /* first try unused matching channels */ |
352 | for (i = 0; i < s->nb_channels; i++) { |
353 | ChannelMap *ch = &s->channels[i]; |
354 | |
355 | if (ch->input < 0) |
356 | guess_map_matching(ctx, ch, inputs); |
357 | } |
358 | |
359 | /* if the above failed, try to find _any_ unused input channel */ |
360 | for (i = 0; i < s->nb_channels; i++) { |
361 | ChannelMap *ch = &s->channels[i]; |
362 | |
363 | if (ch->input < 0) |
364 | guess_map_any(ctx, ch, inputs); |
365 | |
366 | if (ch->input < 0) { |
367 | av_log(ctx, AV_LOG_ERROR, "Could not find input channel for " |
368 | "output channel '%s'.\n", |
369 | av_get_channel_name(ch->out_channel)); |
370 | goto fail; |
371 | } |
372 | |
373 | ch->in_channel_idx = av_get_channel_layout_channel_index(ctx->inputs[ch->input]->channel_layout, |
374 | ch->in_channel); |
375 | } |
376 | |
377 | /* print mappings */ |
378 | av_log(ctx, AV_LOG_VERBOSE, "mappings: "); |
379 | for (i = 0; i < s->nb_channels; i++) { |
380 | ChannelMap *ch = &s->channels[i]; |
381 | av_log(ctx, AV_LOG_VERBOSE, "%d.%s => %s ", ch->input, |
382 | av_get_channel_name(ch->in_channel), |
383 | av_get_channel_name(ch->out_channel)); |
384 | } |
385 | av_log(ctx, AV_LOG_VERBOSE, "\n"); |
386 | |
387 | for (i = 0; i < ctx->nb_inputs; i++) { |
388 | if (!inputs[i]) |
389 | av_log(ctx, AV_LOG_WARNING, "No channels are used from input " |
390 | "stream %d.\n", i); |
391 | } |
392 | |
393 | fail: |
394 | av_freep(&inputs); |
395 | return ret; |
396 | } |
397 | |
398 | static int join_request_frame(AVFilterLink *outlink) |
399 | { |
400 | AVFilterContext *ctx = outlink->src; |
401 | JoinContext *s = ctx->priv; |
402 | int i; |
403 | |
404 | /* get a frame on each input */ |
405 | for (i = 0; i < ctx->nb_inputs; i++) { |
406 | AVFilterLink *inlink = ctx->inputs[i]; |
407 | if (!s->input_frames[i]) |
408 | return ff_request_frame(inlink); |
409 | } |
410 | return 0; |
411 | } |
412 | |
413 | static int try_push_frame(AVFilterContext *ctx) |
414 | { |
415 | AVFilterLink *outlink = ctx->outputs[0]; |
416 | JoinContext *s = ctx->priv; |
417 | AVFrame *frame; |
418 | int linesize = INT_MAX; |
419 | int nb_samples = INT_MAX; |
420 | int nb_buffers = 0; |
421 | int i, j, ret; |
422 | |
423 | for (i = 0; i < ctx->nb_inputs; i++) { |
424 | if (!s->input_frames[i]) |
425 | return 0; |
426 | nb_samples = FFMIN(nb_samples, s->input_frames[i]->nb_samples); |
427 | } |
428 | |
429 | /* setup the output frame */ |
430 | frame = av_frame_alloc(); |
431 | if (!frame) |
432 | return AVERROR(ENOMEM); |
433 | if (s->nb_channels > FF_ARRAY_ELEMS(frame->data)) { |
434 | frame->extended_data = av_mallocz_array(s->nb_channels, |
435 | sizeof(*frame->extended_data)); |
436 | if (!frame->extended_data) { |
437 | ret = AVERROR(ENOMEM); |
438 | goto fail; |
439 | } |
440 | } |
441 | |
442 | /* copy the data pointers */ |
443 | for (i = 0; i < s->nb_channels; i++) { |
444 | ChannelMap *ch = &s->channels[i]; |
445 | AVFrame *cur = s->input_frames[ch->input]; |
446 | AVBufferRef *buf; |
447 | |
448 | frame->extended_data[i] = cur->extended_data[ch->in_channel_idx]; |
449 | linesize = FFMIN(linesize, cur->linesize[0]); |
450 | |
451 | /* add the buffer where this plan is stored to the list if it's |
452 | * not already there */ |
453 | buf = av_frame_get_plane_buffer(cur, ch->in_channel_idx); |
454 | if (!buf) { |
455 | ret = AVERROR(EINVAL); |
456 | goto fail; |
457 | } |
458 | for (j = 0; j < nb_buffers; j++) |
459 | if (s->buffers[j]->buffer == buf->buffer) |
460 | break; |
461 | if (j == i) |
462 | s->buffers[nb_buffers++] = buf; |
463 | } |
464 | |
465 | /* create references to the buffers we copied to output */ |
466 | if (nb_buffers > FF_ARRAY_ELEMS(frame->buf)) { |
467 | frame->nb_extended_buf = nb_buffers - FF_ARRAY_ELEMS(frame->buf); |
468 | frame->extended_buf = av_mallocz_array(frame->nb_extended_buf, |
469 | sizeof(*frame->extended_buf)); |
470 | if (!frame->extended_buf) { |
471 | frame->nb_extended_buf = 0; |
472 | ret = AVERROR(ENOMEM); |
473 | goto fail; |
474 | } |
475 | } |
476 | for (i = 0; i < FFMIN(FF_ARRAY_ELEMS(frame->buf), nb_buffers); i++) { |
477 | frame->buf[i] = av_buffer_ref(s->buffers[i]); |
478 | if (!frame->buf[i]) { |
479 | ret = AVERROR(ENOMEM); |
480 | goto fail; |
481 | } |
482 | } |
483 | for (i = 0; i < frame->nb_extended_buf; i++) { |
484 | frame->extended_buf[i] = av_buffer_ref(s->buffers[i + |
485 | FF_ARRAY_ELEMS(frame->buf)]); |
486 | if (!frame->extended_buf[i]) { |
487 | ret = AVERROR(ENOMEM); |
488 | goto fail; |
489 | } |
490 | } |
491 | |
492 | frame->nb_samples = nb_samples; |
493 | frame->channel_layout = outlink->channel_layout; |
494 | av_frame_set_channels(frame, outlink->channels); |
495 | frame->sample_rate = outlink->sample_rate; |
496 | frame->format = outlink->format; |
497 | frame->pts = s->input_frames[0]->pts; |
498 | frame->linesize[0] = linesize; |
499 | if (frame->data != frame->extended_data) { |
500 | memcpy(frame->data, frame->extended_data, sizeof(*frame->data) * |
501 | FFMIN(FF_ARRAY_ELEMS(frame->data), s->nb_channels)); |
502 | } |
503 | |
504 | ret = ff_filter_frame(outlink, frame); |
505 | |
506 | for (i = 0; i < ctx->nb_inputs; i++) |
507 | av_frame_free(&s->input_frames[i]); |
508 | |
509 | return ret; |
510 | |
511 | fail: |
512 | av_frame_free(&frame); |
513 | return ret; |
514 | } |
515 | |
516 | static const AVFilterPad avfilter_af_join_outputs[] = { |
517 | { |
518 | .name = "default", |
519 | .type = AVMEDIA_TYPE_AUDIO, |
520 | .config_props = join_config_output, |
521 | .request_frame = join_request_frame, |
522 | }, |
523 | { NULL } |
524 | }; |
525 | |
526 | AVFilter ff_af_join = { |
527 | .name = "join", |
528 | .description = NULL_IF_CONFIG_SMALL("Join multiple audio streams into " |
529 | "multi-channel output."), |
530 | .priv_size = sizeof(JoinContext), |
531 | .priv_class = &join_class, |
532 | .init = join_init, |
533 | .uninit = join_uninit, |
534 | .query_formats = join_query_formats, |
535 | .inputs = NULL, |
536 | .outputs = avfilter_af_join_outputs, |
537 | .flags = AVFILTER_FLAG_DYNAMIC_INPUTS, |
538 | }; |
539 |