platform/external/ffmpeg.git - Unnamed repository; edit this file 'description' to name the repository.

1 /*
2  * Audio Mix Filter
3  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * Audio Mix Filter
25  *
26  * Mixes audio from multiple sources into a single output. The channel layout,
27  * sample rate, and sample format will be the same for all inputs and the
28  * output.
29  */
30
31 #include "libavutil/attributes.h"
32 #include "libavutil/audio_fifo.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/avstring.h"
35 #include "libavutil/channel_layout.h"
36 #include "libavutil/common.h"
37 #include "libavutil/float_dsp.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/opt.h"
40 #include "libavutil/samplefmt.h"
41
42 #include "audio.h"
43 #include "avfilter.h"
44 #include "formats.h"
45 #include "internal.h"
46
/** State bits kept per input in MixContext.input_state[]. */
enum {
    INPUT_ON  = 1,  /**< input is active */
    INPUT_EOF = 2,  /**< input has reached EOF (may still be active) */
};

/** Values of the "duration" option. */
enum {
    DURATION_LONGEST  = 0,
    DURATION_SHORTEST = 1,
    DURATION_FIRST    = 2,
};
53
54
/**
 * Size and timestamp of one frame queued for the first input.
 * Entries are chained into a singly linked list (see FrameList).
 */
typedef struct FrameInfo {
    int nb_samples;             /**< samples of this frame not yet consumed */
    int64_t pts;                /**< timestamp of the first remaining sample */
    struct FrameInfo *next;     /**< next entry, NULL for the last one */
} FrameInfo;
60
61 /**
62  * Linked list used to store timestamps and frame sizes of all frames in the
63  * FIFO for the first input.
64  *
65  * This is needed to keep timestamps synchronized for the case where multiple
66  * input frames are pushed to the filter for processing before a frame is
67  * requested by the output link.
68  */
69 typedef struct FrameList {
70     int nb_frames;
71     int nb_samples;
72     FrameInfo *list;
73     FrameInfo *end;
74 } FrameList;
75
76 static void frame_list_clear(FrameList *frame_list)
77 {
78     if (frame_list) {
79         while (frame_list->list) {
80             FrameInfo *info = frame_list->list;
81             frame_list->list = info->next;
82             av_free(info);
83         }
84         frame_list->nb_frames  = 0;
85         frame_list->nb_samples = 0;
86         frame_list->end        = NULL;
87     }
88 }
89
90 static int frame_list_next_frame_size(FrameList *frame_list)
91 {
92     if (!frame_list->list)
93         return 0;
94     return frame_list->list->nb_samples;
95 }
96
97 static int64_t frame_list_next_pts(FrameList *frame_list)
98 {
99     if (!frame_list->list)
100         return AV_NOPTS_VALUE;
101     return frame_list->list->pts;
102 }
103
104 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
105 {
106     if (nb_samples >= frame_list->nb_samples) {
107         frame_list_clear(frame_list);
108     } else {
109         int samples = nb_samples;
110         while (samples > 0) {
111             FrameInfo *info = frame_list->list;
112             av_assert0(info);
113             if (info->nb_samples <= samples) {
114                 samples -= info->nb_samples;
115                 frame_list->list = info->next;
116                 if (!frame_list->list)
117                     frame_list->end = NULL;
118                 frame_list->nb_frames--;
119                 frame_list->nb_samples -= info->nb_samples;
120                 av_free(info);
121             } else {
122                 info->nb_samples       -= samples;
123                 info->pts              += samples;
124                 frame_list->nb_samples -= samples;
125                 samples = 0;
126             }
127         }
128     }
129 }
130
131 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
132 {
133     FrameInfo *info = av_malloc(sizeof(*info));
134     if (!info)
135         return AVERROR(ENOMEM);
136     info->nb_samples = nb_samples;
137     info->pts        = pts;
138     info->next       = NULL;
139
140     if (!frame_list->list) {
141         frame_list->list = info;
142         frame_list->end  = info;
143     } else {
144         av_assert0(frame_list->end);
145         frame_list->end->next = info;
146         frame_list->end       = info;
147     }
148     frame_list->nb_frames++;
149     frame_list->nb_samples += nb_samples;
150
151     return 0;
152 }
153
154
155 typedef struct MixContext {
156     const AVClass *class;       /**< class for AVOptions */
157     AVFloatDSPContext *fdsp;
158
159     int nb_inputs;              /**< number of inputs */
160     int active_inputs;          /**< number of input currently active */
161     int duration_mode;          /**< mode for determining duration */
162     float dropout_transition;   /**< transition time when an input drops out */
163
164     int nb_channels;            /**< number of channels */
165     int sample_rate;            /**< sample rate */
166     int planar;
167     AVAudioFifo **fifos;        /**< audio fifo for each input */
168     uint8_t *input_state;       /**< current state of each input */
169     float *input_scale;         /**< mixing scale factor for each input */
170     float scale_norm;           /**< normalization factor for all inputs */
171     int64_t next_pts;           /**< calculated pts for next output frame */
172     FrameList *frame_list;      /**< list of frame info for the first input */
173 } MixContext;
174
175 #define OFFSET(x) offsetof(MixContext, x)
176 #define A AV_OPT_FLAG_AUDIO_PARAM
177 #define F AV_OPT_FLAG_FILTERING_PARAM
178 static const AVOption amix_options[] = {
179     { "inputs", "Number of inputs.",
180             OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
181     { "duration", "How to determine the end-of-stream.",
182             OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0,  2, A|F, "duration" },
183         { "longest",  "Duration of longest input.",  0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST  }, INT_MIN, INT_MAX, A|F, "duration" },
184         { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
185         { "first",    "Duration of first input.",    0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST    }, INT_MIN, INT_MAX, A|F, "duration" },
186     { "dropout_transition", "Transition time, in seconds, for volume "
187                             "renormalization when an input stream ends.",
188             OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
189     { NULL }
190 };
191
192 AVFILTER_DEFINE_CLASS(amix);
193
194 /**
195  * Update the scaling factors to apply to each input during mixing.
196  *
197  * This balances the full volume range between active inputs and handles
198  * volume transitions when EOF is encountered on an input but mixing continues
199  * with the remaining inputs.
200  */
201 static void calculate_scales(MixContext *s, int nb_samples)
202 {
203     int i;
204
205     if (s->scale_norm > s->active_inputs) {
206         s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
207         s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
208     }
209
210     for (i = 0; i < s->nb_inputs; i++) {
211         if (s->input_state[i] & INPUT_ON)
212             s->input_scale[i] = 1.0f / s->scale_norm;
213         else
214             s->input_scale[i] = 0.0f;
215     }
216 }
217
218 static int config_output(AVFilterLink *outlink)
219 {
220     AVFilterContext *ctx = outlink->src;
221     MixContext *s      = ctx->priv;
222     int i;
223     char buf[64];
224
225     s->planar          = av_sample_fmt_is_planar(outlink->format);
226     s->sample_rate     = outlink->sample_rate;
227     outlink->time_base = (AVRational){ 1, outlink->sample_rate };
228     s->next_pts        = AV_NOPTS_VALUE;
229
230     s->frame_list = av_mallocz(sizeof(*s->frame_list));
231     if (!s->frame_list)
232         return AVERROR(ENOMEM);
233
234     s->fifos = av_mallocz_array(s->nb_inputs, sizeof(*s->fifos));
235     if (!s->fifos)
236         return AVERROR(ENOMEM);
237
238     s->nb_channels = outlink->channels;
239     for (i = 0; i < s->nb_inputs; i++) {
240         s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
241         if (!s->fifos[i])
242             return AVERROR(ENOMEM);
243     }
244
245     s->input_state = av_malloc(s->nb_inputs);
246     if (!s->input_state)
247         return AVERROR(ENOMEM);
248     memset(s->input_state, INPUT_ON, s->nb_inputs);
249     s->active_inputs = s->nb_inputs;
250
251     s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
252     if (!s->input_scale)
253         return AVERROR(ENOMEM);
254     s->scale_norm = s->active_inputs;
255     calculate_scales(s, 0);
256
257     av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
258
259     av_log(ctx, AV_LOG_VERBOSE,
260            "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
261            av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
262
263     return 0;
264 }
265
266 static int calc_active_inputs(MixContext *s);
267
268 /**
269  * Read samples from the input FIFOs, mix, and write to the output link.
270  */
271 static int output_frame(AVFilterLink *outlink)
272 {
273     AVFilterContext *ctx = outlink->src;
274     MixContext      *s = ctx->priv;
275     AVFrame *out_buf, *in_buf;
276     int nb_samples, ns, ret, i;
277
278     ret = calc_active_inputs(s);
279     if (ret < 0)
280         return ret;
281
282     if (s->input_state[0] & INPUT_ON) {
283         /* first input live: use the corresponding frame size */
284         nb_samples = frame_list_next_frame_size(s->frame_list);
285         for (i = 1; i < s->nb_inputs; i++) {
286             if (s->input_state[i] & INPUT_ON) {
287                 ns = av_audio_fifo_size(s->fifos[i]);
288                 if (ns < nb_samples) {
289                     if (!(s->input_state[i] & INPUT_EOF))
290                         /* unclosed input with not enough samples */
291                         return 0;
292                     /* closed input to drain */
293                     nb_samples = ns;
294                 }
295             }
296         }
297     } else {
298         /* first input closed: use the available samples */
299         nb_samples = INT_MAX;
300         for (i = 1; i < s->nb_inputs; i++) {
301             if (s->input_state[i] & INPUT_ON) {
302                 ns = av_audio_fifo_size(s->fifos[i]);
303                 nb_samples = FFMIN(nb_samples, ns);
304             }
305         }
306         if (nb_samples == INT_MAX)
307             return AVERROR_EOF;
308     }
309
310     s->next_pts = frame_list_next_pts(s->frame_list);
311     frame_list_remove_samples(s->frame_list, nb_samples);
312
313     calculate_scales(s, nb_samples);
314
315     if (nb_samples == 0)
316         return 0;
317
318     out_buf = ff_get_audio_buffer(outlink, nb_samples);
319     if (!out_buf)
320         return AVERROR(ENOMEM);
321
322     in_buf = ff_get_audio_buffer(outlink, nb_samples);
323     if (!in_buf) {
324         av_frame_free(&out_buf);
325         return AVERROR(ENOMEM);
326     }
327
328     for (i = 0; i < s->nb_inputs; i++) {
329         if (s->input_state[i] & INPUT_ON) {
330             int planes, plane_size, p;
331
332             av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
333                                nb_samples);
334
335             planes     = s->planar ? s->nb_channels : 1;
336             plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
337             plane_size = FFALIGN(plane_size, 16);
338
339             for (p = 0; p < planes; p++) {
340                 s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
341                                            (float *) in_buf->extended_data[p],
342                                            s->input_scale[i], plane_size);
343             }
344         }
345     }
346     av_frame_free(&in_buf);
347
348     out_buf->pts = s->next_pts;
349     if (s->next_pts != AV_NOPTS_VALUE)
350         s->next_pts += nb_samples;
351
352     return ff_filter_frame(outlink, out_buf);
353 }
354
355 /**
356  * Requests a frame, if needed, from each input link other than the first.
357  */
358 static int request_samples(AVFilterContext *ctx, int min_samples)
359 {
360     MixContext *s = ctx->priv;
361     int i, ret;
362
363     av_assert0(s->nb_inputs > 1);
364
365     for (i = 1; i < s->nb_inputs; i++) {
366         ret = 0;
367         if (!(s->input_state[i] & INPUT_ON))
368             continue;
369         if (av_audio_fifo_size(s->fifos[i]) >= min_samples)
370             continue;
371         ret = ff_request_frame(ctx->inputs[i]);
372         if (ret == AVERROR_EOF) {
373             s->input_state[i] |= INPUT_EOF;
374             if (av_audio_fifo_size(s->fifos[i]) == 0) {
375                 s->input_state[i] = 0;
376                 continue;
377             }
378         } else if (ret < 0)
379             return ret;
380     }
381     return output_frame(ctx->outputs[0]);
382 }
383
384 /**
385  * Calculates the number of active inputs and determines EOF based on the
386  * duration option.
387  *
388  * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
389  */
390 static int calc_active_inputs(MixContext *s)
391 {
392     int i;
393     int active_inputs = 0;
394     for (i = 0; i < s->nb_inputs; i++)
395         active_inputs += !!(s->input_state[i] & INPUT_ON);
396     s->active_inputs = active_inputs;
397
398     if (!active_inputs ||
399         (s->duration_mode == DURATION_FIRST && !(s->input_state[0] & INPUT_ON)) ||
400         (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
401         return AVERROR_EOF;
402     return 0;
403 }
404
405 static int request_frame(AVFilterLink *outlink)
406 {
407     AVFilterContext *ctx = outlink->src;
408     MixContext      *s = ctx->priv;
409     int ret;
410     int wanted_samples;
411
412     ret = calc_active_inputs(s);
413     if (ret < 0)
414         return ret;
415
416     if (!(s->input_state[0] & INPUT_ON))
417         return request_samples(ctx, 1);
418
419     if (s->frame_list->nb_frames == 0) {
420         ret = ff_request_frame(ctx->inputs[0]);
421         if (ret == AVERROR_EOF) {
422             s->input_state[0] = 0;
423             if (s->nb_inputs == 1)
424                 return AVERROR_EOF;
425             return output_frame(ctx->outputs[0]);
426         }
427         return ret;
428     }
429     av_assert0(s->frame_list->nb_frames > 0);
430
431     wanted_samples = frame_list_next_frame_size(s->frame_list);
432
433     return request_samples(ctx, wanted_samples);
434 }
435
436 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
437 {
438     AVFilterContext  *ctx = inlink->dst;
439     MixContext       *s = ctx->priv;
440     AVFilterLink *outlink = ctx->outputs[0];
441     int i, ret = 0;
442
443     for (i = 0; i < ctx->nb_inputs; i++)
444         if (ctx->inputs[i] == inlink)
445             break;
446     if (i >= ctx->nb_inputs) {
447         av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
448         ret = AVERROR(EINVAL);
449         goto fail;
450     }
451
452     if (i == 0) {
453         int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
454                                    outlink->time_base);
455         ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
456         if (ret < 0)
457             goto fail;
458     }
459
460     ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
461                               buf->nb_samples);
462
463     av_frame_free(&buf);
464     return output_frame(outlink);
465
466 fail:
467     av_frame_free(&buf);
468
469     return ret;
470 }
471
472 static av_cold int init(AVFilterContext *ctx)
473 {
474     MixContext *s = ctx->priv;
475     int i;
476
477     for (i = 0; i < s->nb_inputs; i++) {
478         char name[32];
479         AVFilterPad pad = { 0 };
480
481         snprintf(name, sizeof(name), "input%d", i);
482         pad.type           = AVMEDIA_TYPE_AUDIO;
483         pad.name           = av_strdup(name);
484         if (!pad.name)
485             return AVERROR(ENOMEM);
486         pad.filter_frame   = filter_frame;
487
488         ff_insert_inpad(ctx, i, &pad);
489     }
490
491     s->fdsp = avpriv_float_dsp_alloc(0);
492     if (!s->fdsp)
493         return AVERROR(ENOMEM);
494
495     return 0;
496 }
497
498 static av_cold void uninit(AVFilterContext *ctx)
499 {
500     int i;
501     MixContext *s = ctx->priv;
502
503     if (s->fifos) {
504         for (i = 0; i < s->nb_inputs; i++)
505             av_audio_fifo_free(s->fifos[i]);
506         av_freep(&s->fifos);
507     }
508     frame_list_clear(s->frame_list);
509     av_freep(&s->frame_list);
510     av_freep(&s->input_state);
511     av_freep(&s->input_scale);
512     av_freep(&s->fdsp);
513
514     for (i = 0; i < ctx->nb_inputs; i++)
515         av_freep(&ctx->input_pads[i].name);
516 }
517
518 static int query_formats(AVFilterContext *ctx)
519 {
520     AVFilterFormats *formats = NULL;
521     AVFilterChannelLayouts *layouts;
522     int ret;
523
524     layouts = ff_all_channel_counts();
525     if (!layouts) {
526         ret = AVERROR(ENOMEM);
527         goto fail;
528     }
529
530     if ((ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT ))          < 0 ||
531         (ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLTP))          < 0 ||
532         (ret = ff_set_common_formats        (ctx, formats))          < 0 ||
533         (ret = ff_set_common_channel_layouts(ctx, layouts))          < 0 ||
534         (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
535         goto fail;
536     return 0;
537 fail:
538     if (layouts)
539         av_freep(&layouts->channel_layouts);
540     av_freep(&layouts);
541     return ret;
542 }
543
544 static const AVFilterPad avfilter_af_amix_outputs[] = {
545     {
546         .name          = "default",
547         .type          = AVMEDIA_TYPE_AUDIO,
548         .config_props  = config_output,
549         .request_frame = request_frame
550     },
551     { NULL }
552 };
553
554 AVFilter ff_af_amix = {
555     .name           = "amix",
556     .description    = NULL_IF_CONFIG_SMALL("Audio mixing."),
557     .priv_size      = sizeof(MixContext),
558     .priv_class     = &amix_class,
559     .init           = init,
560     .uninit         = uninit,
561     .query_formats  = query_formats,
562     .inputs         = NULL,
563     .outputs        = avfilter_af_amix_outputs,
564     .flags          = AVFILTER_FLAG_DYNAMIC_INPUTS,
565 };
566
1	/*
2	* Audio Mix Filter
3	* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4	*
5	* This file is part of FFmpeg.
6	*
7	* FFmpeg is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Lesser General Public
9	* License as published by the Free Software Foundation; either
10	* version 2.1 of the License, or (at your option) any later version.
11	*
12	* FFmpeg is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Lesser General Public License for more details.
16	*
17	* You should have received a copy of the GNU Lesser General Public
18	* License along with FFmpeg; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20	*/
21
22	/**
23	* @file
24	* Audio Mix Filter
25	*
26	* Mixes audio from multiple sources into a single output. The channel layout,
27	* sample rate, and sample format will be the same for all inputs and the
28	* output.
29	*/
30
31	#include "libavutil/attributes.h"
32	#include "libavutil/audio_fifo.h"
33	#include "libavutil/avassert.h"
34	#include "libavutil/avstring.h"
35	#include "libavutil/channel_layout.h"
36	#include "libavutil/common.h"
37	#include "libavutil/float_dsp.h"
38	#include "libavutil/mathematics.h"
39	#include "libavutil/opt.h"
40	#include "libavutil/samplefmt.h"
41
42	#include "audio.h"
43	#include "avfilter.h"
44	#include "formats.h"
45	#include "internal.h"
46
#define INPUT_ON       1    /**< input is active */
#define INPUT_EOF      2    /**< input has reached EOF (may still be active) */

/* values of the "duration" option */
#define DURATION_LONGEST  0
#define DURATION_SHORTEST 1
#define DURATION_FIRST    2
53
54
/**
 * Size and timestamp of one frame queued for the first input.
 * Entries are chained into a singly linked list (see FrameList).
 */
typedef struct FrameInfo {
    int nb_samples;             /**< samples of this frame not yet consumed */
    int64_t pts;                /**< timestamp of the first remaining sample */
    struct FrameInfo *next;     /**< next entry, NULL for the last one */
} FrameInfo;
60
61	/**
62	* Linked list used to store timestamps and frame sizes of all frames in the
63	* FIFO for the first input.
64	*
65	* This is needed to keep timestamps synchronized for the case where multiple
66	* input frames are pushed to the filter for processing before a frame is
67	* requested by the output link.
68	*/
69	typedef struct FrameList {
70	int nb_frames;
71	int nb_samples;
72	FrameInfo *list;
73	FrameInfo *end;
74	} FrameList;
75
76	static void frame_list_clear(FrameList *frame_list)
77	{
78	if (frame_list) {
79	while (frame_list->list) {
80	FrameInfo *info = frame_list->list;
81	frame_list->list = info->next;
82	av_free(info);
83	}
84	frame_list->nb_frames = 0;
85	frame_list->nb_samples = 0;
86	frame_list->end = NULL;
87	}
88	}
89
90	static int frame_list_next_frame_size(FrameList *frame_list)
91	{
92	if (!frame_list->list)
93	return 0;
94	return frame_list->list->nb_samples;
95	}
96
97	static int64_t frame_list_next_pts(FrameList *frame_list)
98	{
99	if (!frame_list->list)
100	return AV_NOPTS_VALUE;
101	return frame_list->list->pts;
102	}
103
104	static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
105	{
106	if (nb_samples >= frame_list->nb_samples) {
107	frame_list_clear(frame_list);
108	} else {
109	int samples = nb_samples;
110	while (samples > 0) {
111	FrameInfo *info = frame_list->list;
112	av_assert0(info);
113	if (info->nb_samples <= samples) {
114	samples -= info->nb_samples;
115	frame_list->list = info->next;
116	if (!frame_list->list)
117	frame_list->end = NULL;
118	frame_list->nb_frames--;
119	frame_list->nb_samples -= info->nb_samples;
120	av_free(info);
121	} else {
122	info->nb_samples -= samples;
123	info->pts += samples;
124	frame_list->nb_samples -= samples;
125	samples = 0;
126	}
127	}
128	}
129	}
130
131	static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
132	{
133	FrameInfo info = av_malloc(sizeof(info));
134	if (!info)
135	return AVERROR(ENOMEM);
136	info->nb_samples = nb_samples;
137	info->pts = pts;
138	info->next = NULL;
139
140	if (!frame_list->list) {
141	frame_list->list = info;
142	frame_list->end = info;
143	} else {
144	av_assert0(frame_list->end);
145	frame_list->end->next = info;
146	frame_list->end = info;
147	}
148	frame_list->nb_frames++;
149	frame_list->nb_samples += nb_samples;
150
151	return 0;
152	}
153
154
155	typedef struct MixContext {
156	const AVClass class; /< class for AVOptions /
157	AVFloatDSPContext *fdsp;
158
159	int nb_inputs; /*< number of inputs /
160	int active_inputs; /*< number of input currently active /
161	int duration_mode; /*< mode for determining duration /
162	float dropout_transition; /*< transition time when an input drops out /
163
164	int nb_channels; /*< number of channels /
165	int sample_rate; /*< sample rate /
166	int planar;
167	AVAudioFifo fifos; /< audio fifo for each input */
168	uint8_t input_state; /< current state of each input /
169	float input_scale; /< mixing scale factor for each input /
170	float scale_norm; /*< normalization factor for all inputs /
171	int64_t next_pts; /*< calculated pts for next output frame /
172	FrameList frame_list; /< list of frame info for the first input /
173	} MixContext;
174
175	#define OFFSET(x) offsetof(MixContext, x)
176	#define A AV_OPT_FLAG_AUDIO_PARAM
177	#define F AV_OPT_FLAG_FILTERING_PARAM
178	static const AVOption amix_options[] = {
179	{ "inputs", "Number of inputs.",
180	OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A\|F },
181	{ "duration", "How to determine the end-of-stream.",
182	OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A\|F, "duration" },
183	{ "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A\|F, "duration" },
184	{ "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A\|F, "duration" },
185	{ "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A\|F, "duration" },
186	{ "dropout_transition", "Transition time, in seconds, for volume "
187	"renormalization when an input stream ends.",
188	OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A\|F },
189	{ NULL }
190	};
191
192	AVFILTER_DEFINE_CLASS(amix);
193
194	/**
195	* Update the scaling factors to apply to each input during mixing.
196	*
197	* This balances the full volume range between active inputs and handles
198	* volume transitions when EOF is encountered on an input but mixing continues
199	* with the remaining inputs.
200	*/
201	static void calculate_scales(MixContext *s, int nb_samples)
202	{
203	int i;
204
205	if (s->scale_norm > s->active_inputs) {
206	s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
207	s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
208	}
209
210	for (i = 0; i < s->nb_inputs; i++) {
211	if (s->input_state[i] & INPUT_ON)
212	s->input_scale[i] = 1.0f / s->scale_norm;
213	else
214	s->input_scale[i] = 0.0f;
215	}
216	}
217
218	static int config_output(AVFilterLink *outlink)
219	{
220	AVFilterContext *ctx = outlink->src;
221	MixContext *s = ctx->priv;
222	int i;
223	char buf[64];
224
225	s->planar = av_sample_fmt_is_planar(outlink->format);
226	s->sample_rate = outlink->sample_rate;
227	outlink->time_base = (AVRational){ 1, outlink->sample_rate };
228	s->next_pts = AV_NOPTS_VALUE;
229
230	s->frame_list = av_mallocz(sizeof(*s->frame_list));
231	if (!s->frame_list)
232	return AVERROR(ENOMEM);
233
234	s->fifos = av_mallocz_array(s->nb_inputs, sizeof(*s->fifos));
235	if (!s->fifos)
236	return AVERROR(ENOMEM);
237
238	s->nb_channels = outlink->channels;
239	for (i = 0; i < s->nb_inputs; i++) {
240	s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
241	if (!s->fifos[i])
242	return AVERROR(ENOMEM);
243	}
244
245	s->input_state = av_malloc(s->nb_inputs);
246	if (!s->input_state)
247	return AVERROR(ENOMEM);
248	memset(s->input_state, INPUT_ON, s->nb_inputs);
249	s->active_inputs = s->nb_inputs;
250
251	s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
252	if (!s->input_scale)
253	return AVERROR(ENOMEM);
254	s->scale_norm = s->active_inputs;
255	calculate_scales(s, 0);
256
257	av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
258
259	av_log(ctx, AV_LOG_VERBOSE,
260	"inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
261	av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
262
263	return 0;
264	}
265
266	static int calc_active_inputs(MixContext *s);
267
268	/**
269	* Read samples from the input FIFOs, mix, and write to the output link.
270	*/
271	static int output_frame(AVFilterLink *outlink)
272	{
273	AVFilterContext *ctx = outlink->src;
274	MixContext *s = ctx->priv;
275	AVFrame out_buf, in_buf;
276	int nb_samples, ns, ret, i;
277
278	ret = calc_active_inputs(s);
279	if (ret < 0)
280	return ret;
281
282	if (s->input_state[0] & INPUT_ON) {
283	/* first input live: use the corresponding frame size */
284	nb_samples = frame_list_next_frame_size(s->frame_list);
285	for (i = 1; i < s->nb_inputs; i++) {
286	if (s->input_state[i] & INPUT_ON) {
287	ns = av_audio_fifo_size(s->fifos[i]);
288	if (ns < nb_samples) {
289	if (!(s->input_state[i] & INPUT_EOF))
290	/* unclosed input with not enough samples */
291	return 0;
292	/* closed input to drain */
293	nb_samples = ns;
294	}
295	}
296	}
297	} else {
298	/* first input closed: use the available samples */
299	nb_samples = INT_MAX;
300	for (i = 1; i < s->nb_inputs; i++) {
301	if (s->input_state[i] & INPUT_ON) {
302	ns = av_audio_fifo_size(s->fifos[i]);
303	nb_samples = FFMIN(nb_samples, ns);
304	}
305	}
306	if (nb_samples == INT_MAX)
307	return AVERROR_EOF;
308	}
309
310	s->next_pts = frame_list_next_pts(s->frame_list);
311	frame_list_remove_samples(s->frame_list, nb_samples);
312
313	calculate_scales(s, nb_samples);
314
315	if (nb_samples == 0)
316	return 0;
317
318	out_buf = ff_get_audio_buffer(outlink, nb_samples);
319	if (!out_buf)
320	return AVERROR(ENOMEM);
321
322	in_buf = ff_get_audio_buffer(outlink, nb_samples);
323	if (!in_buf) {
324	av_frame_free(&out_buf);
325	return AVERROR(ENOMEM);
326	}
327
328	for (i = 0; i < s->nb_inputs; i++) {
329	if (s->input_state[i] & INPUT_ON) {
330	int planes, plane_size, p;
331
332	av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
333	nb_samples);
334
335	planes = s->planar ? s->nb_channels : 1;
336	plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
337	plane_size = FFALIGN(plane_size, 16);
338
339	for (p = 0; p < planes; p++) {
340	s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
341	(float *) in_buf->extended_data[p],
342	s->input_scale[i], plane_size);
343	}
344	}
345	}
346	av_frame_free(&in_buf);
347
348	out_buf->pts = s->next_pts;
349	if (s->next_pts != AV_NOPTS_VALUE)
350	s->next_pts += nb_samples;
351
352	return ff_filter_frame(outlink, out_buf);
353	}
354
355	/**
356	* Requests a frame, if needed, from each input link other than the first.
357	*/
358	static int request_samples(AVFilterContext *ctx, int min_samples)
359	{
360	MixContext *s = ctx->priv;
361	int i, ret;
362
363	av_assert0(s->nb_inputs > 1);
364
365	for (i = 1; i < s->nb_inputs; i++) {
366	ret = 0;
367	if (!(s->input_state[i] & INPUT_ON))
368	continue;
369	if (av_audio_fifo_size(s->fifos[i]) >= min_samples)
370	continue;
371	ret = ff_request_frame(ctx->inputs[i]);
372	if (ret == AVERROR_EOF) {
373	s->input_state[i] \|= INPUT_EOF;
374	if (av_audio_fifo_size(s->fifos[i]) == 0) {
375	s->input_state[i] = 0;
376	continue;
377	}
378	} else if (ret < 0)
379	return ret;
380	}
381	return output_frame(ctx->outputs[0]);
382	}
383
384	/**
385	* Calculates the number of active inputs and determines EOF based on the
386	* duration option.
387	*
388	* @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
389	*/
390	static int calc_active_inputs(MixContext *s)
391	{
392	int i;
393	int active_inputs = 0;
394	for (i = 0; i < s->nb_inputs; i++)
395	active_inputs += !!(s->input_state[i] & INPUT_ON);
396	s->active_inputs = active_inputs;
397
398	if (!active_inputs \|\|
399	(s->duration_mode == DURATION_FIRST && !(s->input_state[0] & INPUT_ON)) \|\|
400	(s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
401	return AVERROR_EOF;
402	return 0;
403	}
404
405	static int request_frame(AVFilterLink *outlink)
406	{
407	AVFilterContext *ctx = outlink->src;
408	MixContext *s = ctx->priv;
409	int ret;
410	int wanted_samples;
411
412	ret = calc_active_inputs(s);
413	if (ret < 0)
414	return ret;
415
416	if (!(s->input_state[0] & INPUT_ON))
417	return request_samples(ctx, 1);
418
419	if (s->frame_list->nb_frames == 0) {
420	ret = ff_request_frame(ctx->inputs[0]);
421	if (ret == AVERROR_EOF) {
422	s->input_state[0] = 0;
423	if (s->nb_inputs == 1)
424	return AVERROR_EOF;
425	return output_frame(ctx->outputs[0]);
426	}
427	return ret;
428	}
429	av_assert0(s->frame_list->nb_frames > 0);
430
431	wanted_samples = frame_list_next_frame_size(s->frame_list);
432
433	return request_samples(ctx, wanted_samples);
434	}
435
436	static int filter_frame(AVFilterLink inlink, AVFrame buf)
437	{
438	AVFilterContext *ctx = inlink->dst;
439	MixContext *s = ctx->priv;
440	AVFilterLink *outlink = ctx->outputs[0];
441	int i, ret = 0;
442
443	for (i = 0; i < ctx->nb_inputs; i++)
444	if (ctx->inputs[i] == inlink)
445	break;
446	if (i >= ctx->nb_inputs) {
447	av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
448	ret = AVERROR(EINVAL);
449	goto fail;
450	}
451
452	if (i == 0) {
453	int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
454	outlink->time_base);
455	ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
456	if (ret < 0)
457	goto fail;
458	}
459
460	ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
461	buf->nb_samples);
462
463	av_frame_free(&buf);
464	return output_frame(outlink);
465
466	fail:
467	av_frame_free(&buf);
468
469	return ret;
470	}
471
472	static av_cold int init(AVFilterContext *ctx)
473	{
474	MixContext *s = ctx->priv;
475	int i;
476
477	for (i = 0; i < s->nb_inputs; i++) {
478	char name[32];
479	AVFilterPad pad = { 0 };
480
481	snprintf(name, sizeof(name), "input%d", i);
482	pad.type = AVMEDIA_TYPE_AUDIO;
483	pad.name = av_strdup(name);
484	if (!pad.name)
485	return AVERROR(ENOMEM);
486	pad.filter_frame = filter_frame;
487
488	ff_insert_inpad(ctx, i, &pad);
489	}
490
491	s->fdsp = avpriv_float_dsp_alloc(0);
492	if (!s->fdsp)
493	return AVERROR(ENOMEM);
494
495	return 0;
496	}
497
498	static av_cold void uninit(AVFilterContext *ctx)
499	{
500	int i;
501	MixContext *s = ctx->priv;
502
503	if (s->fifos) {
504	for (i = 0; i < s->nb_inputs; i++)
505	av_audio_fifo_free(s->fifos[i]);
506	av_freep(&s->fifos);
507	}
508	frame_list_clear(s->frame_list);
509	av_freep(&s->frame_list);
510	av_freep(&s->input_state);
511	av_freep(&s->input_scale);
512	av_freep(&s->fdsp);
513
514	for (i = 0; i < ctx->nb_inputs; i++)
515	av_freep(&ctx->input_pads[i].name);
516	}
517
518	static int query_formats(AVFilterContext *ctx)
519	{
520	AVFilterFormats *formats = NULL;
521	AVFilterChannelLayouts *layouts;
522	int ret;
523
524	layouts = ff_all_channel_counts();
525	if (!layouts) {
526	ret = AVERROR(ENOMEM);
527	goto fail;
528	}
529
530	if ((ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT )) < 0 \|\|
531	(ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLTP)) < 0 \|\|
532	(ret = ff_set_common_formats (ctx, formats)) < 0 \|\|
533	(ret = ff_set_common_channel_layouts(ctx, layouts)) < 0 \|\|
534	(ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
535	goto fail;
536	return 0;
537	fail:
538	if (layouts)
539	av_freep(&layouts->channel_layouts);
540	av_freep(&layouts);
541	return ret;
542	}
543
544	static const AVFilterPad avfilter_af_amix_outputs[] = {
545	{
546	.name = "default",
547	.type = AVMEDIA_TYPE_AUDIO,
548	.config_props = config_output,
549	.request_frame = request_frame
550	},
551	{ NULL }
552	};
553
554	AVFilter ff_af_amix = {
555	.name = "amix",
556	.description = NULL_IF_CONFIG_SMALL("Audio mixing."),
557	.priv_size = sizeof(MixContext),
558	.priv_class = &amix_class,
559	.init = init,
560	.uninit = uninit,
561	.query_formats = query_formats,
562	.inputs = NULL,
563	.outputs = avfilter_af_amix_outputs,
564	.flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
565	};
566