blob: aa5b28e6475dcdb202a269cba5d4c33372711ffd
1 | /* |
2 | * Dynamic Audio Normalizer |
3 | * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved. |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * Dynamic Audio Normalizer |
25 | */ |
26 | |
27 | #include <float.h> |
28 | |
29 | #include "libavutil/avassert.h" |
30 | #include "libavutil/opt.h" |
31 | |
32 | #define FF_BUFQUEUE_SIZE 302 |
33 | #include "libavfilter/bufferqueue.h" |
34 | |
35 | #include "audio.h" |
36 | #include "avfilter.h" |
37 | #include "internal.h" |
38 | |
/**
 * Fixed-capacity circular queue of doubles.
 *
 * Values are stored in `elements` starting at index `first` and wrap
 * around modulo `size`.
 */
typedef struct cqueue {
    double *elements;   ///< backing array with room for `size` values
    int size;           ///< capacity of `elements`
    int nb_elements;    ///< number of values currently stored
    int first;          ///< index in `elements` of the oldest stored value
} cqueue;
45 | |
/**
 * Private state of the dynaudnorm filter.
 *
 * The option-backed fields are written through dynaudnorm_options; the
 * remaining fields are (re)built in config_input() and released in uninit().
 */
typedef struct DynamicAudioNormalizerContext {
    const AVClass *class;

    struct FFBufQueue queue;            ///< input frames waiting for their smoothed gain

    int frame_len;                      ///< frame length in samples, derived from frame_len_msec
    int frame_len_msec;                 ///< option "f": frame length in milliseconds
    int filter_size;                    ///< option "g": filter window size, must be odd (checked in init())
    int dc_correction;                  ///< option "c": enable DC correction
    int channels_coupled;               ///< option "n": share one gain across all channels
    int alt_boundary_mode;              ///< option "b": alternative boundary mode

    double peak_value;                  ///< option "p": peak magnitude output samples are limited to
    double max_amplification;           ///< option "m": upper bound for the gain factor
    double target_rms;                  ///< option "r": target RMS; ignored when not above DBL_EPSILON
    double compress_factor;             ///< option "s": compression factor; ignored when not above DBL_EPSILON
    double *prev_amplification_factor;  ///< per channel: gain reached at the end of the previous output frame
    double *dc_correction_value;        ///< per channel: running DC offset estimate
    double *compress_threshold;         ///< per channel: running compression threshold (only [0] is used when coupled)
    double *fade_factors[2];            ///< per sample position: [0] weight of the previous value, [1] weight of the next value
    double *weights;                    ///< normalized Gaussian kernel with filter_size entries

    int channels;                       ///< channel count taken from the input link
    int delay;                          ///< set to filter_size on configuration, decremented by each flush_buffer() call

    cqueue **gain_history_original;     ///< per channel: raw per-frame gain factors
    cqueue **gain_history_minimum;      ///< per channel: output of the minimum filter
    cqueue **gain_history_smoothed;     ///< per channel: output of the Gaussian filter, consumed by amplify_frame()
} DynamicAudioNormalizerContext;
75 | |
/* Byte offset of an option-backed field inside the private context. */
#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
/* Flags shared by every option in the table below. */
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/*
 * User-visible options. Each entry lists: name, help text, target field,
 * type, default value, minimum, maximum, flags.
 */
static const AVOption dynaudnorm_options[] = {
    { "f", "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
    /* the table allows any value in [3, 301]; init() additionally rejects even sizes */
    { "g", "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
    { "p", "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
    { "m", "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
    { "r", "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
    { "n", "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
    { "c", "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
    { "s", "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(dynaudnorm);
93 | |
94 | static av_cold int init(AVFilterContext *ctx) |
95 | { |
96 | DynamicAudioNormalizerContext *s = ctx->priv; |
97 | |
98 | if (!(s->filter_size & 1)) { |
99 | av_log(ctx, AV_LOG_ERROR, "filter size %d is invalid. Must be an odd value.\n", s->filter_size); |
100 | return AVERROR(EINVAL); |
101 | } |
102 | |
103 | return 0; |
104 | } |
105 | |
106 | static int query_formats(AVFilterContext *ctx) |
107 | { |
108 | AVFilterFormats *formats; |
109 | AVFilterChannelLayouts *layouts; |
110 | static const enum AVSampleFormat sample_fmts[] = { |
111 | AV_SAMPLE_FMT_DBLP, |
112 | AV_SAMPLE_FMT_NONE |
113 | }; |
114 | int ret; |
115 | |
116 | layouts = ff_all_channel_counts(); |
117 | if (!layouts) |
118 | return AVERROR(ENOMEM); |
119 | ret = ff_set_common_channel_layouts(ctx, layouts); |
120 | if (ret < 0) |
121 | return ret; |
122 | |
123 | formats = ff_make_format_list(sample_fmts); |
124 | if (!formats) |
125 | return AVERROR(ENOMEM); |
126 | ret = ff_set_common_formats(ctx, formats); |
127 | if (ret < 0) |
128 | return ret; |
129 | |
130 | formats = ff_all_samplerates(); |
131 | if (!formats) |
132 | return AVERROR(ENOMEM); |
133 | return ff_set_common_samplerates(ctx, formats); |
134 | } |
135 | |
136 | static inline int frame_size(int sample_rate, int frame_len_msec) |
137 | { |
138 | const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0)); |
139 | return frame_size + (frame_size % 2); |
140 | } |
141 | |
/**
 * Fill the two cross-fade tables used to blend a previous value into a new
 * one over the length of a frame.
 *
 * For position i, fade_factors[0][i] is 1 - (i + 1) / frame_len and
 * fade_factors[1][i] is its complement to 1.
 *
 * @param fade_factors two arrays, each with room for frame_len doubles
 * @param frame_len    number of entries to write
 */
static void precalculate_fade_factors(double *fade_factors[2], int frame_len)
{
    double *const weight_prev = fade_factors[0];
    double *const weight_next = fade_factors[1];
    const double step_size = 1.0 / frame_len;
    int i = 0;

    while (i < frame_len) {
        weight_prev[i] = 1.0 - (step_size * (i + 1.0));
        weight_next[i] = 1.0 - weight_prev[i];
        i++;
    }
}
152 | |
153 | static cqueue *cqueue_create(int size) |
154 | { |
155 | cqueue *q; |
156 | |
157 | q = av_malloc(sizeof(cqueue)); |
158 | if (!q) |
159 | return NULL; |
160 | |
161 | q->size = size; |
162 | q->nb_elements = 0; |
163 | q->first = 0; |
164 | |
165 | q->elements = av_malloc_array(size, sizeof(double)); |
166 | if (!q->elements) { |
167 | av_free(q); |
168 | return NULL; |
169 | } |
170 | |
171 | return q; |
172 | } |
173 | |
174 | static void cqueue_free(cqueue *q) |
175 | { |
176 | if (q) |
177 | av_free(q->elements); |
178 | av_free(q); |
179 | } |
180 | |
181 | static int cqueue_size(cqueue *q) |
182 | { |
183 | return q->nb_elements; |
184 | } |
185 | |
186 | static int cqueue_empty(cqueue *q) |
187 | { |
188 | return !q->nb_elements; |
189 | } |
190 | |
191 | static int cqueue_enqueue(cqueue *q, double element) |
192 | { |
193 | int i; |
194 | |
195 | av_assert2(q->nb_elements != q->size); |
196 | |
197 | i = (q->first + q->nb_elements) % q->size; |
198 | q->elements[i] = element; |
199 | q->nb_elements++; |
200 | |
201 | return 0; |
202 | } |
203 | |
204 | static double cqueue_peek(cqueue *q, int index) |
205 | { |
206 | av_assert2(index < q->nb_elements); |
207 | return q->elements[(q->first + index) % q->size]; |
208 | } |
209 | |
210 | static int cqueue_dequeue(cqueue *q, double *element) |
211 | { |
212 | av_assert2(!cqueue_empty(q)); |
213 | |
214 | *element = q->elements[q->first]; |
215 | q->first = (q->first + 1) % q->size; |
216 | q->nb_elements--; |
217 | |
218 | return 0; |
219 | } |
220 | |
221 | static int cqueue_pop(cqueue *q) |
222 | { |
223 | av_assert2(!cqueue_empty(q)); |
224 | |
225 | q->first = (q->first + 1) % q->size; |
226 | q->nb_elements--; |
227 | |
228 | return 0; |
229 | } |
230 | |
231 | static void init_gaussian_filter(DynamicAudioNormalizerContext *s) |
232 | { |
233 | double total_weight = 0.0; |
234 | const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0); |
235 | double adjust; |
236 | int i; |
237 | |
238 | // Pre-compute constants |
239 | const int offset = s->filter_size / 2; |
240 | const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI)); |
241 | const double c2 = 2.0 * sigma * sigma; |
242 | |
243 | // Compute weights |
244 | for (i = 0; i < s->filter_size; i++) { |
245 | const int x = i - offset; |
246 | |
247 | s->weights[i] = c1 * exp(-x * x / c2); |
248 | total_weight += s->weights[i]; |
249 | } |
250 | |
251 | // Adjust weights |
252 | adjust = 1.0 / total_weight; |
253 | for (i = 0; i < s->filter_size; i++) { |
254 | s->weights[i] *= adjust; |
255 | } |
256 | } |
257 | |
258 | static av_cold void uninit(AVFilterContext *ctx) |
259 | { |
260 | DynamicAudioNormalizerContext *s = ctx->priv; |
261 | int c; |
262 | |
263 | av_freep(&s->prev_amplification_factor); |
264 | av_freep(&s->dc_correction_value); |
265 | av_freep(&s->compress_threshold); |
266 | av_freep(&s->fade_factors[0]); |
267 | av_freep(&s->fade_factors[1]); |
268 | |
269 | for (c = 0; c < s->channels; c++) { |
270 | if (s->gain_history_original) |
271 | cqueue_free(s->gain_history_original[c]); |
272 | if (s->gain_history_minimum) |
273 | cqueue_free(s->gain_history_minimum[c]); |
274 | if (s->gain_history_smoothed) |
275 | cqueue_free(s->gain_history_smoothed[c]); |
276 | } |
277 | |
278 | av_freep(&s->gain_history_original); |
279 | av_freep(&s->gain_history_minimum); |
280 | av_freep(&s->gain_history_smoothed); |
281 | |
282 | av_freep(&s->weights); |
283 | |
284 | ff_bufqueue_discard_all(&s->queue); |
285 | } |
286 | |
287 | static int config_input(AVFilterLink *inlink) |
288 | { |
289 | AVFilterContext *ctx = inlink->dst; |
290 | DynamicAudioNormalizerContext *s = ctx->priv; |
291 | int c; |
292 | |
293 | uninit(ctx); |
294 | |
295 | s->frame_len = |
296 | inlink->min_samples = |
297 | inlink->max_samples = |
298 | inlink->partial_buf_size = frame_size(inlink->sample_rate, s->frame_len_msec); |
299 | av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len); |
300 | |
301 | s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0])); |
302 | s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1])); |
303 | |
304 | s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor)); |
305 | s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value)); |
306 | s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold)); |
307 | s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original)); |
308 | s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum)); |
309 | s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed)); |
310 | s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights)); |
311 | if (!s->prev_amplification_factor || !s->dc_correction_value || |
312 | !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] || |
313 | !s->gain_history_original || !s->gain_history_minimum || |
314 | !s->gain_history_smoothed || !s->weights) |
315 | return AVERROR(ENOMEM); |
316 | |
317 | for (c = 0; c < inlink->channels; c++) { |
318 | s->prev_amplification_factor[c] = 1.0; |
319 | |
320 | s->gain_history_original[c] = cqueue_create(s->filter_size); |
321 | s->gain_history_minimum[c] = cqueue_create(s->filter_size); |
322 | s->gain_history_smoothed[c] = cqueue_create(s->filter_size); |
323 | |
324 | if (!s->gain_history_original[c] || !s->gain_history_minimum[c] || |
325 | !s->gain_history_smoothed[c]) |
326 | return AVERROR(ENOMEM); |
327 | } |
328 | |
329 | precalculate_fade_factors(s->fade_factors, s->frame_len); |
330 | init_gaussian_filter(s); |
331 | |
332 | s->channels = inlink->channels; |
333 | s->delay = s->filter_size; |
334 | |
335 | return 0; |
336 | } |
337 | |
/**
 * Blend two values using the precomputed cross-fade tables.
 *
 * @param prev         value weighted by fade_factors[0][pos]
 * @param next         value weighted by fade_factors[1][pos]
 * @param pos          index into both tables
 * @param fade_factors the two weight tables
 * @return the weighted sum of prev and next
 */
static inline double fade(double prev, double next, int pos,
                          double *fade_factors[2])
{
    const double weight_prev = fade_factors[0][pos];
    const double weight_next = fade_factors[1][pos];

    return weight_prev * prev + weight_next * next;
}
343 | |
/**
 * @param value number to square
 * @return value multiplied by itself
 */
static inline double pow_2(const double value)
{
    const double squared = value * value;

    return squared;
}
348 | |
349 | static inline double bound(const double threshold, const double val) |
350 | { |
351 | const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0 |
352 | return erf(CONST * (val / threshold)) * threshold; |
353 | } |
354 | |
355 | static double find_peak_magnitude(AVFrame *frame, int channel) |
356 | { |
357 | double max = DBL_EPSILON; |
358 | int c, i; |
359 | |
360 | if (channel == -1) { |
361 | for (c = 0; c < av_frame_get_channels(frame); c++) { |
362 | double *data_ptr = (double *)frame->extended_data[c]; |
363 | |
364 | for (i = 0; i < frame->nb_samples; i++) |
365 | max = FFMAX(max, fabs(data_ptr[i])); |
366 | } |
367 | } else { |
368 | double *data_ptr = (double *)frame->extended_data[channel]; |
369 | |
370 | for (i = 0; i < frame->nb_samples; i++) |
371 | max = FFMAX(max, fabs(data_ptr[i])); |
372 | } |
373 | |
374 | return max; |
375 | } |
376 | |
377 | static double compute_frame_rms(AVFrame *frame, int channel) |
378 | { |
379 | double rms_value = 0.0; |
380 | int c, i; |
381 | |
382 | if (channel == -1) { |
383 | for (c = 0; c < av_frame_get_channels(frame); c++) { |
384 | const double *data_ptr = (double *)frame->extended_data[c]; |
385 | |
386 | for (i = 0; i < frame->nb_samples; i++) { |
387 | rms_value += pow_2(data_ptr[i]); |
388 | } |
389 | } |
390 | |
391 | rms_value /= frame->nb_samples * av_frame_get_channels(frame); |
392 | } else { |
393 | const double *data_ptr = (double *)frame->extended_data[channel]; |
394 | for (i = 0; i < frame->nb_samples; i++) { |
395 | rms_value += pow_2(data_ptr[i]); |
396 | } |
397 | |
398 | rms_value /= frame->nb_samples; |
399 | } |
400 | |
401 | return FFMAX(sqrt(rms_value), DBL_EPSILON); |
402 | } |
403 | |
404 | static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame, |
405 | int channel) |
406 | { |
407 | const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel); |
408 | const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX; |
409 | return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain)); |
410 | } |
411 | |
412 | static double minimum_filter(cqueue *q) |
413 | { |
414 | double min = DBL_MAX; |
415 | int i; |
416 | |
417 | for (i = 0; i < cqueue_size(q); i++) { |
418 | min = FFMIN(min, cqueue_peek(q, i)); |
419 | } |
420 | |
421 | return min; |
422 | } |
423 | |
424 | static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q) |
425 | { |
426 | double result = 0.0; |
427 | int i; |
428 | |
429 | for (i = 0; i < cqueue_size(q); i++) { |
430 | result += cqueue_peek(q, i) * s->weights[i]; |
431 | } |
432 | |
433 | return result; |
434 | } |
435 | |
436 | static void update_gain_history(DynamicAudioNormalizerContext *s, int channel, |
437 | double current_gain_factor) |
438 | { |
439 | if (cqueue_empty(s->gain_history_original[channel]) || |
440 | cqueue_empty(s->gain_history_minimum[channel])) { |
441 | const int pre_fill_size = s->filter_size / 2; |
442 | const double initial_value = s->alt_boundary_mode ? current_gain_factor : 1.0; |
443 | |
444 | s->prev_amplification_factor[channel] = initial_value; |
445 | |
446 | while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) { |
447 | cqueue_enqueue(s->gain_history_original[channel], initial_value); |
448 | } |
449 | } |
450 | |
451 | cqueue_enqueue(s->gain_history_original[channel], current_gain_factor); |
452 | |
453 | while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) { |
454 | double minimum; |
455 | av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size); |
456 | |
457 | if (cqueue_empty(s->gain_history_minimum[channel])) { |
458 | const int pre_fill_size = s->filter_size / 2; |
459 | double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0; |
460 | int input = pre_fill_size; |
461 | |
462 | while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) { |
463 | initial_value = FFMIN(initial_value, cqueue_peek(s->gain_history_original[channel], ++input)); |
464 | cqueue_enqueue(s->gain_history_minimum[channel], initial_value); |
465 | } |
466 | } |
467 | |
468 | minimum = minimum_filter(s->gain_history_original[channel]); |
469 | |
470 | cqueue_enqueue(s->gain_history_minimum[channel], minimum); |
471 | |
472 | cqueue_pop(s->gain_history_original[channel]); |
473 | } |
474 | |
475 | while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) { |
476 | double smoothed; |
477 | av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size); |
478 | smoothed = gaussian_filter(s, s->gain_history_minimum[channel]); |
479 | |
480 | cqueue_enqueue(s->gain_history_smoothed[channel], smoothed); |
481 | |
482 | cqueue_pop(s->gain_history_minimum[channel]); |
483 | } |
484 | } |
485 | |
/**
 * Move an old value towards a new one.
 *
 * @param new            value to move towards
 * @param old            value to move away from
 * @param aggressiveness weight of the new value, asserted to lie in [0, 1]
 * @return aggressiveness * new + (1 - aggressiveness) * old
 */
static inline double update_value(double new, double old, double aggressiveness)
{
    double inertia;

    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));

    inertia = 1.0 - aggressiveness;
    return aggressiveness * new + inertia * old;
}
491 | |
492 | static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame) |
493 | { |
494 | const double diff = 1.0 / frame->nb_samples; |
495 | int is_first_frame = cqueue_empty(s->gain_history_original[0]); |
496 | int c, i; |
497 | |
498 | for (c = 0; c < s->channels; c++) { |
499 | double *dst_ptr = (double *)frame->extended_data[c]; |
500 | double current_average_value = 0.0; |
501 | double prev_value; |
502 | |
503 | for (i = 0; i < frame->nb_samples; i++) |
504 | current_average_value += dst_ptr[i] * diff; |
505 | |
506 | prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c]; |
507 | s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1); |
508 | |
509 | for (i = 0; i < frame->nb_samples; i++) { |
510 | dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, s->fade_factors); |
511 | } |
512 | } |
513 | } |
514 | |
/**
 * Search for the largest value t, starting at threshold, for which
 * bound(t, 1.0) does not exceed threshold.
 *
 * The search adds a step that is halved each round until it drops to
 * DBL_EPSILON. A step is only taken while it still changes the value
 * when scaled by 2^63 and rounded with llrint().
 *
 * @param threshold desired threshold
 * @return the adjusted threshold; threshold itself when it is not strictly
 *         between DBL_EPSILON and 1.0 - DBL_EPSILON
 */
static double setup_compress_thresh(double threshold)
{
    double candidate, step;

    if (!((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))))
        return threshold;

    candidate = threshold;

    for (step = 1.0; step > DBL_EPSILON; step /= 2.0) {
        while ((llrint((candidate + step) * (UINT64_C(1) << 63)) >
                llrint(candidate * (UINT64_C(1) << 63))) &&
               (bound(candidate + step, 1.0) <= threshold)) {
            candidate += step;
        }
    }

    return candidate;
}
536 | |
537 | static double compute_frame_std_dev(DynamicAudioNormalizerContext *s, |
538 | AVFrame *frame, int channel) |
539 | { |
540 | double variance = 0.0; |
541 | int i, c; |
542 | |
543 | if (channel == -1) { |
544 | for (c = 0; c < s->channels; c++) { |
545 | const double *data_ptr = (double *)frame->extended_data[c]; |
546 | |
547 | for (i = 0; i < frame->nb_samples; i++) { |
548 | variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero* |
549 | } |
550 | } |
551 | variance /= (s->channels * frame->nb_samples) - 1; |
552 | } else { |
553 | const double *data_ptr = (double *)frame->extended_data[channel]; |
554 | |
555 | for (i = 0; i < frame->nb_samples; i++) { |
556 | variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero* |
557 | } |
558 | variance /= frame->nb_samples - 1; |
559 | } |
560 | |
561 | return FFMAX(sqrt(variance), DBL_EPSILON); |
562 | } |
563 | |
564 | static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame) |
565 | { |
566 | int is_first_frame = cqueue_empty(s->gain_history_original[0]); |
567 | int c, i; |
568 | |
569 | if (s->channels_coupled) { |
570 | const double standard_deviation = compute_frame_std_dev(s, frame, -1); |
571 | const double current_threshold = FFMIN(1.0, s->compress_factor * standard_deviation); |
572 | |
573 | const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0]; |
574 | double prev_actual_thresh, curr_actual_thresh; |
575 | s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0)); |
576 | |
577 | prev_actual_thresh = setup_compress_thresh(prev_value); |
578 | curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]); |
579 | |
580 | for (c = 0; c < s->channels; c++) { |
581 | double *const dst_ptr = (double *)frame->extended_data[c]; |
582 | for (i = 0; i < frame->nb_samples; i++) { |
583 | const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors); |
584 | dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); |
585 | } |
586 | } |
587 | } else { |
588 | for (c = 0; c < s->channels; c++) { |
589 | const double standard_deviation = compute_frame_std_dev(s, frame, c); |
590 | const double current_threshold = setup_compress_thresh(FFMIN(1.0, s->compress_factor * standard_deviation)); |
591 | |
592 | const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c]; |
593 | double prev_actual_thresh, curr_actual_thresh; |
594 | double *dst_ptr; |
595 | s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0); |
596 | |
597 | prev_actual_thresh = setup_compress_thresh(prev_value); |
598 | curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]); |
599 | |
600 | dst_ptr = (double *)frame->extended_data[c]; |
601 | for (i = 0; i < frame->nb_samples; i++) { |
602 | const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors); |
603 | dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); |
604 | } |
605 | } |
606 | } |
607 | } |
608 | |
609 | static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame) |
610 | { |
611 | if (s->dc_correction) { |
612 | perform_dc_correction(s, frame); |
613 | } |
614 | |
615 | if (s->compress_factor > DBL_EPSILON) { |
616 | perform_compression(s, frame); |
617 | } |
618 | |
619 | if (s->channels_coupled) { |
620 | const double current_gain_factor = get_max_local_gain(s, frame, -1); |
621 | int c; |
622 | |
623 | for (c = 0; c < s->channels; c++) |
624 | update_gain_history(s, c, current_gain_factor); |
625 | } else { |
626 | int c; |
627 | |
628 | for (c = 0; c < s->channels; c++) |
629 | update_gain_history(s, c, get_max_local_gain(s, frame, c)); |
630 | } |
631 | } |
632 | |
633 | static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame) |
634 | { |
635 | int c, i; |
636 | |
637 | for (c = 0; c < s->channels; c++) { |
638 | double *dst_ptr = (double *)frame->extended_data[c]; |
639 | double current_amplification_factor; |
640 | |
641 | cqueue_dequeue(s->gain_history_smoothed[c], ¤t_amplification_factor); |
642 | |
643 | for (i = 0; i < frame->nb_samples; i++) { |
644 | const double amplification_factor = fade(s->prev_amplification_factor[c], |
645 | current_amplification_factor, i, |
646 | s->fade_factors); |
647 | |
648 | dst_ptr[i] *= amplification_factor; |
649 | |
650 | if (fabs(dst_ptr[i]) > s->peak_value) |
651 | dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]); |
652 | } |
653 | |
654 | s->prev_amplification_factor[c] = current_amplification_factor; |
655 | } |
656 | } |
657 | |
658 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
659 | { |
660 | AVFilterContext *ctx = inlink->dst; |
661 | DynamicAudioNormalizerContext *s = ctx->priv; |
662 | AVFilterLink *outlink = inlink->dst->outputs[0]; |
663 | int ret = 0; |
664 | |
665 | if (!cqueue_empty(s->gain_history_smoothed[0])) { |
666 | AVFrame *out = ff_bufqueue_get(&s->queue); |
667 | |
668 | amplify_frame(s, out); |
669 | ret = ff_filter_frame(outlink, out); |
670 | } |
671 | |
672 | analyze_frame(s, in); |
673 | ff_bufqueue_add(ctx, &s->queue, in); |
674 | |
675 | return ret; |
676 | } |
677 | |
678 | static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink, |
679 | AVFilterLink *outlink) |
680 | { |
681 | AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len); |
682 | int c, i; |
683 | |
684 | if (!out) |
685 | return AVERROR(ENOMEM); |
686 | |
687 | for (c = 0; c < s->channels; c++) { |
688 | double *dst_ptr = (double *)out->extended_data[c]; |
689 | |
690 | for (i = 0; i < out->nb_samples; i++) { |
691 | dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? FFMIN(s->peak_value, s->target_rms) : s->peak_value); |
692 | if (s->dc_correction) { |
693 | dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1; |
694 | dst_ptr[i] += s->dc_correction_value[c]; |
695 | } |
696 | } |
697 | } |
698 | |
699 | s->delay--; |
700 | return filter_frame(inlink, out); |
701 | } |
702 | |
703 | static int request_frame(AVFilterLink *outlink) |
704 | { |
705 | AVFilterContext *ctx = outlink->src; |
706 | DynamicAudioNormalizerContext *s = ctx->priv; |
707 | int ret = 0; |
708 | |
709 | ret = ff_request_frame(ctx->inputs[0]); |
710 | |
711 | if (ret == AVERROR_EOF && !ctx->is_disabled && s->delay) { |
712 | if (!cqueue_empty(s->gain_history_smoothed[0])) { |
713 | ret = flush_buffer(s, ctx->inputs[0], outlink); |
714 | } else if (s->queue.available) { |
715 | AVFrame *out = ff_bufqueue_get(&s->queue); |
716 | |
717 | ret = ff_filter_frame(outlink, out); |
718 | } |
719 | } |
720 | |
721 | return ret; |
722 | } |
723 | |
/* Single audio input pad: frames go to filter_frame(), link setup to config_input(). */
static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
    {
        .name           = "default",
        .type           = AVMEDIA_TYPE_AUDIO,
        .filter_frame   = filter_frame,
        .config_props   = config_input,
        /* analyze_frame() and amplify_frame() modify the samples in place */
        .needs_writable = 1,
    },
    { NULL }
};
734 | |
/* Single audio output pad: request_frame() also drains buffered frames at EOF. */
static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_AUDIO,
        .request_frame = request_frame,
    },
    { NULL }
};
743 | |
/* Filter definition tying together the callbacks, pads and option class of this file. */
AVFilter ff_af_dynaudnorm = {
    .name          = "dynaudnorm",
    .description   = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
    .query_formats = query_formats,
    .priv_size     = sizeof(DynamicAudioNormalizerContext),
    .init          = init,
    .uninit        = uninit,
    .inputs        = avfilter_af_dynaudnorm_inputs,
    .outputs       = avfilter_af_dynaudnorm_outputs,
    .priv_class    = &dynaudnorm_class,
};
755 |