summaryrefslogtreecommitdiff
path: root/libavfilter/af_volumedetect.c (plain)
blob: 0143940ef3f3b43a12051f6fc03c6fafad06a69f
1/*
2 * Copyright (c) 2012 Nicolas George
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public License
8 * as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/channel_layout.h"
22#include "libavutil/avassert.h"
23#include "audio.h"
24#include "avfilter.h"
25#include "internal.h"
26
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /**
     * Sample counters, one per 16-bit PCM value: the number of samples
     * seen with value i is kept in histogram[0x8000 + i].
     * One slot more than the 0x10000 possible values is reserved so that
     * the table is symmetric around index 0x8000.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;
35
36static int query_formats(AVFilterContext *ctx)
37{
38 static const enum AVSampleFormat sample_fmts[] = {
39 AV_SAMPLE_FMT_S16,
40 AV_SAMPLE_FMT_S16P,
41 AV_SAMPLE_FMT_NONE
42 };
43 AVFilterFormats *formats;
44 AVFilterChannelLayouts *layouts;
45 int ret;
46
47 if (!(formats = ff_make_format_list(sample_fmts)))
48 return AVERROR(ENOMEM);
49
50 layouts = ff_all_channel_counts();
51 if (!layouts)
52 return AVERROR(ENOMEM);
53 ret = ff_set_common_channel_layouts(ctx, layouts);
54 if (ret < 0)
55 return ret;
56
57 return ff_set_common_formats(ctx, formats);
58}
59
60static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
61{
62 AVFilterContext *ctx = inlink->dst;
63 VolDetectContext *vd = ctx->priv;
64 int nb_samples = samples->nb_samples;
65 int nb_channels = av_frame_get_channels(samples);
66 int nb_planes = nb_channels;
67 int plane, i;
68 int16_t *pcm;
69
70 if (!av_sample_fmt_is_planar(samples->format)) {
71 nb_samples *= nb_channels;
72 nb_planes = 1;
73 }
74 for (plane = 0; plane < nb_planes; plane++) {
75 pcm = (int16_t *)samples->extended_data[plane];
76 for (i = 0; i < nb_samples; i++)
77 vd->histogram[pcm[i] + 0x8000]++;
78 }
79
80 return ff_filter_frame(inlink->dst->outputs[0], samples);
81}
82
83#define MAX_DB 91
84
85static inline double logdb(uint64_t v)
86{
87 double d = v / (double)(0x8000 * 0x8000);
88 if (!v)
89 return MAX_DB;
90 return -log10(d) * 10;
91}
92
93static void print_stats(AVFilterContext *ctx)
94{
95 VolDetectContext *vd = ctx->priv;
96 int i, max_volume, shift;
97 uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
98 uint64_t histdb[MAX_DB + 1] = { 0 };
99
100 for (i = 0; i < 0x10000; i++)
101 nb_samples += vd->histogram[i];
102 av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
103 if (!nb_samples)
104 return;
105
106 /* If nb_samples > 1<<34, there is a risk of overflow in the
107 multiplication or the sum: shift all histogram values to avoid that.
108 The total number of samples must be recomputed to avoid rounding
109 errors. */
110 shift = av_log2(nb_samples >> 33);
111 for (i = 0; i < 0x10000; i++) {
112 nb_samples_shift += vd->histogram[i] >> shift;
113 power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
114 }
115 if (!nb_samples_shift)
116 return;
117 power = (power + nb_samples_shift / 2) / nb_samples_shift;
118 av_assert0(power <= 0x8000 * 0x8000);
119 av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
120
121 max_volume = 0x8000;
122 while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
123 !vd->histogram[0x8000 - max_volume])
124 max_volume--;
125 av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
126
127 for (i = 0; i < 0x10000; i++)
128 histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
129 for (i = 0; i <= MAX_DB && !histdb[i]; i++);
130 for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
131 av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
132 sum += histdb[i];
133 }
134}
135
136static av_cold void uninit(AVFilterContext *ctx)
137{
138 print_stats(ctx);
139}
140
141static const AVFilterPad volumedetect_inputs[] = {
142 {
143 .name = "default",
144 .type = AVMEDIA_TYPE_AUDIO,
145 .filter_frame = filter_frame,
146 },
147 { NULL }
148};
149
150static const AVFilterPad volumedetect_outputs[] = {
151 {
152 .name = "default",
153 .type = AVMEDIA_TYPE_AUDIO,
154 },
155 { NULL }
156};
157
158AVFilter ff_af_volumedetect = {
159 .name = "volumedetect",
160 .description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
161 .priv_size = sizeof(VolDetectContext),
162 .query_formats = query_formats,
163 .uninit = uninit,
164 .inputs = volumedetect_inputs,
165 .outputs = volumedetect_outputs,
166};
167