blob: 0143940ef3f3b43a12051f6fc03c6fafad06a69f
1 | /* |
2 | * Copyright (c) 2012 Nicolas George |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public License |
8 | * as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public License |
17 | * along with FFmpeg; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include "libavutil/channel_layout.h" |
22 | #include "libavutil/avassert.h" |
23 | #include "audio.h" |
24 | #include "avfilter.h" |
25 | #include "internal.h" |
26 | |
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /**
     * Per-value sample counters for signed 16-bit PCM.
     * The counter for PCM value v is stored at histogram[0x8000 + v].
     * One slot more than the number of int16_t values is allocated so
     * that the table is symmetric around the centre index 0x8000.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;
35 | |
36 | static int query_formats(AVFilterContext *ctx) |
37 | { |
38 | static const enum AVSampleFormat sample_fmts[] = { |
39 | AV_SAMPLE_FMT_S16, |
40 | AV_SAMPLE_FMT_S16P, |
41 | AV_SAMPLE_FMT_NONE |
42 | }; |
43 | AVFilterFormats *formats; |
44 | AVFilterChannelLayouts *layouts; |
45 | int ret; |
46 | |
47 | if (!(formats = ff_make_format_list(sample_fmts))) |
48 | return AVERROR(ENOMEM); |
49 | |
50 | layouts = ff_all_channel_counts(); |
51 | if (!layouts) |
52 | return AVERROR(ENOMEM); |
53 | ret = ff_set_common_channel_layouts(ctx, layouts); |
54 | if (ret < 0) |
55 | return ret; |
56 | |
57 | return ff_set_common_formats(ctx, formats); |
58 | } |
59 | |
60 | static int filter_frame(AVFilterLink *inlink, AVFrame *samples) |
61 | { |
62 | AVFilterContext *ctx = inlink->dst; |
63 | VolDetectContext *vd = ctx->priv; |
64 | int nb_samples = samples->nb_samples; |
65 | int nb_channels = av_frame_get_channels(samples); |
66 | int nb_planes = nb_channels; |
67 | int plane, i; |
68 | int16_t *pcm; |
69 | |
70 | if (!av_sample_fmt_is_planar(samples->format)) { |
71 | nb_samples *= nb_channels; |
72 | nb_planes = 1; |
73 | } |
74 | for (plane = 0; plane < nb_planes; plane++) { |
75 | pcm = (int16_t *)samples->extended_data[plane]; |
76 | for (i = 0; i < nb_samples; i++) |
77 | vd->histogram[pcm[i] + 0x8000]++; |
78 | } |
79 | |
80 | return ff_filter_frame(inlink->dst->outputs[0], samples); |
81 | } |
82 | |
83 | #define MAX_DB 91 |
84 | |
85 | static inline double logdb(uint64_t v) |
86 | { |
87 | double d = v / (double)(0x8000 * 0x8000); |
88 | if (!v) |
89 | return MAX_DB; |
90 | return -log10(d) * 10; |
91 | } |
92 | |
93 | static void print_stats(AVFilterContext *ctx) |
94 | { |
95 | VolDetectContext *vd = ctx->priv; |
96 | int i, max_volume, shift; |
97 | uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; |
98 | uint64_t histdb[MAX_DB + 1] = { 0 }; |
99 | |
100 | for (i = 0; i < 0x10000; i++) |
101 | nb_samples += vd->histogram[i]; |
102 | av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); |
103 | if (!nb_samples) |
104 | return; |
105 | |
106 | /* If nb_samples > 1<<34, there is a risk of overflow in the |
107 | multiplication or the sum: shift all histogram values to avoid that. |
108 | The total number of samples must be recomputed to avoid rounding |
109 | errors. */ |
110 | shift = av_log2(nb_samples >> 33); |
111 | for (i = 0; i < 0x10000; i++) { |
112 | nb_samples_shift += vd->histogram[i] >> shift; |
113 | power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); |
114 | } |
115 | if (!nb_samples_shift) |
116 | return; |
117 | power = (power + nb_samples_shift / 2) / nb_samples_shift; |
118 | av_assert0(power <= 0x8000 * 0x8000); |
119 | av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); |
120 | |
121 | max_volume = 0x8000; |
122 | while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && |
123 | !vd->histogram[0x8000 - max_volume]) |
124 | max_volume--; |
125 | av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); |
126 | |
127 | for (i = 0; i < 0x10000; i++) |
128 | histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; |
129 | for (i = 0; i <= MAX_DB && !histdb[i]; i++); |
130 | for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { |
131 | av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); |
132 | sum += histdb[i]; |
133 | } |
134 | } |
135 | |
136 | static av_cold void uninit(AVFilterContext *ctx) |
137 | { |
138 | print_stats(ctx); |
139 | } |
140 | |
141 | static const AVFilterPad volumedetect_inputs[] = { |
142 | { |
143 | .name = "default", |
144 | .type = AVMEDIA_TYPE_AUDIO, |
145 | .filter_frame = filter_frame, |
146 | }, |
147 | { NULL } |
148 | }; |
149 | |
150 | static const AVFilterPad volumedetect_outputs[] = { |
151 | { |
152 | .name = "default", |
153 | .type = AVMEDIA_TYPE_AUDIO, |
154 | }, |
155 | { NULL } |
156 | }; |
157 | |
158 | AVFilter ff_af_volumedetect = { |
159 | .name = "volumedetect", |
160 | .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), |
161 | .priv_size = sizeof(VolDetectContext), |
162 | .query_formats = query_formats, |
163 | .uninit = uninit, |
164 | .inputs = volumedetect_inputs, |
165 | .outputs = volumedetect_outputs, |
166 | }; |
167 |