blob: 3fc1a1b4c92237bc1b8cb5da8f3fa1d9d6e691b2
1 | /* |
2 | * Copyright (c) 2016 Paul B Mahol |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU Lesser General Public License as published |
8 | * by the Free Software Foundation; either version 2.1 of the License, |
9 | * or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include "libavutil/audio_fifo.h" |
22 | #include "libavutil/avstring.h" |
23 | #include "libavfilter/internal.h" |
24 | #include "libavutil/common.h" |
25 | #include "libavutil/opt.h" |
26 | #include "libavcodec/avfft.h" |
27 | #include "libavutil/eval.h" |
28 | #include "audio.h" |
29 | #include "window_func.h" |
30 | |
31 | typedef struct AFFTFiltContext { |
32 | const AVClass *class; |
33 | char *real_str; |
34 | char *img_str; |
35 | int fft_bits; |
36 | |
37 | FFTContext *fft, *ifft; |
38 | FFTComplex **fft_data; |
39 | int nb_exprs; |
40 | int window_size; |
41 | AVExpr **real; |
42 | AVExpr **imag; |
43 | AVAudioFifo *fifo; |
44 | int64_t pts; |
45 | int hop_size; |
46 | float overlap; |
47 | AVFrame *buffer; |
48 | int start, end; |
49 | int win_func; |
50 | float win_scale; |
51 | float *window_func_lut; |
52 | } AFFTFiltContext; |
53 | |
/*
 * Variables visible to the user expressions. The enum below gives the index
 * of each name inside the values[] array handed to the evaluator, so both
 * lists must stay in the same order.
 */
static const char *const var_names[] = {
    "sr",
    "b",
    "nb",
    "ch",
    "chs",
    "pts",
    NULL
};

enum {
    VAR_SAMPLE_RATE,
    VAR_BIN,
    VAR_NBBINS,
    VAR_CHANNEL,
    VAR_CHANNELS,
    VAR_PTS,
    VAR_VARS_NB
};
56 | |
/* Byte offset of field x inside the filter's private context. */
#define OFFSET(x) offsetof(AFFTFiltContext, x)
/*
 * Flags applied to every option of this filter. The expansion is
 * parenthesized so it behaves as a single operand next to operators that
 * bind tighter than '|' (for example `A & flag`).
 */
#define A (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
59 | |
60 | static const AVOption afftfilt_options[] = { |
61 | { "real", "set channels real expressions", OFFSET(real_str), AV_OPT_TYPE_STRING, {.str = "1" }, 0, 0, A }, |
62 | { "imag", "set channels imaginary expressions", OFFSET(img_str), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, A }, |
63 | { "win_size", "set window size", OFFSET(fft_bits), AV_OPT_TYPE_INT, {.i64=12}, 4, 17, A, "fft" }, |
64 | { "w16", 0, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, A, "fft" }, |
65 | { "w32", 0, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, A, "fft" }, |
66 | { "w64", 0, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, A, "fft" }, |
67 | { "w128", 0, 0, AV_OPT_TYPE_CONST, {.i64=7}, 0, 0, A, "fft" }, |
68 | { "w256", 0, 0, AV_OPT_TYPE_CONST, {.i64=8}, 0, 0, A, "fft" }, |
69 | { "w512", 0, 0, AV_OPT_TYPE_CONST, {.i64=9}, 0, 0, A, "fft" }, |
70 | { "w1024", 0, 0, AV_OPT_TYPE_CONST, {.i64=10}, 0, 0, A, "fft" }, |
71 | { "w2048", 0, 0, AV_OPT_TYPE_CONST, {.i64=11}, 0, 0, A, "fft" }, |
72 | { "w4096", 0, 0, AV_OPT_TYPE_CONST, {.i64=12}, 0, 0, A, "fft" }, |
73 | { "w8192", 0, 0, AV_OPT_TYPE_CONST, {.i64=13}, 0, 0, A, "fft" }, |
74 | { "w16384", 0, 0, AV_OPT_TYPE_CONST, {.i64=14}, 0, 0, A, "fft" }, |
75 | { "w32768", 0, 0, AV_OPT_TYPE_CONST, {.i64=15}, 0, 0, A, "fft" }, |
76 | { "w65536", 0, 0, AV_OPT_TYPE_CONST, {.i64=16}, 0, 0, A, "fft" }, |
77 | { "w131072",0, 0, AV_OPT_TYPE_CONST, {.i64=17}, 0, 0, A, "fft" }, |
78 | { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, A, "win_func" }, |
79 | { "rect", "Rectangular", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT}, 0, 0, A, "win_func" }, |
80 | { "bartlett", "Bartlett", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" }, |
81 | { "hann", "Hann", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" }, |
82 | { "hanning", "Hanning", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING}, 0, 0, A, "win_func" }, |
83 | { "hamming", "Hamming", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING}, 0, 0, A, "win_func" }, |
84 | { "sine", "Sine", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE}, 0, 0, A, "win_func" }, |
85 | { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0, 1, A }, |
86 | { NULL }, |
87 | }; |
88 | |
89 | AVFILTER_DEFINE_CLASS(afftfilt); |
90 | |
91 | static int config_input(AVFilterLink *inlink) |
92 | { |
93 | AVFilterContext *ctx = inlink->dst; |
94 | AFFTFiltContext *s = ctx->priv; |
95 | char *saveptr = NULL; |
96 | int ret = 0, ch, i; |
97 | float overlap; |
98 | char *args; |
99 | const char *last_expr = "1"; |
100 | |
101 | s->fft = av_fft_init(s->fft_bits, 0); |
102 | s->ifft = av_fft_init(s->fft_bits, 1); |
103 | if (!s->fft || !s->ifft) |
104 | return AVERROR(ENOMEM); |
105 | |
106 | s->window_size = 1 << s->fft_bits; |
107 | |
108 | s->fft_data = av_calloc(inlink->channels, sizeof(*s->fft_data)); |
109 | if (!s->fft_data) |
110 | return AVERROR(ENOMEM); |
111 | |
112 | for (ch = 0; ch < inlink->channels; ch++) { |
113 | s->fft_data[ch] = av_calloc(s->window_size, sizeof(**s->fft_data)); |
114 | if (!s->fft_data[ch]) |
115 | return AVERROR(ENOMEM); |
116 | } |
117 | |
118 | s->real = av_calloc(inlink->channels, sizeof(*s->real)); |
119 | if (!s->real) |
120 | return AVERROR(ENOMEM); |
121 | |
122 | s->imag = av_calloc(inlink->channels, sizeof(*s->imag)); |
123 | if (!s->imag) |
124 | return AVERROR(ENOMEM); |
125 | |
126 | args = av_strdup(s->real_str); |
127 | if (!args) |
128 | return AVERROR(ENOMEM); |
129 | |
130 | for (ch = 0; ch < inlink->channels; ch++) { |
131 | char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr); |
132 | |
133 | ret = av_expr_parse(&s->real[ch], arg ? arg : last_expr, var_names, |
134 | NULL, NULL, NULL, NULL, 0, ctx); |
135 | if (ret < 0) |
136 | break; |
137 | if (arg) |
138 | last_expr = arg; |
139 | s->nb_exprs++; |
140 | } |
141 | |
142 | av_free(args); |
143 | |
144 | args = av_strdup(s->img_str ? s->img_str : s->real_str); |
145 | if (!args) |
146 | return AVERROR(ENOMEM); |
147 | |
148 | for (ch = 0; ch < inlink->channels; ch++) { |
149 | char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr); |
150 | |
151 | ret = av_expr_parse(&s->imag[ch], arg ? arg : last_expr, var_names, |
152 | NULL, NULL, NULL, NULL, 0, ctx); |
153 | if (ret < 0) |
154 | break; |
155 | if (arg) |
156 | last_expr = arg; |
157 | } |
158 | |
159 | av_free(args); |
160 | |
161 | s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size); |
162 | if (!s->fifo) |
163 | return AVERROR(ENOMEM); |
164 | |
165 | s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size, |
166 | sizeof(*s->window_func_lut)); |
167 | if (!s->window_func_lut) |
168 | return AVERROR(ENOMEM); |
169 | ff_generate_window_func(s->window_func_lut, s->window_size, s->win_func, &overlap); |
170 | if (s->overlap == 1) |
171 | s->overlap = overlap; |
172 | |
173 | for (s->win_scale = 0, i = 0; i < s->window_size; i++) { |
174 | s->win_scale += s->window_func_lut[i] * s->window_func_lut[i]; |
175 | } |
176 | |
177 | s->hop_size = s->window_size * (1 - s->overlap); |
178 | if (s->hop_size <= 0) |
179 | return AVERROR(EINVAL); |
180 | |
181 | s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2); |
182 | if (!s->buffer) |
183 | return AVERROR(ENOMEM); |
184 | |
185 | return ret; |
186 | } |
187 | |
188 | static int filter_frame(AVFilterLink *inlink, AVFrame *frame) |
189 | { |
190 | AVFilterContext *ctx = inlink->dst; |
191 | AVFilterLink *outlink = ctx->outputs[0]; |
192 | AFFTFiltContext *s = ctx->priv; |
193 | const int window_size = s->window_size; |
194 | const float f = 1. / s->win_scale; |
195 | double values[VAR_VARS_NB]; |
196 | AVFrame *out, *in = NULL; |
197 | int ch, n, ret, i, j, k; |
198 | int start = s->start, end = s->end; |
199 | |
200 | av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples); |
201 | av_frame_free(&frame); |
202 | |
203 | while (av_audio_fifo_size(s->fifo) >= window_size) { |
204 | if (!in) { |
205 | in = ff_get_audio_buffer(outlink, window_size); |
206 | if (!in) |
207 | return AVERROR(ENOMEM); |
208 | } |
209 | |
210 | ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, window_size); |
211 | if (ret < 0) |
212 | break; |
213 | |
214 | for (ch = 0; ch < inlink->channels; ch++) { |
215 | const float *src = (float *)in->extended_data[ch]; |
216 | FFTComplex *fft_data = s->fft_data[ch]; |
217 | |
218 | for (n = 0; n < in->nb_samples; n++) { |
219 | fft_data[n].re = src[n] * s->window_func_lut[n]; |
220 | fft_data[n].im = 0; |
221 | } |
222 | |
223 | for (; n < window_size; n++) { |
224 | fft_data[n].re = 0; |
225 | fft_data[n].im = 0; |
226 | } |
227 | } |
228 | |
229 | values[VAR_PTS] = s->pts; |
230 | values[VAR_SAMPLE_RATE] = inlink->sample_rate; |
231 | values[VAR_NBBINS] = window_size / 2; |
232 | values[VAR_CHANNELS] = inlink->channels; |
233 | |
234 | for (ch = 0; ch < inlink->channels; ch++) { |
235 | FFTComplex *fft_data = s->fft_data[ch]; |
236 | float *buf = (float *)s->buffer->extended_data[ch]; |
237 | int x; |
238 | |
239 | values[VAR_CHANNEL] = ch; |
240 | |
241 | av_fft_permute(s->fft, fft_data); |
242 | av_fft_calc(s->fft, fft_data); |
243 | |
244 | for (n = 0; n < window_size / 2; n++) { |
245 | float fr, fi; |
246 | |
247 | values[VAR_BIN] = n; |
248 | |
249 | fr = av_expr_eval(s->real[ch], values, s); |
250 | fi = av_expr_eval(s->imag[ch], values, s); |
251 | |
252 | fft_data[n].re *= fr; |
253 | fft_data[n].im *= fi; |
254 | } |
255 | |
256 | for (n = window_size / 2 + 1, x = window_size / 2 - 1; n < window_size; n++, x--) { |
257 | fft_data[n].re = fft_data[x].re; |
258 | fft_data[n].im = -fft_data[x].im; |
259 | } |
260 | |
261 | av_fft_permute(s->ifft, fft_data); |
262 | av_fft_calc(s->ifft, fft_data); |
263 | |
264 | start = s->start; |
265 | end = s->end; |
266 | k = end; |
267 | for (i = 0, j = start; j < k && i < window_size; i++, j++) { |
268 | buf[j] += s->fft_data[ch][i].re * f; |
269 | } |
270 | |
271 | for (; i < window_size; i++, j++) { |
272 | buf[j] = s->fft_data[ch][i].re * f; |
273 | } |
274 | |
275 | start += s->hop_size; |
276 | end = j; |
277 | } |
278 | |
279 | s->start = start; |
280 | s->end = end; |
281 | |
282 | if (start >= window_size) { |
283 | float *dst, *buf; |
284 | |
285 | start -= window_size; |
286 | end -= window_size; |
287 | |
288 | s->start = start; |
289 | s->end = end; |
290 | |
291 | out = ff_get_audio_buffer(outlink, window_size); |
292 | if (!out) { |
293 | ret = AVERROR(ENOMEM); |
294 | break; |
295 | } |
296 | |
297 | out->pts = s->pts; |
298 | s->pts += window_size; |
299 | |
300 | for (ch = 0; ch < inlink->channels; ch++) { |
301 | dst = (float *)out->extended_data[ch]; |
302 | buf = (float *)s->buffer->extended_data[ch]; |
303 | |
304 | for (n = 0; n < window_size; n++) { |
305 | dst[n] = buf[n] * (1 - s->overlap); |
306 | } |
307 | memmove(buf, buf + window_size, window_size * 4); |
308 | } |
309 | |
310 | ret = ff_filter_frame(outlink, out); |
311 | if (ret < 0) |
312 | break; |
313 | } |
314 | |
315 | av_audio_fifo_drain(s->fifo, s->hop_size); |
316 | } |
317 | |
318 | av_frame_free(&in); |
319 | return ret; |
320 | } |
321 | |
322 | static int query_formats(AVFilterContext *ctx) |
323 | { |
324 | AVFilterFormats *formats; |
325 | AVFilterChannelLayouts *layouts; |
326 | static const enum AVSampleFormat sample_fmts[] = { |
327 | AV_SAMPLE_FMT_FLTP, |
328 | AV_SAMPLE_FMT_NONE |
329 | }; |
330 | int ret; |
331 | |
332 | layouts = ff_all_channel_counts(); |
333 | if (!layouts) |
334 | return AVERROR(ENOMEM); |
335 | ret = ff_set_common_channel_layouts(ctx, layouts); |
336 | if (ret < 0) |
337 | return ret; |
338 | |
339 | formats = ff_make_format_list(sample_fmts); |
340 | if (!formats) |
341 | return AVERROR(ENOMEM); |
342 | ret = ff_set_common_formats(ctx, formats); |
343 | if (ret < 0) |
344 | return ret; |
345 | |
346 | formats = ff_all_samplerates(); |
347 | if (!formats) |
348 | return AVERROR(ENOMEM); |
349 | return ff_set_common_samplerates(ctx, formats); |
350 | } |
351 | |
352 | static av_cold void uninit(AVFilterContext *ctx) |
353 | { |
354 | AFFTFiltContext *s = ctx->priv; |
355 | int i; |
356 | |
357 | av_fft_end(s->fft); |
358 | av_fft_end(s->ifft); |
359 | |
360 | for (i = 0; i < s->nb_exprs; i++) { |
361 | if (s->fft_data) |
362 | av_freep(&s->fft_data[i]); |
363 | } |
364 | av_freep(&s->fft_data); |
365 | |
366 | for (i = 0; i < s->nb_exprs; i++) { |
367 | av_expr_free(s->real[i]); |
368 | av_expr_free(s->imag[i]); |
369 | } |
370 | |
371 | av_freep(&s->real); |
372 | av_freep(&s->imag); |
373 | av_frame_free(&s->buffer); |
374 | } |
375 | |
376 | static const AVFilterPad inputs[] = { |
377 | { |
378 | .name = "default", |
379 | .type = AVMEDIA_TYPE_AUDIO, |
380 | .config_props = config_input, |
381 | .filter_frame = filter_frame, |
382 | }, |
383 | { NULL } |
384 | }; |
385 | |
386 | static const AVFilterPad outputs[] = { |
387 | { |
388 | .name = "default", |
389 | .type = AVMEDIA_TYPE_AUDIO, |
390 | }, |
391 | { NULL } |
392 | }; |
393 | |
394 | AVFilter ff_af_afftfilt = { |
395 | .name = "afftfilt", |
396 | .description = NULL_IF_CONFIG_SMALL("Apply arbitrary expressions to samples in frequency domain."), |
397 | .priv_size = sizeof(AFFTFiltContext), |
398 | .priv_class = &afftfilt_class, |
399 | .inputs = inputs, |
400 | .outputs = outputs, |
401 | .query_formats = query_formats, |
402 | .uninit = uninit, |
403 | }; |
404 |