blob: e11008078d7f0d55a0776f57c584fbfb34c59c93
1 | /* |
2 | * Copyright (c) 2011 Jan Kokemüller |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * |
20 | * This file is based on libebur128 which is available at |
21 | * https://github.com/jiixyj/libebur128/ |
22 | * |
23 | * Libebur128 has the following copyright: |
24 | * |
25 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
26 | * of this software and associated documentation files (the "Software"), to deal |
27 | * in the Software without restriction, including without limitation the rights |
28 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
29 | * copies of the Software, and to permit persons to whom the Software is |
30 | * furnished to do so, subject to the following conditions: |
31 | * |
32 | * The above copyright notice and this permission notice shall be included in |
33 | * all copies or substantial portions of the Software. |
34 | * |
35 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
36 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
37 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
38 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
39 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
40 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
41 | * THE SOFTWARE. |
42 | */ |
43 | |
44 | #include "ebur128.h" |
45 | |
46 | #include <float.h> |
47 | #include <limits.h> |
48 | #include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */ |
49 | |
50 | #include "libavutil/common.h" |
51 | #include "libavutil/mem.h" |
52 | #include "libavutil/thread.h" |
53 | |
54 | #define CHECK_ERROR(condition, errorcode, goto_point) \ |
55 | if ((condition)) { \ |
56 | errcode = (errorcode); \ |
57 | goto goto_point; \ |
58 | } |
59 | |
60 | #define ALMOST_ZERO 0.000001 |
61 | |
62 | #define RELATIVE_GATE (-10.0) |
63 | #define RELATIVE_GATE_FACTOR pow(10.0, RELATIVE_GATE / 10.0) |
64 | #define MINUS_20DB pow(10.0, -20.0 / 10.0) |
65 | |
66 | struct FFEBUR128StateInternal { |
67 | /** Filtered audio data (used as ring buffer). */ |
68 | double *audio_data; |
69 | /** Size of audio_data array. */ |
70 | size_t audio_data_frames; |
71 | /** Current index for audio_data. */ |
72 | size_t audio_data_index; |
73 | /** How many frames are needed for a gating block. Will correspond to 400ms |
74 | * of audio at initialization, and 100ms after the first block (75% overlap |
75 | * as specified in the 2011 revision of BS1770). */ |
76 | unsigned long needed_frames; |
77 | /** The channel map. Has as many elements as there are channels. */ |
78 | int *channel_map; |
79 | /** How many samples fit in 100ms (rounded). */ |
80 | unsigned long samples_in_100ms; |
81 | /** BS.1770 filter coefficients (nominator). */ |
82 | double b[5]; |
83 | /** BS.1770 filter coefficients (denominator). */ |
84 | double a[5]; |
85 | /** BS.1770 filter state. */ |
86 | double v[5][5]; |
87 | /** Histograms, used to calculate LRA. */ |
88 | unsigned long *block_energy_histogram; |
89 | unsigned long *short_term_block_energy_histogram; |
90 | /** Keeps track of when a new short term block is needed. */ |
91 | size_t short_term_frame_counter; |
92 | /** Maximum sample peak, one per channel */ |
93 | double *sample_peak; |
94 | /** The maximum window duration in ms. */ |
95 | unsigned long window; |
96 | /** Data pointer array for interleaved data */ |
97 | void **data_ptrs; |
98 | }; |
99 | |
100 | static AVOnce histogram_init = AV_ONCE_INIT; |
101 | static DECLARE_ALIGNED(32, double, histogram_energies)[1000]; |
102 | static DECLARE_ALIGNED(32, double, histogram_energy_boundaries)[1001]; |
103 | |
104 | static void ebur128_init_filter(FFEBUR128State * st) |
105 | { |
106 | int i, j; |
107 | |
108 | double f0 = 1681.974450955533; |
109 | double G = 3.999843853973347; |
110 | double Q = 0.7071752369554196; |
111 | |
112 | double K = tan(M_PI * f0 / (double) st->samplerate); |
113 | double Vh = pow(10.0, G / 20.0); |
114 | double Vb = pow(Vh, 0.4996667741545416); |
115 | |
116 | double pb[3] = { 0.0, 0.0, 0.0 }; |
117 | double pa[3] = { 1.0, 0.0, 0.0 }; |
118 | double rb[3] = { 1.0, -2.0, 1.0 }; |
119 | double ra[3] = { 1.0, 0.0, 0.0 }; |
120 | |
121 | double a0 = 1.0 + K / Q + K * K; |
122 | pb[0] = (Vh + Vb * K / Q + K * K) / a0; |
123 | pb[1] = 2.0 * (K * K - Vh) / a0; |
124 | pb[2] = (Vh - Vb * K / Q + K * K) / a0; |
125 | pa[1] = 2.0 * (K * K - 1.0) / a0; |
126 | pa[2] = (1.0 - K / Q + K * K) / a0; |
127 | |
128 | f0 = 38.13547087602444; |
129 | Q = 0.5003270373238773; |
130 | K = tan(M_PI * f0 / (double) st->samplerate); |
131 | |
132 | ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); |
133 | ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); |
134 | |
135 | st->d->b[0] = pb[0] * rb[0]; |
136 | st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; |
137 | st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; |
138 | st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; |
139 | st->d->b[4] = pb[2] * rb[2]; |
140 | |
141 | st->d->a[0] = pa[0] * ra[0]; |
142 | st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; |
143 | st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; |
144 | st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; |
145 | st->d->a[4] = pa[2] * ra[2]; |
146 | |
147 | for (i = 0; i < 5; ++i) { |
148 | for (j = 0; j < 5; ++j) { |
149 | st->d->v[i][j] = 0.0; |
150 | } |
151 | } |
152 | } |
153 | |
154 | static int ebur128_init_channel_map(FFEBUR128State * st) |
155 | { |
156 | size_t i; |
157 | st->d->channel_map = |
158 | (int *) av_malloc_array(st->channels, sizeof(int)); |
159 | if (!st->d->channel_map) |
160 | return AVERROR(ENOMEM); |
161 | if (st->channels == 4) { |
162 | st->d->channel_map[0] = FF_EBUR128_LEFT; |
163 | st->d->channel_map[1] = FF_EBUR128_RIGHT; |
164 | st->d->channel_map[2] = FF_EBUR128_LEFT_SURROUND; |
165 | st->d->channel_map[3] = FF_EBUR128_RIGHT_SURROUND; |
166 | } else if (st->channels == 5) { |
167 | st->d->channel_map[0] = FF_EBUR128_LEFT; |
168 | st->d->channel_map[1] = FF_EBUR128_RIGHT; |
169 | st->d->channel_map[2] = FF_EBUR128_CENTER; |
170 | st->d->channel_map[3] = FF_EBUR128_LEFT_SURROUND; |
171 | st->d->channel_map[4] = FF_EBUR128_RIGHT_SURROUND; |
172 | } else { |
173 | for (i = 0; i < st->channels; ++i) { |
174 | switch (i) { |
175 | case 0: |
176 | st->d->channel_map[i] = FF_EBUR128_LEFT; |
177 | break; |
178 | case 1: |
179 | st->d->channel_map[i] = FF_EBUR128_RIGHT; |
180 | break; |
181 | case 2: |
182 | st->d->channel_map[i] = FF_EBUR128_CENTER; |
183 | break; |
184 | case 3: |
185 | st->d->channel_map[i] = FF_EBUR128_UNUSED; |
186 | break; |
187 | case 4: |
188 | st->d->channel_map[i] = FF_EBUR128_LEFT_SURROUND; |
189 | break; |
190 | case 5: |
191 | st->d->channel_map[i] = FF_EBUR128_RIGHT_SURROUND; |
192 | break; |
193 | default: |
194 | st->d->channel_map[i] = FF_EBUR128_UNUSED; |
195 | break; |
196 | } |
197 | } |
198 | } |
199 | return 0; |
200 | } |
201 | |
202 | static inline void init_histogram(void) |
203 | { |
204 | int i; |
205 | /* initialize static constants */ |
206 | histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0); |
207 | for (i = 0; i < 1000; ++i) { |
208 | histogram_energies[i] = |
209 | pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0); |
210 | } |
211 | for (i = 1; i < 1001; ++i) { |
212 | histogram_energy_boundaries[i] = |
213 | pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0); |
214 | } |
215 | } |
216 | |
217 | FFEBUR128State *ff_ebur128_init(unsigned int channels, |
218 | unsigned long samplerate, |
219 | unsigned long window, int mode) |
220 | { |
221 | int errcode; |
222 | FFEBUR128State *st; |
223 | |
224 | st = (FFEBUR128State *) av_malloc(sizeof(FFEBUR128State)); |
225 | CHECK_ERROR(!st, 0, exit) |
226 | st->d = (struct FFEBUR128StateInternal *) |
227 | av_malloc(sizeof(struct FFEBUR128StateInternal)); |
228 | CHECK_ERROR(!st->d, 0, free_state) |
229 | st->channels = channels; |
230 | errcode = ebur128_init_channel_map(st); |
231 | CHECK_ERROR(errcode, 0, free_internal) |
232 | |
233 | st->d->sample_peak = |
234 | (double *) av_mallocz_array(channels, sizeof(double)); |
235 | CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) |
236 | |
237 | st->samplerate = samplerate; |
238 | st->d->samples_in_100ms = (st->samplerate + 5) / 10; |
239 | st->mode = mode; |
240 | if ((mode & FF_EBUR128_MODE_S) == FF_EBUR128_MODE_S) { |
241 | st->d->window = FFMAX(window, 3000); |
242 | } else if ((mode & FF_EBUR128_MODE_M) == FF_EBUR128_MODE_M) { |
243 | st->d->window = FFMAX(window, 400); |
244 | } else { |
245 | goto free_sample_peak; |
246 | } |
247 | st->d->audio_data_frames = st->samplerate * st->d->window / 1000; |
248 | if (st->d->audio_data_frames % st->d->samples_in_100ms) { |
249 | /* round up to multiple of samples_in_100ms */ |
250 | st->d->audio_data_frames = st->d->audio_data_frames |
251 | + st->d->samples_in_100ms |
252 | - (st->d->audio_data_frames % st->d->samples_in_100ms); |
253 | } |
254 | st->d->audio_data = |
255 | (double *) av_mallocz_array(st->d->audio_data_frames, |
256 | st->channels * sizeof(double)); |
257 | CHECK_ERROR(!st->d->audio_data, 0, free_sample_peak) |
258 | |
259 | ebur128_init_filter(st); |
260 | |
261 | st->d->block_energy_histogram = |
262 | av_mallocz(1000 * sizeof(unsigned long)); |
263 | CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data) |
264 | st->d->short_term_block_energy_histogram = |
265 | av_mallocz(1000 * sizeof(unsigned long)); |
266 | CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, |
267 | free_block_energy_histogram) |
268 | st->d->short_term_frame_counter = 0; |
269 | |
270 | /* the first block needs 400ms of audio data */ |
271 | st->d->needed_frames = st->d->samples_in_100ms * 4; |
272 | /* start at the beginning of the buffer */ |
273 | st->d->audio_data_index = 0; |
274 | |
275 | if (ff_thread_once(&histogram_init, &init_histogram) != 0) |
276 | goto free_short_term_block_energy_histogram; |
277 | |
278 | st->d->data_ptrs = av_malloc_array(channels, sizeof(void *)); |
279 | CHECK_ERROR(!st->d->data_ptrs, 0, |
280 | free_short_term_block_energy_histogram); |
281 | |
282 | return st; |
283 | |
284 | free_short_term_block_energy_histogram: |
285 | av_free(st->d->short_term_block_energy_histogram); |
286 | free_block_energy_histogram: |
287 | av_free(st->d->block_energy_histogram); |
288 | free_audio_data: |
289 | av_free(st->d->audio_data); |
290 | free_sample_peak: |
291 | av_free(st->d->sample_peak); |
292 | free_channel_map: |
293 | av_free(st->d->channel_map); |
294 | free_internal: |
295 | av_free(st->d); |
296 | free_state: |
297 | av_free(st); |
298 | exit: |
299 | return NULL; |
300 | } |
301 | |
302 | void ff_ebur128_destroy(FFEBUR128State ** st) |
303 | { |
304 | av_free((*st)->d->block_energy_histogram); |
305 | av_free((*st)->d->short_term_block_energy_histogram); |
306 | av_free((*st)->d->audio_data); |
307 | av_free((*st)->d->channel_map); |
308 | av_free((*st)->d->sample_peak); |
309 | av_free((*st)->d->data_ptrs); |
310 | av_free((*st)->d); |
311 | av_free(*st); |
312 | *st = NULL; |
313 | } |
314 | |
315 | #define EBUR128_FILTER(type, scaling_factor) \ |
316 | static void ebur128_filter_##type(FFEBUR128State* st, const type** srcs, \ |
317 | size_t src_index, size_t frames, \ |
318 | int stride) { \ |
319 | double* audio_data = st->d->audio_data + st->d->audio_data_index; \ |
320 | size_t i, c; \ |
321 | \ |
322 | if ((st->mode & FF_EBUR128_MODE_SAMPLE_PEAK) == FF_EBUR128_MODE_SAMPLE_PEAK) { \ |
323 | for (c = 0; c < st->channels; ++c) { \ |
324 | double max = 0.0; \ |
325 | for (i = 0; i < frames; ++i) { \ |
326 | type v = srcs[c][src_index + i * stride]; \ |
327 | if (v > max) { \ |
328 | max = v; \ |
329 | } else if (-v > max) { \ |
330 | max = -1.0 * v; \ |
331 | } \ |
332 | } \ |
333 | max /= scaling_factor; \ |
334 | if (max > st->d->sample_peak[c]) st->d->sample_peak[c] = max; \ |
335 | } \ |
336 | } \ |
337 | for (c = 0; c < st->channels; ++c) { \ |
338 | int ci = st->d->channel_map[c] - 1; \ |
339 | if (ci < 0) continue; \ |
340 | else if (ci == FF_EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \ |
341 | for (i = 0; i < frames; ++i) { \ |
342 | st->d->v[ci][0] = (double) (srcs[c][src_index + i * stride] / scaling_factor) \ |
343 | - st->d->a[1] * st->d->v[ci][1] \ |
344 | - st->d->a[2] * st->d->v[ci][2] \ |
345 | - st->d->a[3] * st->d->v[ci][3] \ |
346 | - st->d->a[4] * st->d->v[ci][4]; \ |
347 | audio_data[i * st->channels + c] = \ |
348 | st->d->b[0] * st->d->v[ci][0] \ |
349 | + st->d->b[1] * st->d->v[ci][1] \ |
350 | + st->d->b[2] * st->d->v[ci][2] \ |
351 | + st->d->b[3] * st->d->v[ci][3] \ |
352 | + st->d->b[4] * st->d->v[ci][4]; \ |
353 | st->d->v[ci][4] = st->d->v[ci][3]; \ |
354 | st->d->v[ci][3] = st->d->v[ci][2]; \ |
355 | st->d->v[ci][2] = st->d->v[ci][1]; \ |
356 | st->d->v[ci][1] = st->d->v[ci][0]; \ |
357 | } \ |
358 | st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \ |
359 | st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \ |
360 | st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \ |
361 | st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1]; \ |
362 | } \ |
363 | } |
364 | EBUR128_FILTER(short, -((double)SHRT_MIN)) |
365 | EBUR128_FILTER(int, -((double)INT_MIN)) |
366 | EBUR128_FILTER(float, 1.0) |
367 | EBUR128_FILTER(double, 1.0) |
368 | |
369 | static double ebur128_energy_to_loudness(double energy) |
370 | { |
371 | return 10 * (log(energy) / log(10.0)) - 0.691; |
372 | } |
373 | |
374 | static size_t find_histogram_index(double energy) |
375 | { |
376 | size_t index_min = 0; |
377 | size_t index_max = 1000; |
378 | size_t index_mid; |
379 | |
380 | do { |
381 | index_mid = (index_min + index_max) / 2; |
382 | if (energy >= histogram_energy_boundaries[index_mid]) { |
383 | index_min = index_mid; |
384 | } else { |
385 | index_max = index_mid; |
386 | } |
387 | } while (index_max - index_min != 1); |
388 | |
389 | return index_min; |
390 | } |
391 | |
392 | static void ebur128_calc_gating_block(FFEBUR128State * st, |
393 | size_t frames_per_block, |
394 | double *optional_output) |
395 | { |
396 | size_t i, c; |
397 | double sum = 0.0; |
398 | double channel_sum; |
399 | for (c = 0; c < st->channels; ++c) { |
400 | if (st->d->channel_map[c] == FF_EBUR128_UNUSED) |
401 | continue; |
402 | channel_sum = 0.0; |
403 | if (st->d->audio_data_index < frames_per_block * st->channels) { |
404 | for (i = 0; i < st->d->audio_data_index / st->channels; ++i) { |
405 | channel_sum += st->d->audio_data[i * st->channels + c] * |
406 | st->d->audio_data[i * st->channels + c]; |
407 | } |
408 | for (i = st->d->audio_data_frames - |
409 | (frames_per_block - |
410 | st->d->audio_data_index / st->channels); |
411 | i < st->d->audio_data_frames; ++i) { |
412 | channel_sum += st->d->audio_data[i * st->channels + c] * |
413 | st->d->audio_data[i * st->channels + c]; |
414 | } |
415 | } else { |
416 | for (i = |
417 | st->d->audio_data_index / st->channels - frames_per_block; |
418 | i < st->d->audio_data_index / st->channels; ++i) { |
419 | channel_sum += |
420 | st->d->audio_data[i * st->channels + |
421 | c] * st->d->audio_data[i * |
422 | st->channels + |
423 | c]; |
424 | } |
425 | } |
426 | if (st->d->channel_map[c] == FF_EBUR128_Mp110 || |
427 | st->d->channel_map[c] == FF_EBUR128_Mm110 || |
428 | st->d->channel_map[c] == FF_EBUR128_Mp060 || |
429 | st->d->channel_map[c] == FF_EBUR128_Mm060 || |
430 | st->d->channel_map[c] == FF_EBUR128_Mp090 || |
431 | st->d->channel_map[c] == FF_EBUR128_Mm090) { |
432 | channel_sum *= 1.41; |
433 | } else if (st->d->channel_map[c] == FF_EBUR128_DUAL_MONO) { |
434 | channel_sum *= 2.0; |
435 | } |
436 | sum += channel_sum; |
437 | } |
438 | sum /= (double) frames_per_block; |
439 | if (optional_output) { |
440 | *optional_output = sum; |
441 | } else if (sum >= histogram_energy_boundaries[0]) { |
442 | ++st->d->block_energy_histogram[find_histogram_index(sum)]; |
443 | } |
444 | } |
445 | |
446 | int ff_ebur128_set_channel(FFEBUR128State * st, |
447 | unsigned int channel_number, int value) |
448 | { |
449 | if (channel_number >= st->channels) { |
450 | return 1; |
451 | } |
452 | if (value == FF_EBUR128_DUAL_MONO && |
453 | (st->channels != 1 || channel_number != 0)) { |
454 | return 1; |
455 | } |
456 | st->d->channel_map[channel_number] = value; |
457 | return 0; |
458 | } |
459 | |
460 | static int ebur128_energy_shortterm(FFEBUR128State * st, double *out); |
461 | #define FF_EBUR128_ADD_FRAMES_PLANAR(type) \ |
462 | void ff_ebur128_add_frames_planar_##type(FFEBUR128State* st, const type** srcs, \ |
463 | size_t frames, int stride) { \ |
464 | size_t src_index = 0; \ |
465 | while (frames > 0) { \ |
466 | if (frames >= st->d->needed_frames) { \ |
467 | ebur128_filter_##type(st, srcs, src_index, st->d->needed_frames, stride); \ |
468 | src_index += st->d->needed_frames * stride; \ |
469 | frames -= st->d->needed_frames; \ |
470 | st->d->audio_data_index += st->d->needed_frames * st->channels; \ |
471 | /* calculate the new gating block */ \ |
472 | if ((st->mode & FF_EBUR128_MODE_I) == FF_EBUR128_MODE_I) { \ |
473 | ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL); \ |
474 | } \ |
475 | if ((st->mode & FF_EBUR128_MODE_LRA) == FF_EBUR128_MODE_LRA) { \ |
476 | st->d->short_term_frame_counter += st->d->needed_frames; \ |
477 | if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \ |
478 | double st_energy; \ |
479 | ebur128_energy_shortterm(st, &st_energy); \ |
480 | if (st_energy >= histogram_energy_boundaries[0]) { \ |
481 | ++st->d->short_term_block_energy_histogram[ \ |
482 | find_histogram_index(st_energy)]; \ |
483 | } \ |
484 | st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \ |
485 | } \ |
486 | } \ |
487 | /* 100ms are needed for all blocks besides the first one */ \ |
488 | st->d->needed_frames = st->d->samples_in_100ms; \ |
489 | /* reset audio_data_index when buffer full */ \ |
490 | if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) { \ |
491 | st->d->audio_data_index = 0; \ |
492 | } \ |
493 | } else { \ |
494 | ebur128_filter_##type(st, srcs, src_index, frames, stride); \ |
495 | st->d->audio_data_index += frames * st->channels; \ |
496 | if ((st->mode & FF_EBUR128_MODE_LRA) == FF_EBUR128_MODE_LRA) { \ |
497 | st->d->short_term_frame_counter += frames; \ |
498 | } \ |
499 | st->d->needed_frames -= frames; \ |
500 | frames = 0; \ |
501 | } \ |
502 | } \ |
503 | } |
504 | FF_EBUR128_ADD_FRAMES_PLANAR(short) |
505 | FF_EBUR128_ADD_FRAMES_PLANAR(int) |
506 | FF_EBUR128_ADD_FRAMES_PLANAR(float) |
507 | FF_EBUR128_ADD_FRAMES_PLANAR(double) |
508 | #define FF_EBUR128_ADD_FRAMES(type) \ |
509 | void ff_ebur128_add_frames_##type(FFEBUR128State* st, const type* src, \ |
510 | size_t frames) { \ |
511 | int i; \ |
512 | const type **buf = (const type**)st->d->data_ptrs; \ |
513 | for (i = 0; i < st->channels; i++) \ |
514 | buf[i] = src + i; \ |
515 | ff_ebur128_add_frames_planar_##type(st, buf, frames, st->channels); \ |
516 | } |
517 | FF_EBUR128_ADD_FRAMES(short) |
518 | FF_EBUR128_ADD_FRAMES(int) |
519 | FF_EBUR128_ADD_FRAMES(float) |
520 | FF_EBUR128_ADD_FRAMES(double) |
521 | |
522 | static int ebur128_calc_relative_threshold(FFEBUR128State **sts, size_t size, |
523 | double *relative_threshold) |
524 | { |
525 | size_t i, j; |
526 | int above_thresh_counter = 0; |
527 | *relative_threshold = 0.0; |
528 | |
529 | for (i = 0; i < size; i++) { |
530 | unsigned long *block_energy_histogram = sts[i]->d->block_energy_histogram; |
531 | for (j = 0; j < 1000; ++j) { |
532 | *relative_threshold += block_energy_histogram[j] * histogram_energies[j]; |
533 | above_thresh_counter += block_energy_histogram[j]; |
534 | } |
535 | } |
536 | |
537 | if (above_thresh_counter != 0) { |
538 | *relative_threshold /= (double)above_thresh_counter; |
539 | *relative_threshold *= RELATIVE_GATE_FACTOR; |
540 | } |
541 | |
542 | return above_thresh_counter; |
543 | } |
544 | |
545 | static int ebur128_gated_loudness(FFEBUR128State ** sts, size_t size, |
546 | double *out) |
547 | { |
548 | double gated_loudness = 0.0; |
549 | double relative_threshold; |
550 | size_t above_thresh_counter; |
551 | size_t i, j, start_index; |
552 | |
553 | for (i = 0; i < size; i++) |
554 | if ((sts[i]->mode & FF_EBUR128_MODE_I) != FF_EBUR128_MODE_I) |
555 | return AVERROR(EINVAL); |
556 | |
557 | if (!ebur128_calc_relative_threshold(sts, size, &relative_threshold)) { |
558 | *out = -HUGE_VAL; |
559 | return 0; |
560 | } |
561 | |
562 | above_thresh_counter = 0; |
563 | if (relative_threshold < histogram_energy_boundaries[0]) { |
564 | start_index = 0; |
565 | } else { |
566 | start_index = find_histogram_index(relative_threshold); |
567 | if (relative_threshold > histogram_energies[start_index]) { |
568 | ++start_index; |
569 | } |
570 | } |
571 | for (i = 0; i < size; i++) { |
572 | for (j = start_index; j < 1000; ++j) { |
573 | gated_loudness += sts[i]->d->block_energy_histogram[j] * |
574 | histogram_energies[j]; |
575 | above_thresh_counter += sts[i]->d->block_energy_histogram[j]; |
576 | } |
577 | } |
578 | if (!above_thresh_counter) { |
579 | *out = -HUGE_VAL; |
580 | return 0; |
581 | } |
582 | gated_loudness /= (double) above_thresh_counter; |
583 | *out = ebur128_energy_to_loudness(gated_loudness); |
584 | return 0; |
585 | } |
586 | |
587 | int ff_ebur128_relative_threshold(FFEBUR128State * st, double *out) |
588 | { |
589 | double relative_threshold; |
590 | |
591 | if ((st->mode & FF_EBUR128_MODE_I) != FF_EBUR128_MODE_I) |
592 | return AVERROR(EINVAL); |
593 | |
594 | if (!ebur128_calc_relative_threshold(&st, 1, &relative_threshold)) { |
595 | *out = -70.0; |
596 | return 0; |
597 | } |
598 | |
599 | *out = ebur128_energy_to_loudness(relative_threshold); |
600 | return 0; |
601 | } |
602 | |
603 | int ff_ebur128_loudness_global(FFEBUR128State * st, double *out) |
604 | { |
605 | return ebur128_gated_loudness(&st, 1, out); |
606 | } |
607 | |
608 | int ff_ebur128_loudness_global_multiple(FFEBUR128State ** sts, size_t size, |
609 | double *out) |
610 | { |
611 | return ebur128_gated_loudness(sts, size, out); |
612 | } |
613 | |
614 | static int ebur128_energy_in_interval(FFEBUR128State * st, |
615 | size_t interval_frames, double *out) |
616 | { |
617 | if (interval_frames > st->d->audio_data_frames) { |
618 | return AVERROR(EINVAL); |
619 | } |
620 | ebur128_calc_gating_block(st, interval_frames, out); |
621 | return 0; |
622 | } |
623 | |
624 | static int ebur128_energy_shortterm(FFEBUR128State * st, double *out) |
625 | { |
626 | return ebur128_energy_in_interval(st, st->d->samples_in_100ms * 30, |
627 | out); |
628 | } |
629 | |
630 | int ff_ebur128_loudness_momentary(FFEBUR128State * st, double *out) |
631 | { |
632 | double energy; |
633 | int error = ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4, |
634 | &energy); |
635 | if (error) { |
636 | return error; |
637 | } else if (energy <= 0.0) { |
638 | *out = -HUGE_VAL; |
639 | return 0; |
640 | } |
641 | *out = ebur128_energy_to_loudness(energy); |
642 | return 0; |
643 | } |
644 | |
645 | int ff_ebur128_loudness_shortterm(FFEBUR128State * st, double *out) |
646 | { |
647 | double energy; |
648 | int error = ebur128_energy_shortterm(st, &energy); |
649 | if (error) { |
650 | return error; |
651 | } else if (energy <= 0.0) { |
652 | *out = -HUGE_VAL; |
653 | return 0; |
654 | } |
655 | *out = ebur128_energy_to_loudness(energy); |
656 | return 0; |
657 | } |
658 | |
659 | int ff_ebur128_loudness_window(FFEBUR128State * st, |
660 | unsigned long window, double *out) |
661 | { |
662 | double energy; |
663 | size_t interval_frames = st->samplerate * window / 1000; |
664 | int error = ebur128_energy_in_interval(st, interval_frames, &energy); |
665 | if (error) { |
666 | return error; |
667 | } else if (energy <= 0.0) { |
668 | *out = -HUGE_VAL; |
669 | return 0; |
670 | } |
671 | *out = ebur128_energy_to_loudness(energy); |
672 | return 0; |
673 | } |
674 | |
675 | /* EBU - TECH 3342 */ |
676 | int ff_ebur128_loudness_range_multiple(FFEBUR128State ** sts, size_t size, |
677 | double *out) |
678 | { |
679 | size_t i, j; |
680 | size_t stl_size; |
681 | double stl_power, stl_integrated; |
682 | /* High and low percentile energy */ |
683 | double h_en, l_en; |
684 | unsigned long hist[1000] = { 0 }; |
685 | size_t percentile_low, percentile_high; |
686 | size_t index; |
687 | |
688 | for (i = 0; i < size; ++i) { |
689 | if (sts[i]) { |
690 | if ((sts[i]->mode & FF_EBUR128_MODE_LRA) != |
691 | FF_EBUR128_MODE_LRA) { |
692 | return AVERROR(EINVAL); |
693 | } |
694 | } |
695 | } |
696 | |
697 | stl_size = 0; |
698 | stl_power = 0.0; |
699 | for (i = 0; i < size; ++i) { |
700 | if (!sts[i]) |
701 | continue; |
702 | for (j = 0; j < 1000; ++j) { |
703 | hist[j] += sts[i]->d->short_term_block_energy_histogram[j]; |
704 | stl_size += sts[i]->d->short_term_block_energy_histogram[j]; |
705 | stl_power += sts[i]->d->short_term_block_energy_histogram[j] |
706 | * histogram_energies[j]; |
707 | } |
708 | } |
709 | if (!stl_size) { |
710 | *out = 0.0; |
711 | return 0; |
712 | } |
713 | |
714 | stl_power /= stl_size; |
715 | stl_integrated = MINUS_20DB * stl_power; |
716 | |
717 | if (stl_integrated < histogram_energy_boundaries[0]) { |
718 | index = 0; |
719 | } else { |
720 | index = find_histogram_index(stl_integrated); |
721 | if (stl_integrated > histogram_energies[index]) { |
722 | ++index; |
723 | } |
724 | } |
725 | stl_size = 0; |
726 | for (j = index; j < 1000; ++j) { |
727 | stl_size += hist[j]; |
728 | } |
729 | if (!stl_size) { |
730 | *out = 0.0; |
731 | return 0; |
732 | } |
733 | |
734 | percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5); |
735 | percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5); |
736 | |
737 | stl_size = 0; |
738 | j = index; |
739 | while (stl_size <= percentile_low) { |
740 | stl_size += hist[j++]; |
741 | } |
742 | l_en = histogram_energies[j - 1]; |
743 | while (stl_size <= percentile_high) { |
744 | stl_size += hist[j++]; |
745 | } |
746 | h_en = histogram_energies[j - 1]; |
747 | *out = |
748 | ebur128_energy_to_loudness(h_en) - |
749 | ebur128_energy_to_loudness(l_en); |
750 | return 0; |
751 | } |
752 | |
753 | int ff_ebur128_loudness_range(FFEBUR128State * st, double *out) |
754 | { |
755 | return ff_ebur128_loudness_range_multiple(&st, 1, out); |
756 | } |
757 | |
758 | int ff_ebur128_sample_peak(FFEBUR128State * st, |
759 | unsigned int channel_number, double *out) |
760 | { |
761 | if ((st->mode & FF_EBUR128_MODE_SAMPLE_PEAK) != |
762 | FF_EBUR128_MODE_SAMPLE_PEAK) { |
763 | return AVERROR(EINVAL); |
764 | } else if (channel_number >= st->channels) { |
765 | return AVERROR(EINVAL); |
766 | } |
767 | *out = st->d->sample_peak[channel_number]; |
768 | return 0; |
769 | } |
770 |