summaryrefslogtreecommitdiff
path: root/libavcodec/aacdec.c (plain)
blob: 726ea03dc42c8475d9eea36526f3ddaecee0fcc6
1/*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6 *
7 * AAC LATM decoder
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28/**
29 * @file
30 * AAC decoder
31 * @author Oded Shimon ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 */
34
35#define FFT_FLOAT 1
36#define FFT_FIXED_32 0
37#define USE_FIXED 0
38
39#include "libavutil/float_dsp.h"
40#include "libavutil/opt.h"
41#include "avcodec.h"
42#include "internal.h"
43#include "get_bits.h"
44#include "fft.h"
45#include "mdct15.h"
46#include "lpc.h"
47#include "kbdwin.h"
48#include "sinewin.h"
49
50#include "aac.h"
51#include "aactab.h"
52#include "aacdectab.h"
53#include "cbrt_data.h"
54#include "sbr.h"
55#include "aacsbr.h"
56#include "mpeg4audio.h"
57#include "aacadtsdec.h"
58#include "profiles.h"
59#include "libavutil/intfloat.h"
60
61#include <errno.h>
62#include <math.h>
63#include <stdint.h>
64#include <string.h>
65
66#if ARCH_ARM
67# include "arm/aac.h"
68#elif ARCH_MIPS
69# include "mips/aacdec_mips.h"
70#endif
71
72static av_always_inline void reset_predict_state(PredictorState *ps)
73{
74 ps->r0 = 0.0f;
75 ps->r1 = 0.0f;
76 ps->cor0 = 0.0f;
77 ps->cor1 = 0.0f;
78 ps->var0 = 1.0f;
79 ps->var1 = 1.0f;
80}
81
#ifndef VMUL2
/**
 * Emit two table values multiplied by a common scale.
 *
 * The two table positions are the low and the next 4-bit fields of idx;
 * any higher bits of idx are ignored.
 *
 * @param dst   where the two results are written
 * @param v     lookup table, indexed with values in the range 0..15
 * @param idx   packed pair of 4-bit table indices
 * @param scale pointer to the multiplier applied to both values
 * @return dst advanced past the two values written
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[idx & 15]        * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;

    return dst + 2;
}
#endif
92
#ifndef VMUL4
/**
 * Emit four table values multiplied by a common scale.
 *
 * The four table positions are the four lowest 2-bit fields of idx, taken
 * from least to most significant; any higher bits of idx are ignored.
 *
 * @param dst   where the four results are written
 * @param v     lookup table, indexed with values in the range 0..3
 * @param idx   packed group of four 2-bit table indices
 * @param scale pointer to the multiplier applied to all values
 * @return dst advanced past the four values written
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = v[(idx >> (2 * n)) & 3] * gain;

    return dst + 4;
}
#endif
105
106#ifndef VMUL2S
107static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
108 unsigned sign, const float *scale)
109{
110 union av_intfloat32 s0, s1;
111
112 s0.f = s1.f = *scale;
113 s0.i ^= sign >> 1 << 31;
114 s1.i ^= sign << 31;
115
116 *dst++ = v[idx & 15] * s0.f;
117 *dst++ = v[idx>>4 & 15] * s1.f;
118
119 return dst;
120}
121#endif
122
123#ifndef VMUL4S
124static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
125 unsigned sign, const float *scale)
126{
127 unsigned nz = idx >> 12;
128 union av_intfloat32 s = { .f = *scale };
129 union av_intfloat32 t;
130
131 t.i = s.i ^ (sign & 1U<<31);
132 *dst++ = v[idx & 3] * t.f;
133
134 sign <<= nz & 1; nz >>= 1;
135 t.i = s.i ^ (sign & 1U<<31);
136 *dst++ = v[idx>>2 & 3] * t.f;
137
138 sign <<= nz & 1; nz >>= 1;
139 t.i = s.i ^ (sign & 1U<<31);
140 *dst++ = v[idx>>4 & 3] * t.f;
141
142 sign <<= nz & 1;
143 t.i = s.i ^ (sign & 1U<<31);
144 *dst++ = v[idx>>6 & 3] * t.f;
145
146 return dst;
147}
148#endif
149
150static av_always_inline float flt16_round(float pf)
151{
152 union av_intfloat32 tmp;
153 tmp.f = pf;
154 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
155 return tmp.f;
156}
157
158static av_always_inline float flt16_even(float pf)
159{
160 union av_intfloat32 tmp;
161 tmp.f = pf;
162 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
163 return tmp.f;
164}
165
166static av_always_inline float flt16_trunc(float pf)
167{
168 union av_intfloat32 pun;
169 pun.f = pf;
170 pun.i &= 0xFFFF0000U;
171 return pun.f;
172}
173
174static av_always_inline void predict(PredictorState *ps, float *coef,
175 int output_enable)
176{
177 const float a = 0.953125; // 61.0 / 64
178 const float alpha = 0.90625; // 29.0 / 32
179 float e0, e1;
180 float pv;
181 float k1, k2;
182 float r0 = ps->r0, r1 = ps->r1;
183 float cor0 = ps->cor0, cor1 = ps->cor1;
184 float var0 = ps->var0, var1 = ps->var1;
185
186 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
187 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
188
189 pv = flt16_round(k1 * r0 + k2 * r1);
190 if (output_enable)
191 *coef += pv;
192
193 e0 = *coef;
194 e1 = e0 - k1 * r0;
195
196 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
197 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
198 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
199 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
200
201 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
202 ps->r0 = flt16_trunc(a * e0);
203}
204
205/**
206 * Apply dependent channel coupling (applied before IMDCT).
207 *
208 * @param index index into coupling gain array
209 */
210static void apply_dependent_coupling(AACContext *ac,
211 SingleChannelElement *target,
212 ChannelElement *cce, int index)
213{
214 IndividualChannelStream *ics = &cce->ch[0].ics;
215 const uint16_t *offsets = ics->swb_offset;
216 float *dest = target->coeffs;
217 const float *src = cce->ch[0].coeffs;
218 int g, i, group, k, idx = 0;
219 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
220 av_log(ac->avctx, AV_LOG_ERROR,
221 "Dependent coupling is not supported together with LTP\n");
222 return;
223 }
224 for (g = 0; g < ics->num_window_groups; g++) {
225 for (i = 0; i < ics->max_sfb; i++, idx++) {
226 if (cce->ch[0].band_type[idx] != ZERO_BT) {
227 const float gain = cce->coup.gain[index][idx];
228 for (group = 0; group < ics->group_len[g]; group++) {
229 for (k = offsets[i]; k < offsets[i + 1]; k++) {
230 // FIXME: SIMDify
231 dest[group * 128 + k] += gain * src[group * 128 + k];
232 }
233 }
234 }
235 }
236 dest += ics->group_len[g] * 128;
237 src += ics->group_len[g] * 128;
238 }
239}
240
241/**
242 * Apply independent channel coupling (applied after IMDCT).
243 *
244 * @param index index into coupling gain array
245 */
246static void apply_independent_coupling(AACContext *ac,
247 SingleChannelElement *target,
248 ChannelElement *cce, int index)
249{
250 int i;
251 const float gain = cce->coup.gain[index][0];
252 const float *src = cce->ch[0].ret;
253 float *dest = target->ret;
254 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
255
256 for (i = 0; i < len; i++)
257 dest[i] += gain * src[i];
258}
259
260#include "aacdec_template.c"
261
262#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
263
264struct LATMContext {
265 AACContext aac_ctx; ///< containing AACContext
266 int initialized; ///< initialized after a valid extradata was seen
267
268 // parser data
269 int audio_mux_version_A; ///< LATM syntax version
270 int frame_length_type; ///< 0/1 variable/fixed frame length
271 int frame_length; ///< frame length for fixed frame length
272};
273
274static inline uint32_t latm_get_value(GetBitContext *b)
275{
276 int length = get_bits(b, 2);
277
278 return get_bits_long(b, (length+1)*8);
279}
280
281static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
282 GetBitContext *gb, int asclen)
283{
284 AACContext *ac = &latmctx->aac_ctx;
285 AVCodecContext *avctx = ac->avctx;
286 MPEG4AudioConfig m4ac = { 0 };
287 GetBitContext gbc;
288 int config_start_bit = get_bits_count(gb);
289 int sync_extension = 0;
290 int bits_consumed, esize, i;
291
292 if (asclen > 0) {
293 sync_extension = 1;
294 asclen = FFMIN(asclen, get_bits_left(gb));
295 init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
296 skip_bits_long(&gbc, config_start_bit);
297 } else if (asclen == 0) {
298 gbc = *gb;
299 } else {
300 return AVERROR_INVALIDDATA;
301 }
302
303 if (get_bits_left(gb) <= 0)
304 return AVERROR_INVALIDDATA;
305
306 bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
307 &gbc, config_start_bit,
308 sync_extension);
309
310 if (bits_consumed < config_start_bit)
311 return AVERROR_INVALIDDATA;
312 bits_consumed -= config_start_bit;
313
314 if (asclen == 0)
315 asclen = bits_consumed;
316
317 if (!latmctx->initialized ||
318 ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
319 ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
320
321 if(latmctx->initialized) {
322 av_log(avctx, AV_LOG_INFO, "audio config changed\n");
323 } else {
324 av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
325 }
326 latmctx->initialized = 0;
327
328 esize = (asclen + 7) / 8;
329
330 if (avctx->extradata_size < esize) {
331 av_free(avctx->extradata);
332 avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
333 if (!avctx->extradata)
334 return AVERROR(ENOMEM);
335 }
336
337 avctx->extradata_size = esize;
338 gbc = *gb;
339 for (i = 0; i < esize; i++) {
340 avctx->extradata[i] = get_bits(&gbc, 8);
341 }
342 memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
343 }
344 skip_bits_long(gb, asclen);
345
346 return 0;
347}
348
349static int read_stream_mux_config(struct LATMContext *latmctx,
350 GetBitContext *gb)
351{
352 int ret, audio_mux_version = get_bits(gb, 1);
353
354 latmctx->audio_mux_version_A = 0;
355 if (audio_mux_version)
356 latmctx->audio_mux_version_A = get_bits(gb, 1);
357
358 if (!latmctx->audio_mux_version_A) {
359
360 if (audio_mux_version)
361 latm_get_value(gb); // taraFullness
362
363 skip_bits(gb, 1); // allStreamSameTimeFraming
364 skip_bits(gb, 6); // numSubFrames
365 // numPrograms
366 if (get_bits(gb, 4)) { // numPrograms
367 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
368 return AVERROR_PATCHWELCOME;
369 }
370
371 // for each program (which there is only one in DVB)
372
373 // for each layer (which there is only one in DVB)
374 if (get_bits(gb, 3)) { // numLayer
375 avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
376 return AVERROR_PATCHWELCOME;
377 }
378
379 // for all but first stream: use_same_config = get_bits(gb, 1);
380 if (!audio_mux_version) {
381 if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
382 return ret;
383 } else {
384 int ascLen = latm_get_value(gb);
385 if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
386 return ret;
387 }
388
389 latmctx->frame_length_type = get_bits(gb, 3);
390 switch (latmctx->frame_length_type) {
391 case 0:
392 skip_bits(gb, 8); // latmBufferFullness
393 break;
394 case 1:
395 latmctx->frame_length = get_bits(gb, 9);
396 break;
397 case 3:
398 case 4:
399 case 5:
400 skip_bits(gb, 6); // CELP frame length table index
401 break;
402 case 6:
403 case 7:
404 skip_bits(gb, 1); // HVXC frame length table index
405 break;
406 }
407
408 if (get_bits(gb, 1)) { // other data
409 if (audio_mux_version) {
410 latm_get_value(gb); // other_data_bits
411 } else {
412 int esc;
413 do {
414 esc = get_bits(gb, 1);
415 skip_bits(gb, 8);
416 } while (esc);
417 }
418 }
419
420 if (get_bits(gb, 1)) // crc present
421 skip_bits(gb, 8); // config_crc
422 }
423
424 return 0;
425}
426
427static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
428{
429 uint8_t tmp;
430
431 if (ctx->frame_length_type == 0) {
432 int mux_slot_length = 0;
433 do {
434 tmp = get_bits(gb, 8);
435 mux_slot_length += tmp;
436 } while (tmp == 255);
437 return mux_slot_length;
438 } else if (ctx->frame_length_type == 1) {
439 return ctx->frame_length;
440 } else if (ctx->frame_length_type == 3 ||
441 ctx->frame_length_type == 5 ||
442 ctx->frame_length_type == 7) {
443 skip_bits(gb, 2); // mux_slot_length_coded
444 }
445 return 0;
446}
447
448static int read_audio_mux_element(struct LATMContext *latmctx,
449 GetBitContext *gb)
450{
451 int err;
452 uint8_t use_same_mux = get_bits(gb, 1);
453 if (!use_same_mux) {
454 if ((err = read_stream_mux_config(latmctx, gb)) < 0)
455 return err;
456 } else if (!latmctx->aac_ctx.avctx->extradata) {
457 av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
458 "no decoder config found\n");
459 return AVERROR(EAGAIN);
460 }
461 if (latmctx->audio_mux_version_A == 0) {
462 int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
463 if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
464 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
465 return AVERROR_INVALIDDATA;
466 } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
467 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
468 "frame length mismatch %d << %d\n",
469 mux_slot_length_bytes * 8, get_bits_left(gb));
470 return AVERROR_INVALIDDATA;
471 }
472 }
473 return 0;
474}
475
476
477static int latm_decode_frame(AVCodecContext *avctx, void *out,
478 int *got_frame_ptr, AVPacket *avpkt)
479{
480 struct LATMContext *latmctx = avctx->priv_data;
481 int muxlength, err;
482 GetBitContext gb;
483
484 if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
485 return err;
486
487 // check for LOAS sync word
488 if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
489 return AVERROR_INVALIDDATA;
490
491 muxlength = get_bits(&gb, 13) + 3;
492 // not enough data, the parser should have sorted this out
493 if (muxlength > avpkt->size)
494 return AVERROR_INVALIDDATA;
495
496 if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
497 return err;
498
499 if (!latmctx->initialized) {
500 if (!avctx->extradata) {
501 *got_frame_ptr = 0;
502 return avpkt->size;
503 } else {
504 push_output_configuration(&latmctx->aac_ctx);
505 if ((err = decode_audio_specific_config(
506 &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
507 avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
508 pop_output_configuration(&latmctx->aac_ctx);
509 return err;
510 }
511 latmctx->initialized = 1;
512 }
513 }
514
515 if (show_bits(&gb, 12) == 0xfff) {
516 av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
517 "ADTS header detected, probably as result of configuration "
518 "misparsing\n");
519 return AVERROR_INVALIDDATA;
520 }
521
522 switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
523 case AOT_ER_AAC_LC:
524 case AOT_ER_AAC_LTP:
525 case AOT_ER_AAC_LD:
526 case AOT_ER_AAC_ELD:
527 err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
528 break;
529 default:
530 err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
531 }
532 if (err < 0)
533 return err;
534
535 return muxlength;
536}
537
538static av_cold int latm_decode_init(AVCodecContext *avctx)
539{
540 struct LATMContext *latmctx = avctx->priv_data;
541 int ret = aac_decode_init(avctx);
542
543 if (avctx->extradata_size > 0)
544 latmctx->initialized = !ret;
545
546 return ret;
547}
548
549AVCodec ff_aac_decoder = {
550 .name = "aac",
551 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
552 .type = AVMEDIA_TYPE_AUDIO,
553 .id = AV_CODEC_ID_AAC,
554 .priv_data_size = sizeof(AACContext),
555 .init = aac_decode_init,
556 .close = aac_decode_close,
557 .decode = aac_decode_frame,
558 .sample_fmts = (const enum AVSampleFormat[]) {
559 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
560 },
561 .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
562 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
563 .channel_layouts = aac_channel_layout,
564 .flush = flush,
565 .priv_class = &aac_decoder_class,
566 .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
567};
568
569/*
570 Note: This decoder filter is intended to decode LATM streams transferred
571 in MPEG transport streams which only contain one program.
572 To do a more complex LATM demuxing a separate LATM demuxer should be used.
573*/
574AVCodec ff_aac_latm_decoder = {
575 .name = "aac_latm",
576 .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
577 .type = AVMEDIA_TYPE_AUDIO,
578 .id = AV_CODEC_ID_AAC_LATM,
579 .priv_data_size = sizeof(struct LATMContext),
580 .init = latm_decode_init,
581 .close = aac_decode_close,
582 .decode = latm_decode_frame,
583 .sample_fmts = (const enum AVSampleFormat[]) {
584 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
585 },
586 .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
587 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
588 .channel_layouts = aac_channel_layout,
589 .flush = flush,
590 .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
591};
592