blob: 6cdcdf19644e8f613a9aa3e534dd804126df50a6
1 | /* |
2 | * ATRAC3 compatible decoder |
3 | * Copyright (c) 2006-2008 Maxim Poliakovski |
4 | * Copyright (c) 2006-2008 Benjamin Larsson |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | /** |
24 | * @file |
25 | * ATRAC3 compatible decoder. |
26 | * This decoder handles Sony's ATRAC3 data. |
27 | * |
28 | * Container formats used to store ATRAC3 data: |
29 | * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3). |
30 | * |
31 | * To use this decoder, a calling application must supply the extradata |
32 | * bytes provided in the containers above. |
33 | */ |
34 | |
35 | #include <math.h> |
36 | #include <stddef.h> |
37 | #include <stdio.h> |
38 | |
39 | #include "libavutil/attributes.h" |
40 | #include "libavutil/float_dsp.h" |
41 | #include "libavutil/libm.h" |
42 | #include "avcodec.h" |
43 | #include "bytestream.h" |
44 | #include "fft.h" |
45 | #include "get_bits.h" |
46 | #include "internal.h" |
47 | |
48 | #include "atrac.h" |
49 | #include "atrac3data.h" |
50 | |
/* Range of channel counts accepted by atrac3_decode_init(). */
#define MIN_CHANNELS    1
#define MAX_CHANNELS    8
/* One joint-stereo pair per two channels. The expansion is parenthesized so
 * that the macro stays a single operand inside larger expressions. */
#define MAX_JS_PAIRS    (MAX_CHANNELS / 2)

/* Values compared against ATRAC3Context.coding_mode. */
#define JOINT_STEREO    0x12
#define SINGLE          0x2

/* Samples produced per channel and frame, and the length of the MDCT window. */
#define SAMPLES_PER_FRAME 1024
#define MDCT_SIZE          512
60 | |
/**
 * Gain control data of one sound unit: one AtracGainInfo entry per band.
 * decode_gain_control() fills the coded bands and marks the remaining
 * entries as having no gain points.
 */
typedef struct GainBlock {
    AtracGainInfo g_block[4];
} GainBlock;
64 | |
/** One decoded tonal component, as produced by decode_tonal_components(). */
typedef struct TonalComponent {
    int pos;       ///< index of the first coefficient inside the spectrum buffer
    int num_coefs; ///< number of valid entries in coef[]
    float coef[8]; ///< dequantized coefficients, added onto the spectrum
} TonalComponent;
70 | |
/** Per-channel decoder state; ATRAC3Context.units holds one entry per channel. */
typedef struct ChannelUnit {
    int bands_coded;                       ///< 2-bit band count field read from the sound unit
    int num_components;                    ///< number of valid entries in components[]
    float prev_frame[SAMPLES_PER_FRAME];   ///< handed to ff_atrac_gain_compensation() in 256-sample slices
    int gc_blk_switch;                     ///< selects the gain_block[] roles, toggled after every sound unit
    TonalComponent components[64];
    GainBlock gain_block[2];

    DECLARE_ALIGNED(32, float, spectrum)[SAMPLES_PER_FRAME];
    DECLARE_ALIGNED(32, float, imdct_buf)[SAMPLES_PER_FRAME];

    float delay_buf1[46]; ///<qmf delay buffers
    float delay_buf2[46];
    float delay_buf3[46];
} ChannelUnit;
86 | |
/** Private decoder context shared by the ATRAC3 and ATRAC3AL decoders. */
typedef struct ATRAC3Context {
    GetBitContext gb;
    //@{
    /** stream data */
    int coding_mode;    ///< JOINT_STEREO or SINGLE, validated in atrac3_decode_init()

    ChannelUnit *units; ///< one entry per channel, allocated in atrac3_decode_init()
    //@}
    //@{
    /** joint-stereo related variables */
    int matrix_coeff_index_prev[MAX_JS_PAIRS][4];
    int matrix_coeff_index_now[MAX_JS_PAIRS][4];
    int matrix_coeff_index_next[MAX_JS_PAIRS][4];
    int weighting_delay[MAX_JS_PAIRS][6];
    //@}
    //@{
    /** data buffers */
    uint8_t *decoded_bytes_buffer; ///< scratch copy of the frame (descrambled / byte-reversed data)
    float temp_buf[1070];          ///< scratch buffer passed to ff_atrac_iqmf()
    //@}
    //@{
    /** extradata */
    int scrambled_stream;          ///< nonzero when the input must go through decode_bytes() first
    //@}

    AtracGCContext gainc_ctx;
    FFTContext mdct_ctx;
    AVFloatDSPContext *fdsp;
} ATRAC3Context;
116 | |
/* Window applied to the IMDCT output; filled by init_imdct_window(). */
static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE];
/* Backing storage and handles for the seven spectral coefficient VLCs;
 * set up once by atrac3_init_static_data(). */
static VLC_TYPE atrac3_vlc_table[4096][2];
static VLC spectral_coeff_tab[7];
120 | |
121 | /** |
122 | * Regular 512 points IMDCT without overlapping, with the exception of the |
123 | * swapping of odd bands caused by the reverse spectra of the QMF. |
124 | * |
125 | * @param odd_band 1 if the band is an odd band |
126 | */ |
127 | static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band) |
128 | { |
129 | int i; |
130 | |
131 | if (odd_band) { |
132 | /** |
133 | * Reverse the odd bands before IMDCT, this is an effect of the QMF |
134 | * transform or it gives better compression to do it this way. |
135 | * FIXME: It should be possible to handle this in imdct_calc |
136 | * for that to happen a modification of the prerotation step of |
137 | * all SIMD code and C code is needed. |
138 | * Or fix the functions before so they generate a pre reversed spectrum. |
139 | */ |
140 | for (i = 0; i < 128; i++) |
141 | FFSWAP(float, input[i], input[255 - i]); |
142 | } |
143 | |
144 | q->mdct_ctx.imdct_calc(&q->mdct_ctx, output, input); |
145 | |
146 | /* Perform windowing on the output. */ |
147 | q->fdsp->vector_fmul(output, output, mdct_window, MDCT_SIZE); |
148 | } |
149 | |
150 | /* |
151 | * indata descrambling, only used for data coming from the rm container |
152 | */ |
153 | static int decode_bytes(const uint8_t *input, uint8_t *out, int bytes) |
154 | { |
155 | int i, off; |
156 | uint32_t c; |
157 | const uint32_t *buf; |
158 | uint32_t *output = (uint32_t *)out; |
159 | |
160 | off = (intptr_t)input & 3; |
161 | buf = (const uint32_t *)(input - off); |
162 | if (off) |
163 | c = av_be2ne32((0x537F6103U >> (off * 8)) | (0x537F6103U << (32 - (off * 8)))); |
164 | else |
165 | c = av_be2ne32(0x537F6103U); |
166 | bytes += 3 + off; |
167 | for (i = 0; i < bytes / 4; i++) |
168 | output[i] = c ^ buf[i]; |
169 | |
170 | if (off) |
171 | avpriv_request_sample(NULL, "Offset of %d", off); |
172 | |
173 | return off; |
174 | } |
175 | |
176 | static av_cold void init_imdct_window(void) |
177 | { |
178 | int i, j; |
179 | |
180 | /* generate the mdct window, for details see |
181 | * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ |
182 | for (i = 0, j = 255; i < 128; i++, j--) { |
183 | float wi = sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; |
184 | float wj = sin(((j + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; |
185 | float w = 0.5 * (wi * wi + wj * wj); |
186 | mdct_window[i] = mdct_window[511 - i] = wi / w; |
187 | mdct_window[j] = mdct_window[511 - j] = wj / w; |
188 | } |
189 | } |
190 | |
/**
 * Release everything atrac3_decode_init() set up: the channel units, the
 * scratch byte buffer, the float DSP context and the MDCT context.
 * Also used by atrac3_decode_init() to clean up after an allocation failure.
 *
 * @return always 0
 */
static av_cold int atrac3_decode_close(AVCodecContext *avctx)
{
    ATRAC3Context *q = avctx->priv_data;

    av_freep(&q->units);
    av_freep(&q->decoded_bytes_buffer);
    av_freep(&q->fdsp);

    ff_mdct_end(&q->mdct_ctx);

    return 0;
}
203 | |
204 | /** |
205 | * Mantissa decoding |
206 | * |
207 | * @param selector which table the output values are coded with |
208 | * @param coding_flag constant length coding or variable length coding |
209 | * @param mantissas mantissa output table |
210 | * @param num_codes number of values to get |
211 | */ |
212 | static void read_quant_spectral_coeffs(GetBitContext *gb, int selector, |
213 | int coding_flag, int *mantissas, |
214 | int num_codes) |
215 | { |
216 | int i, code, huff_symb; |
217 | |
218 | if (selector == 1) |
219 | num_codes /= 2; |
220 | |
221 | if (coding_flag != 0) { |
222 | /* constant length coding (CLC) */ |
223 | int num_bits = clc_length_tab[selector]; |
224 | |
225 | if (selector > 1) { |
226 | for (i = 0; i < num_codes; i++) { |
227 | if (num_bits) |
228 | code = get_sbits(gb, num_bits); |
229 | else |
230 | code = 0; |
231 | mantissas[i] = code; |
232 | } |
233 | } else { |
234 | for (i = 0; i < num_codes; i++) { |
235 | if (num_bits) |
236 | code = get_bits(gb, num_bits); // num_bits is always 4 in this case |
237 | else |
238 | code = 0; |
239 | mantissas[i * 2 ] = mantissa_clc_tab[code >> 2]; |
240 | mantissas[i * 2 + 1] = mantissa_clc_tab[code & 3]; |
241 | } |
242 | } |
243 | } else { |
244 | /* variable length coding (VLC) */ |
245 | if (selector != 1) { |
246 | for (i = 0; i < num_codes; i++) { |
247 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, |
248 | spectral_coeff_tab[selector-1].bits, 3); |
249 | huff_symb += 1; |
250 | code = huff_symb >> 1; |
251 | if (huff_symb & 1) |
252 | code = -code; |
253 | mantissas[i] = code; |
254 | } |
255 | } else { |
256 | for (i = 0; i < num_codes; i++) { |
257 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector - 1].table, |
258 | spectral_coeff_tab[selector - 1].bits, 3); |
259 | mantissas[i * 2 ] = mantissa_vlc_tab[huff_symb * 2 ]; |
260 | mantissas[i * 2 + 1] = mantissa_vlc_tab[huff_symb * 2 + 1]; |
261 | } |
262 | } |
263 | } |
264 | } |
265 | |
266 | /** |
267 | * Restore the quantized band spectrum coefficients |
268 | * |
269 | * @return subband count, fix for broken specification/files |
270 | */ |
271 | static int decode_spectrum(GetBitContext *gb, float *output) |
272 | { |
273 | int num_subbands, coding_mode, i, j, first, last, subband_size; |
274 | int subband_vlc_index[32], sf_index[32]; |
275 | int mantissas[128]; |
276 | float scale_factor; |
277 | |
278 | num_subbands = get_bits(gb, 5); // number of coded subbands |
279 | coding_mode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC |
280 | |
281 | /* get the VLC selector table for the subbands, 0 means not coded */ |
282 | for (i = 0; i <= num_subbands; i++) |
283 | subband_vlc_index[i] = get_bits(gb, 3); |
284 | |
285 | /* read the scale factor indexes from the stream */ |
286 | for (i = 0; i <= num_subbands; i++) { |
287 | if (subband_vlc_index[i] != 0) |
288 | sf_index[i] = get_bits(gb, 6); |
289 | } |
290 | |
291 | for (i = 0; i <= num_subbands; i++) { |
292 | first = subband_tab[i ]; |
293 | last = subband_tab[i + 1]; |
294 | |
295 | subband_size = last - first; |
296 | |
297 | if (subband_vlc_index[i] != 0) { |
298 | /* decode spectral coefficients for this subband */ |
299 | /* TODO: This can be done faster is several blocks share the |
300 | * same VLC selector (subband_vlc_index) */ |
301 | read_quant_spectral_coeffs(gb, subband_vlc_index[i], coding_mode, |
302 | mantissas, subband_size); |
303 | |
304 | /* decode the scale factor for this subband */ |
305 | scale_factor = ff_atrac_sf_table[sf_index[i]] * |
306 | inv_max_quant[subband_vlc_index[i]]; |
307 | |
308 | /* inverse quantize the coefficients */ |
309 | for (j = 0; first < last; first++, j++) |
310 | output[first] = mantissas[j] * scale_factor; |
311 | } else { |
312 | /* this subband was not coded, so zero the entire subband */ |
313 | memset(output + first, 0, subband_size * sizeof(*output)); |
314 | } |
315 | } |
316 | |
317 | /* clear the subbands that were not coded */ |
318 | first = subband_tab[i]; |
319 | memset(output + first, 0, (SAMPLES_PER_FRAME - first) * sizeof(*output)); |
320 | return num_subbands; |
321 | } |
322 | |
323 | /** |
324 | * Restore the quantized tonal components |
325 | * |
326 | * @param components tonal components |
327 | * @param num_bands number of coded bands |
328 | */ |
329 | static int decode_tonal_components(GetBitContext *gb, |
330 | TonalComponent *components, int num_bands) |
331 | { |
332 | int i, b, c, m; |
333 | int nb_components, coding_mode_selector, coding_mode; |
334 | int band_flags[4], mantissa[8]; |
335 | int component_count = 0; |
336 | |
337 | nb_components = get_bits(gb, 5); |
338 | |
339 | /* no tonal components */ |
340 | if (nb_components == 0) |
341 | return 0; |
342 | |
343 | coding_mode_selector = get_bits(gb, 2); |
344 | if (coding_mode_selector == 2) |
345 | return AVERROR_INVALIDDATA; |
346 | |
347 | coding_mode = coding_mode_selector & 1; |
348 | |
349 | for (i = 0; i < nb_components; i++) { |
350 | int coded_values_per_component, quant_step_index; |
351 | |
352 | for (b = 0; b <= num_bands; b++) |
353 | band_flags[b] = get_bits1(gb); |
354 | |
355 | coded_values_per_component = get_bits(gb, 3); |
356 | |
357 | quant_step_index = get_bits(gb, 3); |
358 | if (quant_step_index <= 1) |
359 | return AVERROR_INVALIDDATA; |
360 | |
361 | if (coding_mode_selector == 3) |
362 | coding_mode = get_bits1(gb); |
363 | |
364 | for (b = 0; b < (num_bands + 1) * 4; b++) { |
365 | int coded_components; |
366 | |
367 | if (band_flags[b >> 2] == 0) |
368 | continue; |
369 | |
370 | coded_components = get_bits(gb, 3); |
371 | |
372 | for (c = 0; c < coded_components; c++) { |
373 | TonalComponent *cmp = &components[component_count]; |
374 | int sf_index, coded_values, max_coded_values; |
375 | float scale_factor; |
376 | |
377 | sf_index = get_bits(gb, 6); |
378 | if (component_count >= 64) |
379 | return AVERROR_INVALIDDATA; |
380 | |
381 | cmp->pos = b * 64 + get_bits(gb, 6); |
382 | |
383 | max_coded_values = SAMPLES_PER_FRAME - cmp->pos; |
384 | coded_values = coded_values_per_component + 1; |
385 | coded_values = FFMIN(max_coded_values, coded_values); |
386 | |
387 | scale_factor = ff_atrac_sf_table[sf_index] * |
388 | inv_max_quant[quant_step_index]; |
389 | |
390 | read_quant_spectral_coeffs(gb, quant_step_index, coding_mode, |
391 | mantissa, coded_values); |
392 | |
393 | cmp->num_coefs = coded_values; |
394 | |
395 | /* inverse quant */ |
396 | for (m = 0; m < coded_values; m++) |
397 | cmp->coef[m] = mantissa[m] * scale_factor; |
398 | |
399 | component_count++; |
400 | } |
401 | } |
402 | } |
403 | |
404 | return component_count; |
405 | } |
406 | |
407 | /** |
408 | * Decode gain parameters for the coded bands |
409 | * |
410 | * @param block the gainblock for the current band |
411 | * @param num_bands amount of coded bands |
412 | */ |
413 | static int decode_gain_control(GetBitContext *gb, GainBlock *block, |
414 | int num_bands) |
415 | { |
416 | int b, j; |
417 | int *level, *loc; |
418 | |
419 | AtracGainInfo *gain = block->g_block; |
420 | |
421 | for (b = 0; b <= num_bands; b++) { |
422 | gain[b].num_points = get_bits(gb, 3); |
423 | level = gain[b].lev_code; |
424 | loc = gain[b].loc_code; |
425 | |
426 | for (j = 0; j < gain[b].num_points; j++) { |
427 | level[j] = get_bits(gb, 4); |
428 | loc[j] = get_bits(gb, 5); |
429 | if (j && loc[j] <= loc[j - 1]) |
430 | return AVERROR_INVALIDDATA; |
431 | } |
432 | } |
433 | |
434 | /* Clear the unused blocks. */ |
435 | for (; b < 4 ; b++) |
436 | gain[b].num_points = 0; |
437 | |
438 | return 0; |
439 | } |
440 | |
441 | /** |
442 | * Combine the tonal band spectrum and regular band spectrum |
443 | * |
444 | * @param spectrum output spectrum buffer |
445 | * @param num_components number of tonal components |
446 | * @param components tonal components for this band |
447 | * @return position of the last tonal coefficient |
448 | */ |
449 | static int add_tonal_components(float *spectrum, int num_components, |
450 | TonalComponent *components) |
451 | { |
452 | int i, j, last_pos = -1; |
453 | float *input, *output; |
454 | |
455 | for (i = 0; i < num_components; i++) { |
456 | last_pos = FFMAX(components[i].pos + components[i].num_coefs, last_pos); |
457 | input = components[i].coef; |
458 | output = &spectrum[components[i].pos]; |
459 | |
460 | for (j = 0; j < components[i].num_coefs; j++) |
461 | output[j] += input[j]; |
462 | } |
463 | |
464 | return last_pos; |
465 | } |
466 | |
467 | #define INTERPOLATE(old, new, nsample) \ |
468 | ((old) + (nsample) * 0.125 * ((new) - (old))) |
469 | |
470 | static void reverse_matrixing(float *su1, float *su2, int *prev_code, |
471 | int *curr_code) |
472 | { |
473 | int i, nsample, band; |
474 | float mc1_l, mc1_r, mc2_l, mc2_r; |
475 | |
476 | for (i = 0, band = 0; band < 4 * 256; band += 256, i++) { |
477 | int s1 = prev_code[i]; |
478 | int s2 = curr_code[i]; |
479 | nsample = band; |
480 | |
481 | if (s1 != s2) { |
482 | /* Selector value changed, interpolation needed. */ |
483 | mc1_l = matrix_coeffs[s1 * 2 ]; |
484 | mc1_r = matrix_coeffs[s1 * 2 + 1]; |
485 | mc2_l = matrix_coeffs[s2 * 2 ]; |
486 | mc2_r = matrix_coeffs[s2 * 2 + 1]; |
487 | |
488 | /* Interpolation is done over the first eight samples. */ |
489 | for (; nsample < band + 8; nsample++) { |
490 | float c1 = su1[nsample]; |
491 | float c2 = su2[nsample]; |
492 | c2 = c1 * INTERPOLATE(mc1_l, mc2_l, nsample - band) + |
493 | c2 * INTERPOLATE(mc1_r, mc2_r, nsample - band); |
494 | su1[nsample] = c2; |
495 | su2[nsample] = c1 * 2.0 - c2; |
496 | } |
497 | } |
498 | |
499 | /* Apply the matrix without interpolation. */ |
500 | switch (s2) { |
501 | case 0: /* M/S decoding */ |
502 | for (; nsample < band + 256; nsample++) { |
503 | float c1 = su1[nsample]; |
504 | float c2 = su2[nsample]; |
505 | su1[nsample] = c2 * 2.0; |
506 | su2[nsample] = (c1 - c2) * 2.0; |
507 | } |
508 | break; |
509 | case 1: |
510 | for (; nsample < band + 256; nsample++) { |
511 | float c1 = su1[nsample]; |
512 | float c2 = su2[nsample]; |
513 | su1[nsample] = (c1 + c2) * 2.0; |
514 | su2[nsample] = c2 * -2.0; |
515 | } |
516 | break; |
517 | case 2: |
518 | case 3: |
519 | for (; nsample < band + 256; nsample++) { |
520 | float c1 = su1[nsample]; |
521 | float c2 = su2[nsample]; |
522 | su1[nsample] = c1 + c2; |
523 | su2[nsample] = c1 - c2; |
524 | } |
525 | break; |
526 | default: |
527 | av_assert1(0); |
528 | } |
529 | } |
530 | } |
531 | |
/**
 * Derive a pair of channel weights from a weighting index.
 *
 * Index 7 yields the pair (1, 1). Any other index yields index / 7 and
 * sqrt(2 - (index / 7)^2); a nonzero flag swaps the two.
 *
 * @param index weighting index
 * @param flag  nonzero to exchange the two weights
 * @param ch    receives the two weights
 */
static void get_channel_weights(int index, int flag, float ch[2])
{
    if (index == 7) {
        ch[0] = 1.0;
        ch[1] = 1.0;
        return;
    }

    ch[0] = (index & 7) / 7.0;
    ch[1] = sqrt(2 - ch[0] * ch[0]);

    if (flag) {
        const float held = ch[0];

        ch[0] = ch[1];
        ch[1] = held;
    }
}
544 | |
/**
 * Apply channel weights to the three upper 256-sample bands of a channel
 * pair, in place. Nothing is done when both weighting indices (p3[1] and
 * p3[3]) are 7. The first eight samples of each band use interpolated
 * weights, the remaining samples use the weights derived from p3[3]/p3[2].
 *
 * @param su1 samples of the first channel of the pair
 * @param su2 samples of the second channel of the pair
 * @param p3  weighting parameters: (flag, index) pairs at [0..1] and [2..3]
 */
static void channel_weighting(float *su1, float *su2, int *p3)
{
    /* w[x][y] y=0 is left y=1 is right */
    float w[2][2];
    int qmf_band, k;

    if (p3[1] == 7 && p3[3] == 7)
        return;

    get_channel_weights(p3[1], p3[0], w[0]);
    get_channel_weights(p3[3], p3[2], w[1]);

    /* the lowest band is left untouched */
    for (qmf_band = 1; qmf_band < 4; qmf_band++) {
        float *first  = su1 + qmf_band * 256;
        float *second = su2 + qmf_band * 256;

        for (k = 0; k < 8; k++) {
            first[k]  *= INTERPOLATE(w[0][0], w[0][1], k);
            second[k] *= INTERPOLATE(w[1][0], w[1][1], k);
        }
        for (; k < 256; k++) {
            first[k]  *= w[1][0];
            second[k] *= w[1][1];
        }
    }
}
567 | |
568 | /** |
569 | * Decode a Sound Unit |
570 | * |
571 | * @param snd the channel unit to be used |
572 | * @param output the decoded samples before IQMF in float representation |
573 | * @param channel_num channel number |
574 | * @param coding_mode the coding mode (JOINT_STEREO or single channels) |
575 | */ |
576 | static int decode_channel_sound_unit(ATRAC3Context *q, GetBitContext *gb, |
577 | ChannelUnit *snd, float *output, |
578 | int channel_num, int coding_mode) |
579 | { |
580 | int band, ret, num_subbands, last_tonal, num_bands; |
581 | GainBlock *gain1 = &snd->gain_block[ snd->gc_blk_switch]; |
582 | GainBlock *gain2 = &snd->gain_block[1 - snd->gc_blk_switch]; |
583 | |
584 | if (coding_mode == JOINT_STEREO && (channel_num % 2) == 1) { |
585 | if (get_bits(gb, 2) != 3) { |
586 | av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); |
587 | return AVERROR_INVALIDDATA; |
588 | } |
589 | } else { |
590 | if (get_bits(gb, 6) != 0x28) { |
591 | av_log(NULL,AV_LOG_ERROR,"Sound Unit id != 0x28.\n"); |
592 | return AVERROR_INVALIDDATA; |
593 | } |
594 | } |
595 | |
596 | /* number of coded QMF bands */ |
597 | snd->bands_coded = get_bits(gb, 2); |
598 | |
599 | ret = decode_gain_control(gb, gain2, snd->bands_coded); |
600 | if (ret) |
601 | return ret; |
602 | |
603 | snd->num_components = decode_tonal_components(gb, snd->components, |
604 | snd->bands_coded); |
605 | if (snd->num_components < 0) |
606 | return snd->num_components; |
607 | |
608 | num_subbands = decode_spectrum(gb, snd->spectrum); |
609 | |
610 | /* Merge the decoded spectrum and tonal components. */ |
611 | last_tonal = add_tonal_components(snd->spectrum, snd->num_components, |
612 | snd->components); |
613 | |
614 | |
615 | /* calculate number of used MLT/QMF bands according to the amount of coded |
616 | spectral lines */ |
617 | num_bands = (subband_tab[num_subbands] - 1) >> 8; |
618 | if (last_tonal >= 0) |
619 | num_bands = FFMAX((last_tonal + 256) >> 8, num_bands); |
620 | |
621 | |
622 | /* Reconstruct time domain samples. */ |
623 | for (band = 0; band < 4; band++) { |
624 | /* Perform the IMDCT step without overlapping. */ |
625 | if (band <= num_bands) |
626 | imlt(q, &snd->spectrum[band * 256], snd->imdct_buf, band & 1); |
627 | else |
628 | memset(snd->imdct_buf, 0, 512 * sizeof(*snd->imdct_buf)); |
629 | |
630 | /* gain compensation and overlapping */ |
631 | ff_atrac_gain_compensation(&q->gainc_ctx, snd->imdct_buf, |
632 | &snd->prev_frame[band * 256], |
633 | &gain1->g_block[band], &gain2->g_block[band], |
634 | 256, &output[band * 256]); |
635 | } |
636 | |
637 | /* Swap the gain control buffers for the next frame. */ |
638 | snd->gc_blk_switch ^= 1; |
639 | |
640 | return 0; |
641 | } |
642 | |
643 | static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf, |
644 | float **out_samples) |
645 | { |
646 | ATRAC3Context *q = avctx->priv_data; |
647 | int ret, i, ch; |
648 | uint8_t *ptr1; |
649 | |
650 | if (q->coding_mode == JOINT_STEREO) { |
651 | /* channel coupling mode */ |
652 | |
653 | /* Decode sound unit pairs (channels are expected to be even). |
654 | * Multichannel joint stereo interleaves pairs (6ch: 2ch + 2ch + 2ch) */ |
655 | const uint8_t *js_databuf; |
656 | int js_pair, js_block_align; |
657 | |
658 | js_block_align = (avctx->block_align / avctx->channels) * 2; /* block pair */ |
659 | |
660 | for (ch = 0; ch < avctx->channels; ch = ch + 2) { |
661 | js_pair = ch/2; |
662 | js_databuf = databuf + js_pair * js_block_align; /* align to current pair */ |
663 | |
664 | /* Set the bitstream reader at the start of first channel sound unit. */ |
665 | init_get_bits(&q->gb, |
666 | js_databuf, js_block_align * 8); |
667 | |
668 | /* decode Sound Unit 1 */ |
669 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[ch], |
670 | out_samples[ch], ch, JOINT_STEREO); |
671 | if (ret != 0) |
672 | return ret; |
673 | |
674 | /* Framedata of the su2 in the joint-stereo mode is encoded in |
675 | * reverse byte order so we need to swap it first. */ |
676 | if (js_databuf == q->decoded_bytes_buffer) { |
677 | uint8_t *ptr2 = q->decoded_bytes_buffer + js_block_align - 1; |
678 | ptr1 = q->decoded_bytes_buffer; |
679 | for (i = 0; i < js_block_align / 2; i++, ptr1++, ptr2--) |
680 | FFSWAP(uint8_t, *ptr1, *ptr2); |
681 | } else { |
682 | const uint8_t *ptr2 = js_databuf + js_block_align - 1; |
683 | for (i = 0; i < js_block_align; i++) |
684 | q->decoded_bytes_buffer[i] = *ptr2--; |
685 | } |
686 | |
687 | /* Skip the sync codes (0xF8). */ |
688 | ptr1 = q->decoded_bytes_buffer; |
689 | for (i = 4; *ptr1 == 0xF8; i++, ptr1++) { |
690 | if (i >= js_block_align) |
691 | return AVERROR_INVALIDDATA; |
692 | } |
693 | |
694 | |
695 | /* set the bitstream reader at the start of the second Sound Unit */ |
696 | ret = init_get_bits8(&q->gb, |
697 | ptr1, q->decoded_bytes_buffer + js_block_align - ptr1); |
698 | if (ret < 0) |
699 | return ret; |
700 | |
701 | /* Fill the Weighting coeffs delay buffer */ |
702 | memmove(q->weighting_delay[js_pair], &q->weighting_delay[js_pair][2], |
703 | 4 * sizeof(*q->weighting_delay[js_pair])); |
704 | q->weighting_delay[js_pair][4] = get_bits1(&q->gb); |
705 | q->weighting_delay[js_pair][5] = get_bits(&q->gb, 3); |
706 | |
707 | for (i = 0; i < 4; i++) { |
708 | q->matrix_coeff_index_prev[js_pair][i] = q->matrix_coeff_index_now[js_pair][i]; |
709 | q->matrix_coeff_index_now[js_pair][i] = q->matrix_coeff_index_next[js_pair][i]; |
710 | q->matrix_coeff_index_next[js_pair][i] = get_bits(&q->gb, 2); |
711 | } |
712 | |
713 | /* Decode Sound Unit 2. */ |
714 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[ch+1], |
715 | out_samples[ch+1], ch+1, JOINT_STEREO); |
716 | if (ret != 0) |
717 | return ret; |
718 | |
719 | /* Reconstruct the channel coefficients. */ |
720 | reverse_matrixing(out_samples[ch], out_samples[ch+1], |
721 | q->matrix_coeff_index_prev[js_pair], |
722 | q->matrix_coeff_index_now[js_pair]); |
723 | |
724 | channel_weighting(out_samples[ch], out_samples[ch+1], q->weighting_delay[js_pair]); |
725 | } |
726 | } else { |
727 | /* single channels */ |
728 | /* Decode the channel sound units. */ |
729 | for (i = 0; i < avctx->channels; i++) { |
730 | /* Set the bitstream reader at the start of a channel sound unit. */ |
731 | init_get_bits(&q->gb, |
732 | databuf + i * avctx->block_align / avctx->channels, |
733 | avctx->block_align * 8 / avctx->channels); |
734 | |
735 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[i], |
736 | out_samples[i], i, q->coding_mode); |
737 | if (ret != 0) |
738 | return ret; |
739 | } |
740 | } |
741 | |
742 | /* Apply the iQMF synthesis filter. */ |
743 | for (i = 0; i < avctx->channels; i++) { |
744 | float *p1 = out_samples[i]; |
745 | float *p2 = p1 + 256; |
746 | float *p3 = p2 + 256; |
747 | float *p4 = p3 + 256; |
748 | ff_atrac_iqmf(p1, p2, 256, p1, q->units[i].delay_buf1, q->temp_buf); |
749 | ff_atrac_iqmf(p4, p3, 256, p3, q->units[i].delay_buf2, q->temp_buf); |
750 | ff_atrac_iqmf(p1, p3, 512, p1, q->units[i].delay_buf3, q->temp_buf); |
751 | } |
752 | |
753 | return 0; |
754 | } |
755 | |
756 | static int al_decode_frame(AVCodecContext *avctx, const uint8_t *databuf, |
757 | int size, float **out_samples) |
758 | { |
759 | ATRAC3Context *q = avctx->priv_data; |
760 | int ret, i; |
761 | |
762 | /* Set the bitstream reader at the start of a channel sound unit. */ |
763 | init_get_bits(&q->gb, databuf, size * 8); |
764 | /* single channels */ |
765 | /* Decode the channel sound units. */ |
766 | for (i = 0; i < avctx->channels; i++) { |
767 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[i], |
768 | out_samples[i], i, q->coding_mode); |
769 | if (ret != 0) |
770 | return ret; |
771 | while (i < avctx->channels && get_bits_left(&q->gb) > 6 && show_bits(&q->gb, 6) != 0x28) { |
772 | skip_bits(&q->gb, 1); |
773 | } |
774 | } |
775 | |
776 | /* Apply the iQMF synthesis filter. */ |
777 | for (i = 0; i < avctx->channels; i++) { |
778 | float *p1 = out_samples[i]; |
779 | float *p2 = p1 + 256; |
780 | float *p3 = p2 + 256; |
781 | float *p4 = p3 + 256; |
782 | ff_atrac_iqmf(p1, p2, 256, p1, q->units[i].delay_buf1, q->temp_buf); |
783 | ff_atrac_iqmf(p4, p3, 256, p3, q->units[i].delay_buf2, q->temp_buf); |
784 | ff_atrac_iqmf(p1, p3, 512, p1, q->units[i].delay_buf3, q->temp_buf); |
785 | } |
786 | |
787 | return 0; |
788 | } |
789 | |
790 | static int atrac3_decode_frame(AVCodecContext *avctx, void *data, |
791 | int *got_frame_ptr, AVPacket *avpkt) |
792 | { |
793 | AVFrame *frame = data; |
794 | const uint8_t *buf = avpkt->data; |
795 | int buf_size = avpkt->size; |
796 | ATRAC3Context *q = avctx->priv_data; |
797 | int ret; |
798 | const uint8_t *databuf; |
799 | |
800 | if (buf_size < avctx->block_align) { |
801 | av_log(avctx, AV_LOG_ERROR, |
802 | "Frame too small (%d bytes). Truncated file?\n", buf_size); |
803 | return AVERROR_INVALIDDATA; |
804 | } |
805 | |
806 | /* get output buffer */ |
807 | frame->nb_samples = SAMPLES_PER_FRAME; |
808 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
809 | return ret; |
810 | |
811 | /* Check if we need to descramble and what buffer to pass on. */ |
812 | if (q->scrambled_stream) { |
813 | decode_bytes(buf, q->decoded_bytes_buffer, avctx->block_align); |
814 | databuf = q->decoded_bytes_buffer; |
815 | } else { |
816 | databuf = buf; |
817 | } |
818 | |
819 | ret = decode_frame(avctx, databuf, (float **)frame->extended_data); |
820 | if (ret) { |
821 | av_log(avctx, AV_LOG_ERROR, "Frame decoding error!\n"); |
822 | return ret; |
823 | } |
824 | |
825 | *got_frame_ptr = 1; |
826 | |
827 | return avctx->block_align; |
828 | } |
829 | |
830 | static int atrac3al_decode_frame(AVCodecContext *avctx, void *data, |
831 | int *got_frame_ptr, AVPacket *avpkt) |
832 | { |
833 | AVFrame *frame = data; |
834 | int ret; |
835 | |
836 | frame->nb_samples = SAMPLES_PER_FRAME; |
837 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
838 | return ret; |
839 | |
840 | ret = al_decode_frame(avctx, avpkt->data, avpkt->size, |
841 | (float **)frame->extended_data); |
842 | if (ret) { |
843 | av_log(avctx, AV_LOG_ERROR, "Frame decoding error!\n"); |
844 | return ret; |
845 | } |
846 | |
847 | *got_frame_ptr = 1; |
848 | |
849 | return avpkt->size; |
850 | } |
851 | |
852 | static av_cold void atrac3_init_static_data(void) |
853 | { |
854 | int i; |
855 | |
856 | init_imdct_window(); |
857 | ff_atrac_generate_tables(); |
858 | |
859 | /* Initialize the VLC tables. */ |
860 | for (i = 0; i < 7; i++) { |
861 | spectral_coeff_tab[i].table = &atrac3_vlc_table[atrac3_vlc_offs[i]]; |
862 | spectral_coeff_tab[i].table_allocated = atrac3_vlc_offs[i + 1] - |
863 | atrac3_vlc_offs[i ]; |
864 | init_vlc(&spectral_coeff_tab[i], 9, huff_tab_sizes[i], |
865 | huff_bits[i], 1, 1, |
866 | huff_codes[i], 1, 1, INIT_VLC_USE_NEW_STATIC); |
867 | } |
868 | } |
869 | |
870 | static av_cold int atrac3_decode_init(AVCodecContext *avctx) |
871 | { |
872 | static int static_init_done; |
873 | int i, js_pair, ret; |
874 | int version, delay, samples_per_frame, frame_factor; |
875 | const uint8_t *edata_ptr = avctx->extradata; |
876 | ATRAC3Context *q = avctx->priv_data; |
877 | |
878 | if (avctx->channels < MIN_CHANNELS || avctx->channels > MAX_CHANNELS) { |
879 | av_log(avctx, AV_LOG_ERROR, "Channel configuration error!\n"); |
880 | return AVERROR(EINVAL); |
881 | } |
882 | |
883 | if (!static_init_done) |
884 | atrac3_init_static_data(); |
885 | static_init_done = 1; |
886 | |
887 | /* Take care of the codec-specific extradata. */ |
888 | if (avctx->codec_id == AV_CODEC_ID_ATRAC3AL) { |
889 | version = 4; |
890 | samples_per_frame = SAMPLES_PER_FRAME * avctx->channels; |
891 | delay = 0x88E; |
892 | q->coding_mode = SINGLE; |
893 | } else if (avctx->extradata_size == 14) { |
894 | /* Parse the extradata, WAV format */ |
895 | av_log(avctx, AV_LOG_DEBUG, "[0-1] %d\n", |
896 | bytestream_get_le16(&edata_ptr)); // Unknown value always 1 |
897 | edata_ptr += 4; // samples per channel |
898 | q->coding_mode = bytestream_get_le16(&edata_ptr); |
899 | av_log(avctx, AV_LOG_DEBUG,"[8-9] %d\n", |
900 | bytestream_get_le16(&edata_ptr)); //Dupe of coding mode |
901 | frame_factor = bytestream_get_le16(&edata_ptr); // Unknown always 1 |
902 | av_log(avctx, AV_LOG_DEBUG,"[12-13] %d\n", |
903 | bytestream_get_le16(&edata_ptr)); // Unknown always 0 |
904 | |
905 | /* setup */ |
906 | samples_per_frame = SAMPLES_PER_FRAME * avctx->channels; |
907 | version = 4; |
908 | delay = 0x88E; |
909 | q->coding_mode = q->coding_mode ? JOINT_STEREO : SINGLE; |
910 | q->scrambled_stream = 0; |
911 | |
912 | if (avctx->block_align != 96 * avctx->channels * frame_factor && |
913 | avctx->block_align != 152 * avctx->channels * frame_factor && |
914 | avctx->block_align != 192 * avctx->channels * frame_factor) { |
915 | av_log(avctx, AV_LOG_ERROR, "Unknown frame/channel/frame_factor " |
916 | "configuration %d/%d/%d\n", avctx->block_align, |
917 | avctx->channels, frame_factor); |
918 | return AVERROR_INVALIDDATA; |
919 | } |
920 | } else if (avctx->extradata_size == 12 || avctx->extradata_size == 10) { |
921 | /* Parse the extradata, RM format. */ |
922 | version = bytestream_get_be32(&edata_ptr); |
923 | samples_per_frame = bytestream_get_be16(&edata_ptr); |
924 | delay = bytestream_get_be16(&edata_ptr); |
925 | q->coding_mode = bytestream_get_be16(&edata_ptr); |
926 | q->scrambled_stream = 1; |
927 | |
928 | } else { |
929 | av_log(avctx, AV_LOG_ERROR, "Unknown extradata size %d.\n", |
930 | avctx->extradata_size); |
931 | return AVERROR(EINVAL); |
932 | } |
933 | |
934 | /* Check the extradata */ |
935 | |
936 | if (version != 4) { |
937 | av_log(avctx, AV_LOG_ERROR, "Version %d != 4.\n", version); |
938 | return AVERROR_INVALIDDATA; |
939 | } |
940 | |
941 | if (samples_per_frame != SAMPLES_PER_FRAME * avctx->channels) { |
942 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of samples per frame %d.\n", |
943 | samples_per_frame); |
944 | return AVERROR_INVALIDDATA; |
945 | } |
946 | |
947 | if (delay != 0x88E) { |
948 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of delay %x != 0x88E.\n", |
949 | delay); |
950 | return AVERROR_INVALIDDATA; |
951 | } |
952 | |
953 | if (q->coding_mode == SINGLE) |
954 | av_log(avctx, AV_LOG_DEBUG, "Single channels detected.\n"); |
955 | else if (q->coding_mode == JOINT_STEREO) { |
956 | if (avctx->channels % 2 == 1) { /* Joint stereo channels must be even */ |
957 | av_log(avctx, AV_LOG_ERROR, "Invalid joint stereo channel configuration.\n"); |
958 | return AVERROR_INVALIDDATA; |
959 | } |
960 | av_log(avctx, AV_LOG_DEBUG, "Joint stereo detected.\n"); |
961 | } else { |
962 | av_log(avctx, AV_LOG_ERROR, "Unknown channel coding mode %x!\n", |
963 | q->coding_mode); |
964 | return AVERROR_INVALIDDATA; |
965 | } |
966 | |
967 | if (avctx->block_align >= UINT_MAX / 2) |
968 | return AVERROR(EINVAL); |
969 | |
970 | q->decoded_bytes_buffer = av_mallocz(FFALIGN(avctx->block_align, 4) + |
971 | AV_INPUT_BUFFER_PADDING_SIZE); |
972 | if (!q->decoded_bytes_buffer) |
973 | return AVERROR(ENOMEM); |
974 | |
975 | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; |
976 | |
977 | /* initialize the MDCT transform */ |
978 | if ((ret = ff_mdct_init(&q->mdct_ctx, 9, 1, 1.0 / 32768)) < 0) { |
979 | av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); |
980 | av_freep(&q->decoded_bytes_buffer); |
981 | return ret; |
982 | } |
983 | |
984 | /* init the joint-stereo decoding data */ |
985 | for (js_pair = 0; js_pair < MAX_JS_PAIRS; js_pair++) { |
986 | q->weighting_delay[js_pair][0] = 0; |
987 | q->weighting_delay[js_pair][1] = 7; |
988 | q->weighting_delay[js_pair][2] = 0; |
989 | q->weighting_delay[js_pair][3] = 7; |
990 | q->weighting_delay[js_pair][4] = 0; |
991 | q->weighting_delay[js_pair][5] = 7; |
992 | |
993 | for (i = 0; i < 4; i++) { |
994 | q->matrix_coeff_index_prev[js_pair][i] = 3; |
995 | q->matrix_coeff_index_now[js_pair][i] = 3; |
996 | q->matrix_coeff_index_next[js_pair][i] = 3; |
997 | } |
998 | } |
999 | |
1000 | ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3); |
1001 | q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); |
1002 | |
1003 | q->units = av_mallocz_array(avctx->channels, sizeof(*q->units)); |
1004 | if (!q->units || !q->fdsp) { |
1005 | atrac3_decode_close(avctx); |
1006 | return AVERROR(ENOMEM); |
1007 | } |
1008 | |
1009 | return 0; |
1010 | } |
1011 | |
/* Codec registration for block-based ATRAC3; shares init/close with the
 * ATRAC3AL decoder and outputs planar float samples. */
AVCodec ff_atrac3_decoder = {
    .name             = "atrac3",
    .long_name        = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"),
    .type             = AVMEDIA_TYPE_AUDIO,
    .id               = AV_CODEC_ID_ATRAC3,
    .priv_data_size   = sizeof(ATRAC3Context),
    .init             = atrac3_decode_init,
    .close            = atrac3_decode_close,
    .decode           = atrac3_decode_frame,
    .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
    .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                        AV_SAMPLE_FMT_NONE },
};
1025 | |
/* Codec registration for ATRAC3AL; differs from ff_atrac3_decoder in the
 * codec id and in using atrac3al_decode_frame as the decode callback. */
AVCodec ff_atrac3al_decoder = {
    .name             = "atrac3al",
    .long_name        = NULL_IF_CONFIG_SMALL("ATRAC3 AL (Adaptive TRansform Acoustic Coding 3 Advanced Lossless)"),
    .type             = AVMEDIA_TYPE_AUDIO,
    .id               = AV_CODEC_ID_ATRAC3AL,
    .priv_data_size   = sizeof(ATRAC3Context),
    .init             = atrac3_decode_init,
    .close            = atrac3_decode_close,
    .decode           = atrac3al_decode_frame,
    .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
    .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                        AV_SAMPLE_FMT_NONE },
};
1039 |