blob: 1a20f28d486fb0389681c7aad872348ecee9ecbe
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * imdct.c - inverse MDCT |
44 | **************************************************************************************/ |
45 | |
46 | #include "coder.h" |
47 | #include "assembly.h" |
48 | #include "aacdec.h" |
49 | |
/* rounding offset added to a sample just before it is shifted right by FBITS_OUT_IMDCT
 * (i.e. half of the value represented by the bits that the shift discards)
 */
#define RND_VAL		(1 << (FBITS_OUT_IMDCT-1))
51 | |
52 | #ifndef AAC_ENABLE_SBR |
53 | |
54 | /************************************************************************************** |
55 | * Function: DecWindowOverlap |
56 | * |
57 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
58 | * for winSequence LONG-LONG |
59 | * |
60 | * Inputs: input buffer (output of type-IV DCT) |
61 | * overlap buffer (saved from last time) |
62 | * number of channels |
63 | * window type (sin or KBD) for input buffer |
64 | * window type (sin or KBD) for overlap buffer |
65 | * |
66 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
67 | * |
68 | * Return: none |
69 | * |
70 | * Notes: this processes one channel at a time, but skips every other sample in |
71 | * the output buffer (pcm) for stereo interleaving |
72 | * this should fit in registers on ARM |
73 | * |
74 | * TODO: ARM5E version with saturating overlap/add (QADD) |
75 | * asm code with free pointer updates, better load scheduling |
76 | **************************************************************************************/ |
77 | static void DecWindowOverlap(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
78 | { |
79 | int in, w0, w1, f0, f1; |
80 | int *buf1, *over1; |
81 | short *pcm1; |
82 | const int *wndPrev, *wndCurr; |
83 | |
84 | buf0 += (1024 >> 1); |
85 | buf1 = buf0 - 1; |
86 | pcm1 = pcm0 + (1024 - 1) * nChans; |
87 | over1 = over0 + 1024 - 1; |
88 | |
89 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
90 | if (winTypeCurr == winTypePrev) { |
91 | /* cut window loads in half since current and overlap sections use same symmetric window */ |
92 | do { |
93 | w0 = *wndPrev++; |
94 | w1 = *wndPrev++; |
95 | in = *buf0++; |
96 | |
97 | f0 = MULSHIFT32(w0, in); |
98 | f1 = MULSHIFT32(w1, in); |
99 | |
100 | in = *over0; |
101 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
102 | pcm0 += nChans; |
103 | |
104 | in = *over1; |
105 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
106 | pcm1 -= nChans; |
107 | |
108 | in = *buf1--; |
109 | *over1-- = MULSHIFT32(w0, in); |
110 | *over0++ = MULSHIFT32(w1, in); |
111 | } while (over0 < over1); |
112 | } else { |
113 | /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */ |
114 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
115 | do { |
116 | w0 = *wndPrev++; |
117 | w1 = *wndPrev++; |
118 | in = *buf0++; |
119 | |
120 | f0 = MULSHIFT32(w0, in); |
121 | f1 = MULSHIFT32(w1, in); |
122 | |
123 | in = *over0; |
124 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
125 | pcm0 += nChans; |
126 | |
127 | in = *over1; |
128 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
129 | pcm1 -= nChans; |
130 | |
131 | w0 = *wndCurr++; |
132 | w1 = *wndCurr++; |
133 | in = *buf1--; |
134 | |
135 | *over1-- = MULSHIFT32(w0, in); |
136 | *over0++ = MULSHIFT32(w1, in); |
137 | } while (over0 < over1); |
138 | } |
139 | } |
140 | |
141 | /************************************************************************************** |
142 | * Function: DecWindowOverlapLongStart |
143 | * |
144 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
145 | * for winSequence LONG-START |
146 | * |
147 | * Inputs: input buffer (output of type-IV DCT) |
148 | * overlap buffer (saved from last time) |
149 | * number of channels |
150 | * window type (sin or KBD) for input buffer |
151 | * window type (sin or KBD) for overlap buffer |
152 | * |
153 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
154 | * |
155 | * Return: none |
156 | * |
157 | * Notes: this processes one channel at a time, but skips every other sample in |
158 | * the output buffer (pcm) for stereo interleaving |
159 | * this should fit in registers on ARM |
160 | * |
161 | * TODO: ARM5E version with saturating overlap/add (QADD) |
162 | * asm code with free pointer updates, better load scheduling |
163 | **************************************************************************************/ |
164 | static void DecWindowOverlapLongStart(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
165 | { |
166 | int i, in, w0, w1, f0, f1; |
167 | int *buf1, *over1; |
168 | short *pcm1; |
169 | const int *wndPrev, *wndCurr; |
170 | |
171 | buf0 += (1024 >> 1); |
172 | buf1 = buf0 - 1; |
173 | pcm1 = pcm0 + (1024 - 1) * nChans; |
174 | over1 = over0 + 1024 - 1; |
175 | |
176 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
177 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
178 | do { |
179 | w0 = *wndPrev++; |
180 | w1 = *wndPrev++; |
181 | in = *buf0++; |
182 | |
183 | f0 = MULSHIFT32(w0, in); |
184 | f1 = MULSHIFT32(w1, in); |
185 | |
186 | in = *over0; |
187 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
188 | pcm0 += nChans; |
189 | |
190 | in = *over1; |
191 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
192 | pcm1 -= nChans; |
193 | |
194 | in = *buf1--; |
195 | |
196 | *over1-- = 0; /* Wn = 0 for n = (2047, 2046, ... 1600) */ |
197 | *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */ |
198 | } while (--i); |
199 | |
200 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
201 | |
202 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
203 | do { |
204 | w0 = *wndPrev++; |
205 | w1 = *wndPrev++; |
206 | in = *buf0++; |
207 | |
208 | f0 = MULSHIFT32(w0, in); |
209 | f1 = MULSHIFT32(w1, in); |
210 | |
211 | in = *over0; |
212 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
213 | pcm0 += nChans; |
214 | |
215 | in = *over1; |
216 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
217 | pcm1 -= nChans; |
218 | |
219 | w0 = *wndCurr++; /* W[0], W[1], ... --> W[255], W[254], ... */ |
220 | w1 = *wndCurr++; /* W[127], W[126], ... --> W[128], W[129], ... */ |
221 | in = *buf1--; |
222 | |
223 | *over1-- = MULSHIFT32(w0, in); /* Wn = short window for n = (1599, 1598, ... , 1536) */ |
224 | *over0++ = MULSHIFT32(w1, in); /* Wn = short window for n = (1472, 1473, ... , 1535) */ |
225 | } while (over0 < over1); |
226 | } |
227 | |
228 | /************************************************************************************** |
229 | * Function: DecWindowOverlapLongStop |
230 | * |
231 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
232 | * for winSequence LONG-STOP |
233 | * |
234 | * Inputs: input buffer (output of type-IV DCT) |
235 | * overlap buffer (saved from last time) |
236 | * number of channels |
237 | * window type (sin or KBD) for input buffer |
238 | * window type (sin or KBD) for overlap buffer |
239 | * |
240 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
241 | * |
242 | * Return: none |
243 | * |
244 | * Notes: this processes one channel at a time, but skips every other sample in |
245 | * the output buffer (pcm) for stereo interleaving |
246 | * this should fit in registers on ARM |
247 | * |
248 | * TODO: ARM5E version with saturating overlap/add (QADD) |
249 | * asm code with free pointer updates, better load scheduling |
250 | **************************************************************************************/ |
251 | static void DecWindowOverlapLongStop(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
252 | { |
253 | int i, in, w0, w1, f0, f1; |
254 | int *buf1, *over1; |
255 | short *pcm1; |
256 | const int *wndPrev, *wndCurr; |
257 | |
258 | buf0 += (1024 >> 1); |
259 | buf1 = buf0 - 1; |
260 | pcm1 = pcm0 + (1024 - 1) * nChans; |
261 | over1 = over0 + 1024 - 1; |
262 | |
263 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
264 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
265 | |
266 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
267 | do { |
268 | /* Wn = 0 for n = (0, 1, ... 447) */ |
269 | /* Wn = 1 for n = (576, 577, ... 1023) */ |
270 | in = *buf0++; |
271 | f1 = in >> 1; /* scale since skipping multiply by Q31 */ |
272 | |
273 | in = *over0; |
274 | *pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT); |
275 | pcm0 += nChans; |
276 | |
277 | in = *over1; |
278 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
279 | pcm1 -= nChans; |
280 | |
281 | w0 = *wndCurr++; |
282 | w1 = *wndCurr++; |
283 | in = *buf1--; |
284 | |
285 | *over1-- = MULSHIFT32(w0, in); |
286 | *over0++ = MULSHIFT32(w1, in); |
287 | } while (--i); |
288 | |
289 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
290 | do { |
291 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
292 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
293 | in = *buf0++; |
294 | |
295 | f0 = MULSHIFT32(w0, in); |
296 | f1 = MULSHIFT32(w1, in); |
297 | |
298 | in = *over0; |
299 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
300 | pcm0 += nChans; |
301 | |
302 | in = *over1; |
303 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
304 | pcm1 -= nChans; |
305 | |
306 | w0 = *wndCurr++; |
307 | w1 = *wndCurr++; |
308 | in = *buf1--; |
309 | |
310 | *over1-- = MULSHIFT32(w0, in); |
311 | *over0++ = MULSHIFT32(w1, in); |
312 | } while (over0 < over1); |
313 | } |
314 | |
315 | /************************************************************************************** |
316 | * Function: DecWindowOverlapShort |
317 | * |
318 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
319 | * for winSequence EIGHT-SHORT (does all 8 short blocks) |
320 | * |
321 | * Inputs: input buffer (output of type-IV DCT) |
322 | * overlap buffer (saved from last time) |
323 | * number of channels |
324 | * window type (sin or KBD) for input buffer |
325 | * window type (sin or KBD) for overlap buffer |
326 | * |
327 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
328 | * |
329 | * Return: none |
330 | * |
331 | * Notes: this processes one channel at a time, but skips every other sample in |
332 | * the output buffer (pcm) for stereo interleaving |
333 | * this should fit in registers on ARM |
334 | * |
335 | * TODO: ARM5E version with saturating overlap/add (QADD) |
336 | * asm code with free pointer updates, better load scheduling |
337 | **************************************************************************************/ |
338 | static void DecWindowOverlapShort(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
339 | { |
340 | int i, in, w0, w1, f0, f1; |
341 | int *buf1, *over1; |
342 | short *pcm1; |
343 | const int *wndPrev, *wndCurr; |
344 | |
345 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
346 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
347 | |
348 | /* pcm[0-447] = 0 + overlap[0-447] */ |
349 | i = 448; |
350 | do { |
351 | f0 = *over0++; |
352 | f1 = *over0++; |
353 | *pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
354 | pcm0 += nChans; |
355 | *pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
356 | pcm0 += nChans; |
357 | i -= 2; |
358 | } while (i); |
359 | |
360 | /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */ |
361 | pcm1 = pcm0 + (128 - 1) * nChans; |
362 | over1 = over0 + 128 - 1; |
363 | buf0 += 64; |
364 | buf1 = buf0 - 1; |
365 | do { |
366 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
367 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
368 | in = *buf0++; |
369 | |
370 | f0 = MULSHIFT32(w0, in); |
371 | f1 = MULSHIFT32(w1, in); |
372 | |
373 | in = *over0; |
374 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
375 | pcm0 += nChans; |
376 | |
377 | in = *over1; |
378 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
379 | pcm1 -= nChans; |
380 | |
381 | w0 = *wndCurr++; |
382 | w1 = *wndCurr++; |
383 | in = *buf1--; |
384 | |
385 | /* save over0/over1 for next short block, in the slots just vacated */ |
386 | *over1-- = MULSHIFT32(w0, in); |
387 | *over0++ = MULSHIFT32(w1, in); |
388 | } while (over0 < over1); |
389 | |
390 | /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703] |
391 | * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831] |
392 | * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959] |
393 | */ |
394 | for (i = 0; i < 3; i++) { |
395 | pcm0 += 64 * nChans; |
396 | pcm1 = pcm0 + (128 - 1) * nChans; |
397 | over0 += 64; |
398 | over1 = over0 + 128 - 1; |
399 | buf0 += 64; |
400 | buf1 = buf0 - 1; |
401 | wndCurr -= 128; |
402 | |
403 | do { |
404 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
405 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
406 | in = *buf0++; |
407 | |
408 | f0 = MULSHIFT32(w0, in); |
409 | f1 = MULSHIFT32(w1, in); |
410 | |
411 | in = *(over0 - 128); /* from last short block */ |
412 | in += *(over0 + 0); /* from last full frame */ |
413 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
414 | pcm0 += nChans; |
415 | |
416 | in = *(over1 - 128); /* from last short block */ |
417 | in += *(over1 + 0); /* from last full frame */ |
418 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
419 | pcm1 -= nChans; |
420 | |
421 | /* save over0/over1 for next short block, in the slots just vacated */ |
422 | in = *buf1--; |
423 | *over1-- = MULSHIFT32(w0, in); |
424 | *over0++ = MULSHIFT32(w1, in); |
425 | } while (over0 < over1); |
426 | } |
427 | |
428 | /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023] |
429 | * over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127] |
430 | */ |
431 | pcm0 += 64 * nChans; |
432 | over0 -= 832; /* points at overlap[64] */ |
433 | over1 = over0 + 128 - 1; /* points at overlap[191] */ |
434 | buf0 += 64; |
435 | buf1 = buf0 - 1; |
436 | wndCurr -= 128; |
437 | do { |
438 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
439 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
440 | in = *buf0++; |
441 | |
442 | f0 = MULSHIFT32(w0, in); |
443 | f1 = MULSHIFT32(w1, in); |
444 | |
445 | in = *(over0 + 768); /* from last short block */ |
446 | in += *(over0 + 896); /* from last full frame */ |
447 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
448 | pcm0 += nChans; |
449 | |
450 | in = *(over1 + 768); /* from last short block */ |
451 | *(over1 - 128) = in + f1; |
452 | |
453 | in = *buf1--; |
454 | *over1-- = MULSHIFT32(w0, in); /* save in overlap[128-191] */ |
455 | *over0++ = MULSHIFT32(w1, in); /* save in overlap[64-127] */ |
456 | } while (over0 < over1); |
457 | |
458 | /* over0 now points at overlap[128] */ |
459 | |
460 | /* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127] |
461 | * over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127] |
462 | * over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127] |
463 | * over[448-576] = Wc[128-255] * block7[128-255] |
464 | */ |
465 | for (i = 0; i < 3; i++) { |
466 | over0 += 64; |
467 | over1 = over0 + 128 - 1; |
468 | buf0 += 64; |
469 | buf1 = buf0 - 1; |
470 | wndCurr -= 128; |
471 | do { |
472 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
473 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
474 | in = *buf0++; |
475 | |
476 | f0 = MULSHIFT32(w0, in); |
477 | f1 = MULSHIFT32(w1, in); |
478 | |
479 | /* from last short block */ |
480 | *(over0 - 128) -= f0; |
481 | *(over1 - 128) += f1; |
482 | |
483 | in = *buf1--; |
484 | *over1-- = MULSHIFT32(w0, in); |
485 | *over0++ = MULSHIFT32(w1, in); |
486 | } while (over0 < over1); |
487 | } |
488 | |
489 | /* over[576-1024] = 0 */ |
490 | i = 448; |
491 | over0 += 64; |
492 | do { |
493 | *over0++ = 0; |
494 | *over0++ = 0; |
495 | *over0++ = 0; |
496 | *over0++ = 0; |
497 | i -= 4; |
498 | } while (i); |
499 | } |
500 | |
501 | #endif /* !AAC_ENABLE_SBR */ |
502 | |
503 | /************************************************************************************** |
504 | * Function: IMDCT |
505 | * |
506 | * Description: inverse transform and convert to 16-bit PCM |
507 | * |
508 | * Inputs: valid AACDecInfo struct |
509 | * index of current channel (0 for SCE/LFE, 0 or 1 for CPE) |
510 | * output channel (range = [0, nChans-1]) |
511 | * |
512 | * Outputs: complete frame of decoded PCM, after inverse transform |
513 | * |
514 | * Return: 0 if successful, -1 if error |
515 | * |
516 | * Notes: If AAC_ENABLE_SBR is defined at compile time then window + overlap |
517 | * does NOT clip to 16-bit PCM and does NOT interleave channels |
518 | * If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap |
519 | * does clip to 16-bit PCM and interleaves channels |
520 | * If SBR is enabled at compile time, but we don't know whether it is |
521 | * actually used for this frame (e.g. the first frame of a stream), |
522 | * we need to produce both clipped 16-bit PCM in outbuf AND |
523 | * unclipped 32-bit PCM in the SBR input buffer. In this case we make |
524 | * a separate pass over the 32-bit PCM to produce 16-bit PCM output. |
525 | * This inflicts a slight performance hit when decoding non-SBR files. |
526 | **************************************************************************************/ |
527 | int IMDCT(AACDecInfo *aacDecInfo, int ch, int chOut, short *outbuf) |
528 | { |
529 | int i; |
530 | PSInfoBase *psi; |
531 | ICSInfo *icsInfo; |
532 | |
533 | /* validate pointers */ |
534 | if (!aacDecInfo || !aacDecInfo->psInfoBase) { |
535 | return -1; |
536 | } |
537 | psi = (PSInfoBase *)(aacDecInfo->psInfoBase); |
538 | icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]); |
539 | outbuf += chOut; |
540 | |
541 | /* optimized type-IV DCT (operates inplace) */ |
542 | if (icsInfo->winSequence == 2) { |
543 | /* 8 short blocks */ |
544 | for (i = 0; i < 8; i++) { |
545 | DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]); |
546 | } |
547 | } else { |
548 | /* 1 long block */ |
549 | DCT4(1, psi->coef[ch], psi->gbCurrent[ch]); |
550 | } |
551 | |
552 | #ifdef AAC_ENABLE_SBR |
553 | /* window, overlap-add, don't clip to short (send to SBR decoder) |
554 | * store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer |
555 | */ |
556 | if (icsInfo->winSequence == 0) { |
557 | DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
558 | } else if (icsInfo->winSequence == 1) { |
559 | DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
560 | } else if (icsInfo->winSequence == 2) { |
561 | DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
562 | } else if (icsInfo->winSequence == 3) { |
563 | DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
564 | } |
565 | |
566 | if (!aacDecInfo->sbrEnabled) { |
567 | for (i = 0; i < AAC_MAX_NSAMPS; i++) { |
568 | *outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT); |
569 | outbuf += aacDecInfo->nChans; |
570 | } |
571 | } |
572 | |
573 | aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch]; |
574 | aacDecInfo->rawSampleBytes = sizeof(int); |
575 | aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT; |
576 | #else |
577 | /* window, overlap-add, round to PCM - optimized for each window sequence */ |
578 | if (icsInfo->winSequence == 0) { |
579 | DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
580 | } else if (icsInfo->winSequence == 1) { |
581 | DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
582 | } else if (icsInfo->winSequence == 2) { |
583 | DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
584 | } else if (icsInfo->winSequence == 3) { |
585 | DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
586 | } |
587 | |
588 | aacDecInfo->rawSampleBuf[ch] = 0; |
589 | aacDecInfo->rawSampleBytes = 0; |
590 | aacDecInfo->rawSampleFBits = 0; |
591 | #endif |
592 | |
593 | psi->prevWinShape[chOut] = icsInfo->winShape; |
594 | |
595 | return 0; |
596 | } |
597 |