platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
3  *
4  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5  *
6  * The contents of this file, and the files included with this file,
7  * are subject to the current version of the RealNetworks Public
8  * Source License (the "RPSL") available at
9  * http://www.helixcommunity.org/content/rpsl unless you have licensed
10  * the file under the current version of the RealNetworks Community
11  * Source License (the "RCSL") available at
12  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13  * will apply. You may also obtain the license terms directly from
14  * RealNetworks.  You may not use this file except in compliance with
15  * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
17  * the rights, obligations and limitations governing use of the
18  * contents of the file.
19  *
20  * This file is part of the Helix DNA Technology. RealNetworks is the
21  * developer of the Original Code and owns the copyrights in the
22  * portions it created.
23  *
24  * This file, and the files included with this file, is distributed
25  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29  * ENJOYMENT OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  *    http://www.helixcommunity.org/content/tck
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point HE-AAC decoder
40  * Jon Recker (jrecker@real.com)
41  * February 2005
42  *
43  * imdct.c - inverse MDCT
44  **************************************************************************************/
45
46 #include "coder.h"
47 #include "assembly.h"
48 #include "aacdec.h"
49
50 #define RND_VAL     (1 << (FBITS_OUT_IMDCT-1))
51
52 #ifndef AAC_ENABLE_SBR
53
54 /**************************************************************************************
55  * Function:    DecWindowOverlap
56  *
57  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
58  *                for winSequence LONG-LONG
59  *
60  * Inputs:      input buffer (output of type-IV DCT)
61  *              overlap buffer (saved from last time)
62  *              number of channels
63  *              window type (sin or KBD) for input buffer
64  *              window type (sin or KBD) for overlap buffer
65  *
66  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
67  *
68  * Return:      none
69  *
70  * Notes:       this processes one channel at a time, but skips every other sample in
71  *                the output buffer (pcm) for stereo interleaving
72  *              this should fit in registers on ARM
73  *
74  * TODO:        ARM5E version with saturating overlap/add (QADD)
75  *              asm code with free pointer updates, better load scheduling
76  **************************************************************************************/
77 static void DecWindowOverlap(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
78 {
79     int in, w0, w1, f0, f1;
80     int *buf1, *over1;
81     short *pcm1;
82     const int *wndPrev, *wndCurr;
83
84     buf0 += (1024 >> 1);
85     buf1  = buf0  - 1;
86     pcm1  = pcm0 + (1024 - 1) * nChans;
87     over1 = over0 + 1024 - 1;
88
89     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
90     if (winTypeCurr == winTypePrev) {
91         /* cut window loads in half since current and overlap sections use same symmetric window */
92         do {
93             w0 = *wndPrev++;
94             w1 = *wndPrev++;
95             in = *buf0++;
96
97             f0 = MULSHIFT32(w0, in);
98             f1 = MULSHIFT32(w1, in);
99
100             in = *over0;
101             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
102             pcm0 += nChans;
103
104             in = *over1;
105             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
106             pcm1 -= nChans;
107
108             in = *buf1--;
109             *over1-- = MULSHIFT32(w0, in);
110             *over0++ = MULSHIFT32(w1, in);
111         } while (over0 < over1);
112     } else {
113         /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
114         wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
115         do {
116             w0 = *wndPrev++;
117             w1 = *wndPrev++;
118             in = *buf0++;
119
120             f0 = MULSHIFT32(w0, in);
121             f1 = MULSHIFT32(w1, in);
122
123             in = *over0;
124             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
125             pcm0 += nChans;
126
127             in = *over1;
128             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
129             pcm1 -= nChans;
130
131             w0 = *wndCurr++;
132             w1 = *wndCurr++;
133             in = *buf1--;
134
135             *over1-- = MULSHIFT32(w0, in);
136             *over0++ = MULSHIFT32(w1, in);
137         } while (over0 < over1);
138     }
139 }
140
141 /**************************************************************************************
142  * Function:    DecWindowOverlapLongStart
143  *
144  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
145  *                for winSequence LONG-START
146  *
147  * Inputs:      input buffer (output of type-IV DCT)
148  *              overlap buffer (saved from last time)
149  *              number of channels
150  *              window type (sin or KBD) for input buffer
151  *              window type (sin or KBD) for overlap buffer
152  *
153  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
154  *
155  * Return:      none
156  *
157  * Notes:       this processes one channel at a time, but skips every other sample in
158  *                the output buffer (pcm) for stereo interleaving
159  *              this should fit in registers on ARM
160  *
161  * TODO:        ARM5E version with saturating overlap/add (QADD)
162  *              asm code with free pointer updates, better load scheduling
163  **************************************************************************************/
164 static void DecWindowOverlapLongStart(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
165 {
166     int i,  in, w0, w1, f0, f1;
167     int *buf1, *over1;
168     short *pcm1;
169     const int *wndPrev, *wndCurr;
170
171     buf0 += (1024 >> 1);
172     buf1  = buf0  - 1;
173     pcm1  = pcm0 + (1024 - 1) * nChans;
174     over1 = over0 + 1024 - 1;
175
176     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
177     i = 448;    /* 2 outputs, 2 overlaps per loop */
178     do {
179         w0 = *wndPrev++;
180         w1 = *wndPrev++;
181         in = *buf0++;
182
183         f0 = MULSHIFT32(w0, in);
184         f1 = MULSHIFT32(w1, in);
185
186         in = *over0;
187         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
188         pcm0 += nChans;
189
190         in = *over1;
191         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
192         pcm1 -= nChans;
193
194         in = *buf1--;
195
196         *over1-- = 0;       /* Wn = 0 for n = (2047, 2046, ... 1600) */
197         *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */
198     } while (--i);
199
200     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
201
202     /* do 64 more loops - 2 outputs, 2 overlaps per loop */
203     do {
204         w0 = *wndPrev++;
205         w1 = *wndPrev++;
206         in = *buf0++;
207
208         f0 = MULSHIFT32(w0, in);
209         f1 = MULSHIFT32(w1, in);
210
211         in = *over0;
212         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
213         pcm0 += nChans;
214
215         in = *over1;
216         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
217         pcm1 -= nChans;
218
219         w0 = *wndCurr++;    /* W[0], W[1], ... --> W[255], W[254], ... */
220         w1 = *wndCurr++;    /* W[127], W[126], ... --> W[128], W[129], ... */
221         in = *buf1--;
222
223         *over1-- = MULSHIFT32(w0, in);  /* Wn = short window for n = (1599, 1598, ... , 1536) */
224         *over0++ = MULSHIFT32(w1, in);  /* Wn = short window for n = (1472, 1473, ... , 1535) */
225     } while (over0 < over1);
226 }
227
228 /**************************************************************************************
229  * Function:    DecWindowOverlapLongStop
230  *
231  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
232  *                for winSequence LONG-STOP
233  *
234  * Inputs:      input buffer (output of type-IV DCT)
235  *              overlap buffer (saved from last time)
236  *              number of channels
237  *              window type (sin or KBD) for input buffer
238  *              window type (sin or KBD) for overlap buffer
239  *
240  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
241  *
242  * Return:      none
243  *
244  * Notes:       this processes one channel at a time, but skips every other sample in
245  *                the output buffer (pcm) for stereo interleaving
246  *              this should fit in registers on ARM
247  *
248  * TODO:        ARM5E version with saturating overlap/add (QADD)
249  *              asm code with free pointer updates, better load scheduling
250  **************************************************************************************/
251 static void DecWindowOverlapLongStop(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
252 {
253     int i, in, w0, w1, f0, f1;
254     int *buf1, *over1;
255     short *pcm1;
256     const int *wndPrev, *wndCurr;
257
258     buf0 += (1024 >> 1);
259     buf1  = buf0  - 1;
260     pcm1  = pcm0 + (1024 - 1) * nChans;
261     over1 = over0 + 1024 - 1;
262
263     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
264     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
265
266     i = 448;    /* 2 outputs, 2 overlaps per loop */
267     do {
268         /* Wn = 0 for n = (0, 1, ... 447) */
269         /* Wn = 1 for n = (576, 577, ... 1023) */
270         in = *buf0++;
271         f1 = in >> 1;   /* scale since skipping multiply by Q31 */
272
273         in = *over0;
274         *pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT);
275         pcm0 += nChans;
276
277         in = *over1;
278         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
279         pcm1 -= nChans;
280
281         w0 = *wndCurr++;
282         w1 = *wndCurr++;
283         in = *buf1--;
284
285         *over1-- = MULSHIFT32(w0, in);
286         *over0++ = MULSHIFT32(w1, in);
287     } while (--i);
288
289     /* do 64 more loops - 2 outputs, 2 overlaps per loop */
290     do {
291         w0 = *wndPrev++;    /* W[0], W[1], ...W[63] */
292         w1 = *wndPrev++;    /* W[127], W[126], ... W[64] */
293         in = *buf0++;
294
295         f0 = MULSHIFT32(w0, in);
296         f1 = MULSHIFT32(w1, in);
297
298         in = *over0;
299         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
300         pcm0 += nChans;
301
302         in = *over1;
303         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
304         pcm1 -= nChans;
305
306         w0 = *wndCurr++;
307         w1 = *wndCurr++;
308         in = *buf1--;
309
310         *over1-- = MULSHIFT32(w0, in);
311         *over0++ = MULSHIFT32(w1, in);
312     } while (over0 < over1);
313 }
314
315 /**************************************************************************************
316  * Function:    DecWindowOverlapShort
317  *
318  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
319  *                for winSequence EIGHT-SHORT (does all 8 short blocks)
320  *
321  * Inputs:      input buffer (output of type-IV DCT)
322  *              overlap buffer (saved from last time)
323  *              number of channels
324  *              window type (sin or KBD) for input buffer
325  *              window type (sin or KBD) for overlap buffer
326  *
327  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
328  *
329  * Return:      none
330  *
331  * Notes:       this processes one channel at a time, but skips every other sample in
332  *                the output buffer (pcm) for stereo interleaving
333  *              this should fit in registers on ARM
334  *
335  * TODO:        ARM5E version with saturating overlap/add (QADD)
336  *              asm code with free pointer updates, better load scheduling
337  **************************************************************************************/
338 static void DecWindowOverlapShort(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
339 {
340     int i, in, w0, w1, f0, f1;
341     int *buf1, *over1;
342     short *pcm1;
343     const int *wndPrev, *wndCurr;
344
345     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
346     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
347
348     /* pcm[0-447] = 0 + overlap[0-447] */
349     i = 448;
350     do {
351         f0 = *over0++;
352         f1 = *over0++;
353         *pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT);
354         pcm0 += nChans;
355         *pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT);
356         pcm0 += nChans;
357         i -= 2;
358     } while (i);
359
360     /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
361     pcm1  = pcm0 + (128 - 1) * nChans;
362     over1 = over0 + 128 - 1;
363     buf0 += 64;
364     buf1  = buf0  - 1;
365     do {
366         w0 = *wndPrev++;    /* W[0], W[1], ...W[63] */
367         w1 = *wndPrev++;    /* W[127], W[126], ... W[64] */
368         in = *buf0++;
369
370         f0 = MULSHIFT32(w0, in);
371         f1 = MULSHIFT32(w1, in);
372
373         in = *over0;
374         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
375         pcm0 += nChans;
376
377         in = *over1;
378         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
379         pcm1 -= nChans;
380
381         w0 = *wndCurr++;
382         w1 = *wndCurr++;
383         in = *buf1--;
384
385         /* save over0/over1 for next short block, in the slots just vacated */
386         *over1-- = MULSHIFT32(w0, in);
387         *over0++ = MULSHIFT32(w1, in);
388     } while (over0 < over1);
389
390     /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
391      * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
392      * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
393      */
394     for (i = 0; i < 3; i++) {
395         pcm0 += 64 * nChans;
396         pcm1 = pcm0 + (128 - 1) * nChans;
397         over0 += 64;
398         over1 = over0 + 128 - 1;
399         buf0 += 64;
400         buf1 = buf0 - 1;
401         wndCurr -= 128;
402
403         do {
404             w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
405             w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
406             in = *buf0++;
407
408             f0 = MULSHIFT32(w0, in);
409             f1 = MULSHIFT32(w1, in);
410
411             in  = *(over0 - 128);   /* from last short block */
412             in += *(over0 + 0);     /* from last full frame */
413             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
414             pcm0 += nChans;
415
416             in  = *(over1 - 128);   /* from last short block */
417             in += *(over1 + 0);     /* from last full frame */
418             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
419             pcm1 -= nChans;
420
421             /* save over0/over1 for next short block, in the slots just vacated */
422             in = *buf1--;
423             *over1-- = MULSHIFT32(w0, in);
424             *over0++ = MULSHIFT32(w1, in);
425         } while (over0 < over1);
426     }
427
428     /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63]   * block4[0-63] + overlap[960-1023]
429      * over[0-63]    = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
430      */
431     pcm0 += 64 * nChans;
432     over0 -= 832;               /* points at overlap[64] */
433     over1 = over0 + 128 - 1;    /* points at overlap[191] */
434     buf0 += 64;
435     buf1 = buf0 - 1;
436     wndCurr -= 128;
437     do {
438         w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
439         w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
440         in = *buf0++;
441
442         f0 = MULSHIFT32(w0, in);
443         f1 = MULSHIFT32(w1, in);
444
445         in  = *(over0 + 768);   /* from last short block */
446         in += *(over0 + 896);   /* from last full frame */
447         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
448         pcm0 += nChans;
449
450         in  = *(over1 + 768);   /* from last short block */
451         *(over1 - 128) = in + f1;
452
453         in = *buf1--;
454         *over1-- = MULSHIFT32(w0, in);  /* save in overlap[128-191] */
455         *over0++ = MULSHIFT32(w1, in);  /* save in overlap[64-127] */
456     } while (over0 < over1);
457
458     /* over0 now points at overlap[128] */
459
460     /* over[64-191]   = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
461      * over[192-319]  = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
462      * over[320-447]  = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
463      * over[448-576]  = Wc[128-255] * block7[128-255]
464      */
465     for (i = 0; i < 3; i++) {
466         over0 += 64;
467         over1 = over0 + 128 - 1;
468         buf0 += 64;
469         buf1 = buf0 - 1;
470         wndCurr -= 128;
471         do {
472             w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
473             w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
474             in = *buf0++;
475
476             f0 = MULSHIFT32(w0, in);
477             f1 = MULSHIFT32(w1, in);
478
479             /* from last short block */
480             *(over0 - 128) -= f0;
481             *(over1 - 128) += f1;
482
483             in = *buf1--;
484             *over1-- = MULSHIFT32(w0, in);
485             *over0++ = MULSHIFT32(w1, in);
486         } while (over0 < over1);
487     }
488
489     /* over[576-1024] = 0 */
490     i = 448;
491     over0 += 64;
492     do {
493         *over0++ = 0;
494         *over0++ = 0;
495         *over0++ = 0;
496         *over0++ = 0;
497         i -= 4;
498     } while (i);
499 }
500
501 #endif  /* !AAC_ENABLE_SBR */
502
503 /**************************************************************************************
504  * Function:    IMDCT
505  *
506  * Description: inverse transform and convert to 16-bit PCM
507  *
508  * Inputs:      valid AACDecInfo struct
509  *              index of current channel (0 for SCE/LFE, 0 or 1 for CPE)
510  *              output channel (range = [0, nChans-1])
511  *
512  * Outputs:     complete frame of decoded PCM, after inverse transform
513  *
514  * Return:      0 if successful, -1 if error
515  *
516  * Notes:       If AAC_ENABLE_SBR is defined at compile time then window + overlap
517  *                does NOT clip to 16-bit PCM and does NOT interleave channels
518  *              If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap
519  *                does clip to 16-bit PCM and interleaves channels
520  *              If SBR is enabled at compile time, but we don't know whether it is
521  *                actually used for this frame (e.g. the first frame of a stream),
522  *                we need to produce both clipped 16-bit PCM in outbuf AND
523  *                unclipped 32-bit PCM in the SBR input buffer. In this case we make
524  *                a separate pass over the 32-bit PCM to produce 16-bit PCM output.
525  *                This inflicts a slight performance hit when decoding non-SBR files.
526  **************************************************************************************/
527 int IMDCT(AACDecInfo *aacDecInfo, int ch, int chOut, short *outbuf)
528 {
529     int i;
530     PSInfoBase *psi;
531     ICSInfo *icsInfo;
532
533     /* validate pointers */
534     if (!aacDecInfo || !aacDecInfo->psInfoBase) {
535         return -1;
536     }
537     psi = (PSInfoBase *)(aacDecInfo->psInfoBase);
538     icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]);
539     outbuf += chOut;
540
541     /* optimized type-IV DCT (operates inplace) */
542     if (icsInfo->winSequence == 2) {
543         /* 8 short blocks */
544         for (i = 0; i < 8; i++) {
545             DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]);
546         }
547     } else {
548         /* 1 long block */
549         DCT4(1, psi->coef[ch], psi->gbCurrent[ch]);
550     }
551
552 #ifdef AAC_ENABLE_SBR
553     /* window, overlap-add, don't clip to short (send to SBR decoder)
554      * store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer
555      */
556     if (icsInfo->winSequence == 0) {
557         DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
558     } else if (icsInfo->winSequence == 1) {
559         DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
560     } else if (icsInfo->winSequence == 2) {
561         DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
562     } else if (icsInfo->winSequence == 3) {
563         DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
564     }
565
566     if (!aacDecInfo->sbrEnabled) {
567         for (i = 0; i < AAC_MAX_NSAMPS; i++) {
568             *outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT);
569             outbuf += aacDecInfo->nChans;
570         }
571     }
572
573     aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch];
574     aacDecInfo->rawSampleBytes = sizeof(int);
575     aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT;
576 #else
577     /* window, overlap-add, round to PCM - optimized for each window sequence */
578     if (icsInfo->winSequence == 0) {
579         DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
580     } else if (icsInfo->winSequence == 1) {
581         DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
582     } else if (icsInfo->winSequence == 2) {
583         DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
584     } else if (icsInfo->winSequence == 3) {
585         DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
586     }
587
588     aacDecInfo->rawSampleBuf[ch] = 0;
589     aacDecInfo->rawSampleBytes = 0;
590     aacDecInfo->rawSampleFBits = 0;
591 #endif
592
593     psi->prevWinShape[chOut] = icsInfo->winShape;
594
595     return 0;
596 }
597
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
3	*
4	* Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5	*
6	* The contents of this file, and the files included with this file,
7	* are subject to the current version of the RealNetworks Public
8	* Source License (the "RPSL") available at
9	* http://www.helixcommunity.org/content/rpsl unless you have licensed
10	* the file under the current version of the RealNetworks Community
11	* Source License (the "RCSL") available at
12	* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13	* will apply. You may also obtain the license terms directly from
14	* RealNetworks. You may not use this file except in compliance with
15	* the RPSL or, if you have a valid RCSL with RealNetworks applicable
16	* to this file, the RCSL. Please see the applicable RPSL or RCSL for
17	* the rights, obligations and limitations governing use of the
18	* contents of the file.
19	*
20	* This file is part of the Helix DNA Technology. RealNetworks is the
21	* developer of the Original Code and owns the copyrights in the
22	* portions it created.
23	*
24	* This file, and the files included with this file, is distributed
25	* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26	* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27	* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28	* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29	* ENJOYMENT OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* http://www.helixcommunity.org/content/tck
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point HE-AAC decoder
40	* Jon Recker (jrecker@real.com)
41	* February 2005
42	*
43	* imdct.c - inverse MDCT
44	**************************************************************************************/
45
46	#include "coder.h"
47	#include "assembly.h"
48	#include "aacdec.h"
49
50	#define RND_VAL (1 << (FBITS_OUT_IMDCT-1))
51
52	#ifndef AAC_ENABLE_SBR
53
54	/**************************************************************************************
55	* Function: DecWindowOverlap
56	*
57	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
58	* for winSequence LONG-LONG
59	*
60	* Inputs: input buffer (output of type-IV DCT)
61	* overlap buffer (saved from last time)
62	* number of channels
63	* window type (sin or KBD) for input buffer
64	* window type (sin or KBD) for overlap buffer
65	*
66	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
67	*
68	* Return: none
69	*
70	* Notes: this processes one channel at a time, but skips every other sample in
71	* the output buffer (pcm) for stereo interleaving
72	* this should fit in registers on ARM
73	*
74	* TODO: ARM5E version with saturating overlap/add (QADD)
75	* asm code with free pointer updates, better load scheduling
76	**************************************************************************************/
77	static void DecWindowOverlap(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
78	{
79	int in, w0, w1, f0, f1;
80	int buf1, over1;
81	short *pcm1;
82	const int wndPrev, wndCurr;
83
84	buf0 += (1024 >> 1);
85	buf1 = buf0 - 1;
86	pcm1 = pcm0 + (1024 - 1) * nChans;
87	over1 = over0 + 1024 - 1;
88
89	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
90	if (winTypeCurr == winTypePrev) {
91	/* cut window loads in half since current and overlap sections use same symmetric window */
92	do {
93	w0 = *wndPrev++;
94	w1 = *wndPrev++;
95	in = *buf0++;
96
97	f0 = MULSHIFT32(w0, in);
98	f1 = MULSHIFT32(w1, in);
99
100	in = *over0;
101	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
102	pcm0 += nChans;
103
104	in = *over1;
105	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
106	pcm1 -= nChans;
107
108	in = *buf1--;
109	*over1-- = MULSHIFT32(w0, in);
110	*over0++ = MULSHIFT32(w1, in);
111	} while (over0 < over1);
112	} else {
113	/* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
114	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
115	do {
116	w0 = *wndPrev++;
117	w1 = *wndPrev++;
118	in = *buf0++;
119
120	f0 = MULSHIFT32(w0, in);
121	f1 = MULSHIFT32(w1, in);
122
123	in = *over0;
124	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
125	pcm0 += nChans;
126
127	in = *over1;
128	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
129	pcm1 -= nChans;
130
131	w0 = *wndCurr++;
132	w1 = *wndCurr++;
133	in = *buf1--;
134
135	*over1-- = MULSHIFT32(w0, in);
136	*over0++ = MULSHIFT32(w1, in);
137	} while (over0 < over1);
138	}
139	}
140
141	/**************************************************************************************
142	* Function: DecWindowOverlapLongStart
143	*
144	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
145	* for winSequence LONG-START
146	*
147	* Inputs: input buffer (output of type-IV DCT)
148	* overlap buffer (saved from last time)
149	* number of channels
150	* window type (sin or KBD) for input buffer
151	* window type (sin or KBD) for overlap buffer
152	*
153	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
154	*
155	* Return: none
156	*
157	* Notes: this processes one channel at a time, but skips every other sample in
158	* the output buffer (pcm) for stereo interleaving
159	* this should fit in registers on ARM
160	*
161	* TODO: ARM5E version with saturating overlap/add (QADD)
162	* asm code with free pointer updates, better load scheduling
163	**************************************************************************************/
164	static void DecWindowOverlapLongStart(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
165	{
166	int i, in, w0, w1, f0, f1;
167	int buf1, over1;
168	short *pcm1;
169	const int wndPrev, wndCurr;
170
171	buf0 += (1024 >> 1);
172	buf1 = buf0 - 1;
173	pcm1 = pcm0 + (1024 - 1) * nChans;
174	over1 = over0 + 1024 - 1;
175
176	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
177	i = 448; /* 2 outputs, 2 overlaps per loop */
178	do {
179	w0 = *wndPrev++;
180	w1 = *wndPrev++;
181	in = *buf0++;
182
183	f0 = MULSHIFT32(w0, in);
184	f1 = MULSHIFT32(w1, in);
185
186	in = *over0;
187	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
188	pcm0 += nChans;
189
190	in = *over1;
191	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
192	pcm1 -= nChans;
193
194	in = *buf1--;
195
196	over1-- = 0; / Wn = 0 for n = (2047, 2046, ... 1600) */
197	over0++ = in >> 1; / Wn = 1 for n = (1024, 1025, ... 1471) */
198	} while (--i);
199
200	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
201
202	/* do 64 more loops - 2 outputs, 2 overlaps per loop */
203	do {
204	w0 = *wndPrev++;
205	w1 = *wndPrev++;
206	in = *buf0++;
207
208	f0 = MULSHIFT32(w0, in);
209	f1 = MULSHIFT32(w1, in);
210
211	in = *over0;
212	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
213	pcm0 += nChans;
214
215	in = *over1;
216	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
217	pcm1 -= nChans;
218
219	w0 = wndCurr++; / W[0], W[1], ... --> W[255], W[254], ... */
220	w1 = wndCurr++; / W[127], W[126], ... --> W[128], W[129], ... */
221	in = *buf1--;
222
223	over1-- = MULSHIFT32(w0, in); / Wn = short window for n = (1599, 1598, ... , 1536) */
224	over0++ = MULSHIFT32(w1, in); / Wn = short window for n = (1472, 1473, ... , 1535) */
225	} while (over0 < over1);
226	}
227
228	/**************************************************************************************
229	* Function: DecWindowOverlapLongStop
230	*
231	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
232	* for winSequence LONG-STOP
233	*
234	* Inputs: input buffer (output of type-IV DCT)
235	* overlap buffer (saved from last time)
236	* number of channels
237	* window type (sin or KBD) for input buffer
238	* window type (sin or KBD) for overlap buffer
239	*
240	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
241	*
242	* Return: none
243	*
244	* Notes: this processes one channel at a time, but skips every other sample in
245	* the output buffer (pcm) for stereo interleaving
246	* this should fit in registers on ARM
247	*
248	* TODO: ARM5E version with saturating overlap/add (QADD)
249	* asm code with free pointer updates, better load scheduling
250	**************************************************************************************/
251	static void DecWindowOverlapLongStop(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
252	{
253	int i, in, w0, w1, f0, f1;
254	int buf1, over1;
255	short *pcm1;
256	const int wndPrev, wndCurr;
257
258	buf0 += (1024 >> 1);
259	buf1 = buf0 - 1;
260	pcm1 = pcm0 + (1024 - 1) * nChans;
261	over1 = over0 + 1024 - 1;
262
263	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
264	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
265
266	i = 448; /* 2 outputs, 2 overlaps per loop */
267	do {
268	/* Wn = 0 for n = (0, 1, ... 447) */
269	/* Wn = 1 for n = (576, 577, ... 1023) */
270	in = *buf0++;
271	f1 = in >> 1; /* scale since skipping multiply by Q31 */
272
273	in = *over0;
274	*pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT);
275	pcm0 += nChans;
276
277	in = *over1;
278	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
279	pcm1 -= nChans;
280
281	w0 = *wndCurr++;
282	w1 = *wndCurr++;
283	in = *buf1--;
284
285	*over1-- = MULSHIFT32(w0, in);
286	*over0++ = MULSHIFT32(w1, in);
287	} while (--i);
288
289	/* do 64 more loops - 2 outputs, 2 overlaps per loop */
290	do {
291	w0 = wndPrev++; / W[0], W[1], ...W[63] */
292	w1 = wndPrev++; / W[127], W[126], ... W[64] */
293	in = *buf0++;
294
295	f0 = MULSHIFT32(w0, in);
296	f1 = MULSHIFT32(w1, in);
297
298	in = *over0;
299	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
300	pcm0 += nChans;
301
302	in = *over1;
303	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
304	pcm1 -= nChans;
305
306	w0 = *wndCurr++;
307	w1 = *wndCurr++;
308	in = *buf1--;
309
310	*over1-- = MULSHIFT32(w0, in);
311	*over0++ = MULSHIFT32(w1, in);
312	} while (over0 < over1);
313	}
314
315	/**************************************************************************************
316	* Function: DecWindowOverlapShort
317	*
318	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
319	* for winSequence EIGHT-SHORT (does all 8 short blocks)
320	*
321	* Inputs: input buffer (output of type-IV DCT)
322	* overlap buffer (saved from last time)
323	* number of channels
324	* window type (sin or KBD) for input buffer
325	* window type (sin or KBD) for overlap buffer
326	*
327	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
328	*
329	* Return: none
330	*
331	* Notes: this processes one channel at a time, but skips every other sample in
332	* the output buffer (pcm) for stereo interleaving
333	* this should fit in registers on ARM
334	*
335	* TODO: ARM5E version with saturating overlap/add (QADD)
336	* asm code with free pointer updates, better load scheduling
337	**************************************************************************************/
338	static void DecWindowOverlapShort(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
339	{
340	int i, in, w0, w1, f0, f1;
341	int buf1, over1;
342	short *pcm1;
343	const int wndPrev, wndCurr;
344
345	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
346	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
347
348	/* pcm[0-447] = 0 + overlap[0-447] */
349	i = 448;
350	do {
351	f0 = *over0++;
352	f1 = *over0++;
353	*pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT);
354	pcm0 += nChans;
355	*pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT);
356	pcm0 += nChans;
357	i -= 2;
358	} while (i);
359
360	/* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
361	pcm1 = pcm0 + (128 - 1) * nChans;
362	over1 = over0 + 128 - 1;
363	buf0 += 64;
364	buf1 = buf0 - 1;
365	do {
366	w0 = wndPrev++; / W[0], W[1], ...W[63] */
367	w1 = wndPrev++; / W[127], W[126], ... W[64] */
368	in = *buf0++;
369
370	f0 = MULSHIFT32(w0, in);
371	f1 = MULSHIFT32(w1, in);
372
373	in = *over0;
374	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
375	pcm0 += nChans;
376
377	in = *over1;
378	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
379	pcm1 -= nChans;
380
381	w0 = *wndCurr++;
382	w1 = *wndCurr++;
383	in = *buf1--;
384
385	/* save over0/over1 for next short block, in the slots just vacated */
386	*over1-- = MULSHIFT32(w0, in);
387	*over0++ = MULSHIFT32(w1, in);
388	} while (over0 < over1);
389
390	/* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
391	* pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
392	* pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
393	*/
394	for (i = 0; i < 3; i++) {
395	pcm0 += 64 * nChans;
396	pcm1 = pcm0 + (128 - 1) * nChans;
397	over0 += 64;
398	over1 = over0 + 128 - 1;
399	buf0 += 64;
400	buf1 = buf0 - 1;
401	wndCurr -= 128;
402
403	do {
404	w0 = wndCurr++; / W[0], W[1], ...W[63] */
405	w1 = wndCurr++; / W[127], W[126], ... W[64] */
406	in = *buf0++;
407
408	f0 = MULSHIFT32(w0, in);
409	f1 = MULSHIFT32(w1, in);
410
411	in = (over0 - 128); / from last short block */
412	in += (over0 + 0); / from last full frame */
413	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
414	pcm0 += nChans;
415
416	in = (over1 - 128); / from last short block */
417	in += (over1 + 0); / from last full frame */
418	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
419	pcm1 -= nChans;
420
421	/* save over0/over1 for next short block, in the slots just vacated */
422	in = *buf1--;
423	*over1-- = MULSHIFT32(w0, in);
424	*over0++ = MULSHIFT32(w1, in);
425	} while (over0 < over1);
426	}
427
428	/* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023]
429	* over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
430	*/
431	pcm0 += 64 * nChans;
432	over0 -= 832; /* points at overlap[64] */
433	over1 = over0 + 128 - 1; /* points at overlap[191] */
434	buf0 += 64;
435	buf1 = buf0 - 1;
436	wndCurr -= 128;
437	do {
438	w0 = wndCurr++; / W[0], W[1], ...W[63] */
439	w1 = wndCurr++; / W[127], W[126], ... W[64] */
440	in = *buf0++;
441
442	f0 = MULSHIFT32(w0, in);
443	f1 = MULSHIFT32(w1, in);
444
445	in = (over0 + 768); / from last short block */
446	in += (over0 + 896); / from last full frame */
447	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
448	pcm0 += nChans;
449
450	in = (over1 + 768); / from last short block */
451	*(over1 - 128) = in + f1;
452
453	in = *buf1--;
454	over1-- = MULSHIFT32(w0, in); / save in overlap[128-191] */
455	over0++ = MULSHIFT32(w1, in); / save in overlap[64-127] */
456	} while (over0 < over1);
457
458	/* over0 now points at overlap[128] */
459
460	/* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
461	* over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
462	* over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
463	* over[448-576] = Wc[128-255] * block7[128-255]
464	*/
465	for (i = 0; i < 3; i++) {
466	over0 += 64;
467	over1 = over0 + 128 - 1;
468	buf0 += 64;
469	buf1 = buf0 - 1;
470	wndCurr -= 128;
471	do {
472	w0 = wndCurr++; / W[0], W[1], ...W[63] */
473	w1 = wndCurr++; / W[127], W[126], ... W[64] */
474	in = *buf0++;
475
476	f0 = MULSHIFT32(w0, in);
477	f1 = MULSHIFT32(w1, in);
478
479	/* from last short block */
480	*(over0 - 128) -= f0;
481	*(over1 - 128) += f1;
482
483	in = *buf1--;
484	*over1-- = MULSHIFT32(w0, in);
485	*over0++ = MULSHIFT32(w1, in);
486	} while (over0 < over1);
487	}
488
489	/* over[576-1024] = 0 */
490	i = 448;
491	over0 += 64;
492	do {
493	*over0++ = 0;
494	*over0++ = 0;
495	*over0++ = 0;
496	*over0++ = 0;
497	i -= 4;
498	} while (i);
499	}
500
501	#endif /* !AAC_ENABLE_SBR */
502
503	/**************************************************************************************
504	* Function: IMDCT
505	*
506	* Description: inverse transform and convert to 16-bit PCM
507	*
508	* Inputs: valid AACDecInfo struct
509	* index of current channel (0 for SCE/LFE, 0 or 1 for CPE)
510	* output channel (range = [0, nChans-1])
511	*
512	* Outputs: complete frame of decoded PCM, after inverse transform
513	*
514	* Return: 0 if successful, -1 if error
515	*
516	* Notes: If AAC_ENABLE_SBR is defined at compile time then window + overlap
517	* does NOT clip to 16-bit PCM and does NOT interleave channels
518	* If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap
519	* does clip to 16-bit PCM and interleaves channels
520	* If SBR is enabled at compile time, but we don't know whether it is
521	* actually used for this frame (e.g. the first frame of a stream),
522	* we need to produce both clipped 16-bit PCM in outbuf AND
523	* unclipped 32-bit PCM in the SBR input buffer. In this case we make
524	* a separate pass over the 32-bit PCM to produce 16-bit PCM output.
525	* This inflicts a slight performance hit when decoding non-SBR files.
526	**************************************************************************************/
527	int IMDCT(AACDecInfo aacDecInfo, int ch, int chOut, short outbuf)
528	{
529	int i;
530	PSInfoBase *psi;
531	ICSInfo *icsInfo;
532
533	/* validate pointers */
534	if (!aacDecInfo \|\| !aacDecInfo->psInfoBase) {
535	return -1;
536	}
537	psi = (PSInfoBase *)(aacDecInfo->psInfoBase);
538	icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]);
539	outbuf += chOut;
540
541	/* optimized type-IV DCT (operates inplace) */
542	if (icsInfo->winSequence == 2) {
543	/* 8 short blocks */
544	for (i = 0; i < 8; i++) {
545	DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]);
546	}
547	} else {
548	/* 1 long block */
549	DCT4(1, psi->coef[ch], psi->gbCurrent[ch]);
550	}
551
552	#ifdef AAC_ENABLE_SBR
553	/* window, overlap-add, don't clip to short (send to SBR decoder)
554	* store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer
555	*/
556	if (icsInfo->winSequence == 0) {
557	DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
558	} else if (icsInfo->winSequence == 1) {
559	DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
560	} else if (icsInfo->winSequence == 2) {
561	DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
562	} else if (icsInfo->winSequence == 3) {
563	DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
564	}
565
566	if (!aacDecInfo->sbrEnabled) {
567	for (i = 0; i < AAC_MAX_NSAMPS; i++) {
568	*outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT);
569	outbuf += aacDecInfo->nChans;
570	}
571	}
572
573	aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch];
574	aacDecInfo->rawSampleBytes = sizeof(int);
575	aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT;
576	#else
577	/* window, overlap-add, round to PCM - optimized for each window sequence */
578	if (icsInfo->winSequence == 0) {
579	DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
580	} else if (icsInfo->winSequence == 1) {
581	DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
582	} else if (icsInfo->winSequence == 2) {
583	DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
584	} else if (icsInfo->winSequence == 3) {
585	DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
586	}
587
588	aacDecInfo->rawSampleBuf[ch] = 0;
589	aacDecInfo->rawSampleBytes = 0;
590	aacDecInfo->rawSampleFBits = 0;
591	#endif
592
593	psi->prevWinShape[chOut] = icsInfo->winShape;
594
595	return 0;
596	}
597