platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
3  *
4  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5  *
6  * The contents of this file, and the files included with this file,
7  * are subject to the current version of the RealNetworks Public
8  * Source License (the "RPSL") available at
9  * http://www.helixcommunity.org/content/rpsl unless you have licensed
10  * the file under the current version of the RealNetworks Community
11  * Source License (the "RCSL") available at
12  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13  * will apply. You may also obtain the license terms directly from
14  * RealNetworks.  You may not use this file except in compliance with
15  * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
17  * the rights, obligations and limitations governing use of the
18  * contents of the file.
19  *
20  * This file is part of the Helix DNA Technology. RealNetworks is the
21  * developer of the Original Code and owns the copyrights in the
22  * portions it created.
23  *
24  * This file, and the files included with this file, is distributed
25  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29  * ENJOYMENT OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  *    http://www.helixcommunity.org/content/tck
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point HE-AAC decoder
40  * Jon Recker (jrecker@real.com)
41  * February 2005
42  *
43  * imdct.c - inverse MDCT
44  **************************************************************************************/
45
46 #include "coder.h"
47
48 #include "assembly.h"
49
50 #include "aacdec.h"
51
52 #define RND_VAL     (1 << (FBITS_OUT_IMDCT-1))  /* rounding offset: half of 2^FBITS_OUT_IMDCT, added before each ">> FBITS_OUT_IMDCT" */
53
54 #ifndef AAC_ENABLE_SBR
55
56 /**************************************************************************************
57  * Function:    DecWindowOverlap
58  *
59  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
60  *                for winSequence LONG-LONG
61  *
62  * Inputs:      input buffer (output of type-IV DCT)
63  *              overlap buffer (saved from last time)
64  *              number of channels
65  *              window type (sin or KBD) for input buffer
66  *              window type (sin or KBD) for overlap buffer
67  *
68  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
69  *
70  * Return:      none
71  *
72  * Notes:       this processes one channel at a time, but skips every other sample in
73  *                the output buffer (pcm) for stereo interleaving
74  *              this should fit in registers on ARM
75  *
76  * TODO:        ARM5E version with saturating overlap/add (QADD)
77  *              asm code with free pointer updates, better load scheduling
78  **************************************************************************************/
79 static void DecWindowOverlap(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
80 {
81     int in, w0, w1, f0, f1;
82     int *buf1, *over1;
83     short *pcm1;
84     const int *wndPrev, *wndCurr;
85
86     buf0 += (1024 >> 1);
87     buf1  = buf0  - 1;
88     pcm1  = pcm0 + (1024 - 1) * nChans;
89     over1 = over0 + 1024 - 1;
90
91     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
92     if (winTypeCurr == winTypePrev) {
93         /* cut window loads in half since current and overlap sections use same symmetric window */
94         do {
95             w0 = *wndPrev++;
96             w1 = *wndPrev++;
97             in = *buf0++;
98
99             f0 = MULSHIFT32(w0, in);
100             f1 = MULSHIFT32(w1, in);
101
102             in = *over0;
103             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
104             pcm0 += nChans;
105
106             in = *over1;
107             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
108             pcm1 -= nChans;
109
110             in = *buf1--;
111             *over1-- = MULSHIFT32(w0, in);
112             *over0++ = MULSHIFT32(w1, in);
113         } while (over0 < over1);
114     } else {
115         /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
116         wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
117         do {
118             w0 = *wndPrev++;
119             w1 = *wndPrev++;
120             in = *buf0++;
121
122             f0 = MULSHIFT32(w0, in);
123             f1 = MULSHIFT32(w1, in);
124
125             in = *over0;
126             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
127             pcm0 += nChans;
128
129             in = *over1;
130             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
131             pcm1 -= nChans;
132
133             w0 = *wndCurr++;
134             w1 = *wndCurr++;
135             in = *buf1--;
136
137             *over1-- = MULSHIFT32(w0, in);
138             *over0++ = MULSHIFT32(w1, in);
139         } while (over0 < over1);
140     }
141 }
142
143 /**************************************************************************************
144  * Function:    DecWindowOverlapLongStart
145  *
146  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
147  *                for winSequence LONG-START
148  *
149  * Inputs:      input buffer (output of type-IV DCT)
150  *              overlap buffer (saved from last time)
151  *              number of channels
152  *              window type (sin or KBD) for input buffer
153  *              window type (sin or KBD) for overlap buffer
154  *
155  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
156  *
157  * Return:      none
158  *
159  * Notes:       this processes one channel at a time, but skips every other sample in
160  *                the output buffer (pcm) for stereo interleaving
161  *              this should fit in registers on ARM
162  *
163  * TODO:        ARM5E version with saturating overlap/add (QADD)
164  *              asm code with free pointer updates, better load scheduling
165  **************************************************************************************/
166 static void DecWindowOverlapLongStart(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
167 {
168     int i,  in, w0, w1, f0, f1;
169     int *buf1, *over1;
170     short *pcm1;
171     const int *wndPrev, *wndCurr;
172
173     buf0 += (1024 >> 1);
174     buf1  = buf0  - 1;
175     pcm1  = pcm0 + (1024 - 1) * nChans;
176     over1 = over0 + 1024 - 1;
177
178     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
179     i = 448;    /* 2 outputs, 2 overlaps per loop */
180     do {
181         w0 = *wndPrev++;
182         w1 = *wndPrev++;
183         in = *buf0++;
184
185         f0 = MULSHIFT32(w0, in);
186         f1 = MULSHIFT32(w1, in);
187
188         in = *over0;
189         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
190         pcm0 += nChans;
191
192         in = *over1;
193         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
194         pcm1 -= nChans;
195
196         in = *buf1--;
197
198         *over1-- = 0;       /* Wn = 0 for n = (2047, 2046, ... 1600) */
199         *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */
200     } while (--i);
201
202     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
203
204     /* do 64 more loops - 2 outputs, 2 overlaps per loop */
205     do {
206         w0 = *wndPrev++;
207         w1 = *wndPrev++;
208         in = *buf0++;
209
210         f0 = MULSHIFT32(w0, in);
211         f1 = MULSHIFT32(w1, in);
212
213         in = *over0;
214         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
215         pcm0 += nChans;
216
217         in = *over1;
218         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
219         pcm1 -= nChans;
220
221         w0 = *wndCurr++;    /* W[0], W[1], ... --> W[255], W[254], ... */
222         w1 = *wndCurr++;    /* W[127], W[126], ... --> W[128], W[129], ... */
223         in = *buf1--;
224
225         *over1-- = MULSHIFT32(w0, in);  /* Wn = short window for n = (1599, 1598, ... , 1536) */
226         *over0++ = MULSHIFT32(w1, in);  /* Wn = short window for n = (1472, 1473, ... , 1535) */
227     } while (over0 < over1);
228 }
229
230 /**************************************************************************************
231  * Function:    DecWindowOverlapLongStop
232  *
233  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
234  *                for winSequence LONG-STOP
235  *
236  * Inputs:      input buffer (output of type-IV DCT)
237  *              overlap buffer (saved from last time)
238  *              number of channels
239  *              window type (sin or KBD) for input buffer
240  *              window type (sin or KBD) for overlap buffer
241  *
242  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
243  *
244  * Return:      none
245  *
246  * Notes:       this processes one channel at a time, but skips every other sample in
247  *                the output buffer (pcm) for stereo interleaving
248  *              this should fit in registers on ARM
249  *
250  * TODO:        ARM5E version with saturating overlap/add (QADD)
251  *              asm code with free pointer updates, better load scheduling
252  **************************************************************************************/
253 static void DecWindowOverlapLongStop(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
254 {
255     int i, in, w0, w1, f0, f1;
256     int *buf1, *over1;
257     short *pcm1;
258     const int *wndPrev, *wndCurr;
259
260     buf0 += (1024 >> 1);
261     buf1  = buf0  - 1;
262     pcm1  = pcm0 + (1024 - 1) * nChans;
263     over1 = over0 + 1024 - 1;
264
265     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
266     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
267
268     i = 448;    /* 2 outputs, 2 overlaps per loop */
269     do {
270         /* Wn = 0 for n = (0, 1, ... 447) */
271         /* Wn = 1 for n = (576, 577, ... 1023) */
272         in = *buf0++;
273         f1 = in >> 1;   /* scale since skipping multiply by Q31 */
274
275         in = *over0;
276         *pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT);
277         pcm0 += nChans;
278
279         in = *over1;
280         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
281         pcm1 -= nChans;
282
283         w0 = *wndCurr++;
284         w1 = *wndCurr++;
285         in = *buf1--;
286
287         *over1-- = MULSHIFT32(w0, in);
288         *over0++ = MULSHIFT32(w1, in);
289     } while (--i);
290
291     /* do 64 more loops - 2 outputs, 2 overlaps per loop */
292     do {
293         w0 = *wndPrev++;    /* W[0], W[1], ...W[63] */
294         w1 = *wndPrev++;    /* W[127], W[126], ... W[64] */
295         in = *buf0++;
296
297         f0 = MULSHIFT32(w0, in);
298         f1 = MULSHIFT32(w1, in);
299
300         in = *over0;
301         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
302         pcm0 += nChans;
303
304         in = *over1;
305         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
306         pcm1 -= nChans;
307
308         w0 = *wndCurr++;
309         w1 = *wndCurr++;
310         in = *buf1--;
311
312         *over1-- = MULSHIFT32(w0, in);
313         *over0++ = MULSHIFT32(w1, in);
314     } while (over0 < over1);
315 }
316
317 /**************************************************************************************
318  * Function:    DecWindowOverlapShort
319  *
320  * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
321  *                for winSequence EIGHT-SHORT (does all 8 short blocks)
322  *
323  * Inputs:      input buffer (output of type-IV DCT)
324  *              overlap buffer (saved from last time)
325  *              number of channels
326  *              window type (sin or KBD) for input buffer
327  *              window type (sin or KBD) for overlap buffer
328  *
329  * Outputs:     one channel, one frame of 16-bit PCM, interleaved by nChans
330  *
331  * Return:      none
332  *
333  * Notes:       this processes one channel at a time, but skips every other sample in
334  *                the output buffer (pcm) for stereo interleaving
335  *              this should fit in registers on ARM
336  *
337  * TODO:        ARM5E version with saturating overlap/add (QADD)
338  *              asm code with free pointer updates, better load scheduling
339  **************************************************************************************/
340 static void DecWindowOverlapShort(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
341 {
342     int i, in, w0, w1, f0, f1;
343     int *buf1, *over1;
344     short *pcm1;
345     const int *wndPrev, *wndCurr;
346
347     wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
348     wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
349
350     /* pcm[0-447] = 0 + overlap[0-447] */
351     i = 448;
352     do {
353         f0 = *over0++;
354         f1 = *over0++;
355         *pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT);
356         pcm0 += nChans;
357         *pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT);
358         pcm0 += nChans;
359         i -= 2;
360     } while (i);
361
362     /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
363     pcm1  = pcm0 + (128 - 1) * nChans;
364     over1 = over0 + 128 - 1;
365     buf0 += 64;
366     buf1  = buf0  - 1;
367     do {
368         w0 = *wndPrev++;    /* W[0], W[1], ...W[63] */
369         w1 = *wndPrev++;    /* W[127], W[126], ... W[64] */
370         in = *buf0++;
371
372         f0 = MULSHIFT32(w0, in);
373         f1 = MULSHIFT32(w1, in);
374
375         in = *over0;
376         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
377         pcm0 += nChans;
378
379         in = *over1;
380         *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
381         pcm1 -= nChans;
382
383         w0 = *wndCurr++;
384         w1 = *wndCurr++;
385         in = *buf1--;
386
387         /* save over0/over1 for next short block, in the slots just vacated */
388         *over1-- = MULSHIFT32(w0, in);
389         *over0++ = MULSHIFT32(w1, in);
390     } while (over0 < over1);
391
392     /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
393      * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
394      * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
395      */
396     for (i = 0; i < 3; i++) {
397         pcm0 += 64 * nChans;
398         pcm1 = pcm0 + (128 - 1) * nChans;
399         over0 += 64;
400         over1 = over0 + 128 - 1;
401         buf0 += 64;
402         buf1 = buf0 - 1;
403         wndCurr -= 128;
404
405         do {
406             w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
407             w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
408             in = *buf0++;
409
410             f0 = MULSHIFT32(w0, in);
411             f1 = MULSHIFT32(w1, in);
412
413             in  = *(over0 - 128);   /* from last short block */
414             in += *(over0 + 0);     /* from last full frame */
415             *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
416             pcm0 += nChans;
417
418             in  = *(over1 - 128);   /* from last short block */
419             in += *(over1 + 0);     /* from last full frame */
420             *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
421             pcm1 -= nChans;
422
423             /* save over0/over1 for next short block, in the slots just vacated */
424             in = *buf1--;
425             *over1-- = MULSHIFT32(w0, in);
426             *over0++ = MULSHIFT32(w1, in);
427         } while (over0 < over1);
428     }
429
430     /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63]   * block4[0-63] + overlap[960-1023]
431      * over[0-63]    = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
432      */
433     pcm0 += 64 * nChans;
434     over0 -= 832;               /* points at overlap[64] */
435     over1 = over0 + 128 - 1;    /* points at overlap[191] */
436     buf0 += 64;
437     buf1 = buf0 - 1;
438     wndCurr -= 128;
439     do {
440         w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
441         w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
442         in = *buf0++;
443
444         f0 = MULSHIFT32(w0, in);
445         f1 = MULSHIFT32(w1, in);
446
447         in  = *(over0 + 768);   /* from last short block */
448         in += *(over0 + 896);   /* from last full frame */
449         *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
450         pcm0 += nChans;
451
452         in  = *(over1 + 768);   /* from last short block */
453         *(over1 - 128) = in + f1;
454
455         in = *buf1--;
456         *over1-- = MULSHIFT32(w0, in);  /* save in overlap[128-191] */
457         *over0++ = MULSHIFT32(w1, in);  /* save in overlap[64-127] */
458     } while (over0 < over1);
459
460     /* over0 now points at overlap[128] */
461
462     /* over[64-191]   = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
463      * over[192-319]  = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
464      * over[320-447]  = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
465      * over[448-576]  = Wc[128-255] * block7[128-255]
466      */
467     for (i = 0; i < 3; i++) {
468         over0 += 64;
469         over1 = over0 + 128 - 1;
470         buf0 += 64;
471         buf1 = buf0 - 1;
472         wndCurr -= 128;
473         do {
474             w0 = *wndCurr++;    /* W[0], W[1], ...W[63] */
475             w1 = *wndCurr++;    /* W[127], W[126], ... W[64] */
476             in = *buf0++;
477
478             f0 = MULSHIFT32(w0, in);
479             f1 = MULSHIFT32(w1, in);
480
481             /* from last short block */
482             *(over0 - 128) -= f0;
483             *(over1 - 128) += f1;
484
485             in = *buf1--;
486             *over1-- = MULSHIFT32(w0, in);
487             *over0++ = MULSHIFT32(w1, in);
488         } while (over0 < over1);
489     }
490
491     /* over[576-1024] = 0 */
492     i = 448;
493     over0 += 64;
494     do {
495         *over0++ = 0;
496         *over0++ = 0;
497         *over0++ = 0;
498         *over0++ = 0;
499         i -= 4;
500     } while (i);
501 }
502
503 #endif  /* !AAC_ENABLE_SBR */
504
505 /**************************************************************************************
506  * Function:    IMDCT
507  *
508  * Description: inverse transform and convert to 16-bit PCM
509  *
510  * Inputs:      valid AACDecInfo struct
511  *              index of current channel (0 for SCE/LFE, 0 or 1 for CPE)
512  *              output channel (range = [0, nChans-1])
513  *
514  * Outputs:     complete frame of decoded PCM, after inverse transform
515  *
516  * Return:      0 if successful, -1 if error
517  *
518  * Notes:       If AAC_ENABLE_SBR is defined at compile time then window + overlap
519  *                does NOT clip to 16-bit PCM and does NOT interleave channels
520  *              If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap
521  *                does clip to 16-bit PCM and interleaves channels
522  *              If SBR is enabled at compile time, but we don't know whether it is
523  *                actually used for this frame (e.g. the first frame of a stream),
524  *                we need to produce both clipped 16-bit PCM in outbuf AND
525  *                unclipped 32-bit PCM in the SBR input buffer. In this case we make
526  *                a separate pass over the 32-bit PCM to produce 16-bit PCM output.
527  *                This inflicts a slight performance hit when decoding non-SBR files.
528  **************************************************************************************/
529 int IMDCT(AACDecInfo *aacDecInfo, int ch, int chOut, short *outbuf)
530 {
531     int i;
532     PSInfoBase *psi;
533     ICSInfo *icsInfo;
534
535     /* validate pointers */
536     if (!aacDecInfo || !aacDecInfo->psInfoBase) {
537         return -1;
538     }
539     psi = (PSInfoBase *)(aacDecInfo->psInfoBase);
540     icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]);
541     outbuf += chOut;
542
543     /* optimized type-IV DCT (operates inplace) */
544     if (icsInfo->winSequence == 2) {
545         /* 8 short blocks */
546         for (i = 0; i < 8; i++) {
547             DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]);
548         }
549     } else {
550         /* 1 long block */
551         DCT4(1, psi->coef[ch], psi->gbCurrent[ch]);
552     }
553
554 #ifdef AAC_ENABLE_SBR
555     /* window, overlap-add, don't clip to short (send to SBR decoder)
556      * store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer
557      */
558     if (icsInfo->winSequence == 0) {
559         DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
560     } else if (icsInfo->winSequence == 1) {
561         DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
562     } else if (icsInfo->winSequence == 2) {
563         DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
564     } else if (icsInfo->winSequence == 3) {
565         DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
566     }
567
568     if (!aacDecInfo->sbrEnabled) {
569         for (i = 0; i < AAC_MAX_NSAMPS; i++) {
570             *outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT);
571             outbuf += aacDecInfo->nChans;
572         }
573     }
574
575     aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch];
576     aacDecInfo->rawSampleBytes = sizeof(int);
577     aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT;
578 #else
579     /* window, overlap-add, round to PCM - optimized for each window sequence */
580     if (icsInfo->winSequence == 0) {
581         DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
582     } else if (icsInfo->winSequence == 1) {
583         DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
584     } else if (icsInfo->winSequence == 2) {
585         DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
586     } else if (icsInfo->winSequence == 3) {
587         DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
588     }
589
590     aacDecInfo->rawSampleBuf[ch] = 0;
591     aacDecInfo->rawSampleBytes = 0;
592     aacDecInfo->rawSampleFBits = 0;
593 #endif
594
595     psi->prevWinShape[chOut] = icsInfo->winShape;
596
597     return 0;
598 }
599
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
3	*
4	* Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5	*
6	* The contents of this file, and the files included with this file,
7	* are subject to the current version of the RealNetworks Public
8	* Source License (the "RPSL") available at
9	* http://www.helixcommunity.org/content/rpsl unless you have licensed
10	* the file under the current version of the RealNetworks Community
11	* Source License (the "RCSL") available at
12	* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13	* will apply. You may also obtain the license terms directly from
14	* RealNetworks. You may not use this file except in compliance with
15	* the RPSL or, if you have a valid RCSL with RealNetworks applicable
16	* to this file, the RCSL. Please see the applicable RPSL or RCSL for
17	* the rights, obligations and limitations governing use of the
18	* contents of the file.
19	*
20	* This file is part of the Helix DNA Technology. RealNetworks is the
21	* developer of the Original Code and owns the copyrights in the
22	* portions it created.
23	*
24	* This file, and the files included with this file, is distributed
25	* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26	* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27	* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28	* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29	* ENJOYMENT OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* http://www.helixcommunity.org/content/tck
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point HE-AAC decoder
40	* Jon Recker (jrecker@real.com)
41	* February 2005
42	*
43	* imdct.c - inverse MDCT
44	**************************************************************************************/
45
46	#include "coder.h"
47
48	#include "assembly.h"
49
50	#include "aacdec.h"
51
52	#define RND_VAL (1 << (FBITS_OUT_IMDCT-1))
53
54	#ifndef AAC_ENABLE_SBR
55
56	/**************************************************************************************
57	* Function: DecWindowOverlap
58	*
59	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
60	* for winSequence LONG-LONG
61	*
62	* Inputs: input buffer (output of type-IV DCT)
63	* overlap buffer (saved from last time)
64	* number of channels
65	* window type (sin or KBD) for input buffer
66	* window type (sin or KBD) for overlap buffer
67	*
68	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
69	*
70	* Return: none
71	*
72	* Notes: this processes one channel at a time, but skips every other sample in
73	* the output buffer (pcm) for stereo interleaving
74	* this should fit in registers on ARM
75	*
76	* TODO: ARM5E version with saturating overlap/add (QADD)
77	* asm code with free pointer updates, better load scheduling
78	**************************************************************************************/
79	static void DecWindowOverlap(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
80	{
81	int in, w0, w1, f0, f1;
82	int buf1, over1;
83	short *pcm1;
84	const int wndPrev, wndCurr;
85
86	buf0 += (1024 >> 1);
87	buf1 = buf0 - 1;
88	pcm1 = pcm0 + (1024 - 1) * nChans;
89	over1 = over0 + 1024 - 1;
90
91	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
92	if (winTypeCurr == winTypePrev) {
93	/* cut window loads in half since current and overlap sections use same symmetric window */
94	do {
95	w0 = *wndPrev++;
96	w1 = *wndPrev++;
97	in = *buf0++;
98
99	f0 = MULSHIFT32(w0, in);
100	f1 = MULSHIFT32(w1, in);
101
102	in = *over0;
103	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
104	pcm0 += nChans;
105
106	in = *over1;
107	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
108	pcm1 -= nChans;
109
110	in = *buf1--;
111	*over1-- = MULSHIFT32(w0, in);
112	*over0++ = MULSHIFT32(w1, in);
113	} while (over0 < over1);
114	} else {
115	/* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
116	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
117	do {
118	w0 = *wndPrev++;
119	w1 = *wndPrev++;
120	in = *buf0++;
121
122	f0 = MULSHIFT32(w0, in);
123	f1 = MULSHIFT32(w1, in);
124
125	in = *over0;
126	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
127	pcm0 += nChans;
128
129	in = *over1;
130	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
131	pcm1 -= nChans;
132
133	w0 = *wndCurr++;
134	w1 = *wndCurr++;
135	in = *buf1--;
136
137	*over1-- = MULSHIFT32(w0, in);
138	*over0++ = MULSHIFT32(w1, in);
139	} while (over0 < over1);
140	}
141	}
142
143	/**************************************************************************************
144	* Function: DecWindowOverlapLongStart
145	*
146	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
147	* for winSequence LONG-START
148	*
149	* Inputs: input buffer (output of type-IV DCT)
150	* overlap buffer (saved from last time)
151	* number of channels
152	* window type (sin or KBD) for input buffer
153	* window type (sin or KBD) for overlap buffer
154	*
155	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
156	*
157	* Return: none
158	*
159	* Notes: this processes one channel at a time, but skips every other sample in
160	* the output buffer (pcm) for stereo interleaving
161	* this should fit in registers on ARM
162	*
163	* TODO: ARM5E version with saturating overlap/add (QADD)
164	* asm code with free pointer updates, better load scheduling
165	**************************************************************************************/
166	static void DecWindowOverlapLongStart(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
167	{
168	int i, in, w0, w1, f0, f1;
169	int buf1, over1;
170	short *pcm1;
171	const int wndPrev, wndCurr;
172
173	buf0 += (1024 >> 1);
174	buf1 = buf0 - 1;
175	pcm1 = pcm0 + (1024 - 1) * nChans;
176	over1 = over0 + 1024 - 1;
177
178	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
179	i = 448; /* 2 outputs, 2 overlaps per loop */
180	do {
181	w0 = *wndPrev++;
182	w1 = *wndPrev++;
183	in = *buf0++;
184
185	f0 = MULSHIFT32(w0, in);
186	f1 = MULSHIFT32(w1, in);
187
188	in = *over0;
189	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
190	pcm0 += nChans;
191
192	in = *over1;
193	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
194	pcm1 -= nChans;
195
196	in = *buf1--;
197
198	over1-- = 0; / Wn = 0 for n = (2047, 2046, ... 1600) */
199	over0++ = in >> 1; / Wn = 1 for n = (1024, 1025, ... 1471) */
200	} while (--i);
201
202	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
203
204	/* do 64 more loops - 2 outputs, 2 overlaps per loop */
205	do {
206	w0 = *wndPrev++;
207	w1 = *wndPrev++;
208	in = *buf0++;
209
210	f0 = MULSHIFT32(w0, in);
211	f1 = MULSHIFT32(w1, in);
212
213	in = *over0;
214	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
215	pcm0 += nChans;
216
217	in = *over1;
218	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
219	pcm1 -= nChans;
220
221	w0 = wndCurr++; / W[0], W[1], ... --> W[255], W[254], ... */
222	w1 = wndCurr++; / W[127], W[126], ... --> W[128], W[129], ... */
223	in = *buf1--;
224
225	over1-- = MULSHIFT32(w0, in); / Wn = short window for n = (1599, 1598, ... , 1536) */
226	over0++ = MULSHIFT32(w1, in); / Wn = short window for n = (1472, 1473, ... , 1535) */
227	} while (over0 < over1);
228	}
229
230	/**************************************************************************************
231	* Function: DecWindowOverlapLongStop
232	*
233	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
234	* for winSequence LONG-STOP
235	*
236	* Inputs: input buffer (output of type-IV DCT)
237	* overlap buffer (saved from last time)
238	* number of channels
239	* window type (sin or KBD) for input buffer
240	* window type (sin or KBD) for overlap buffer
241	*
242	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
243	*
244	* Return: none
245	*
246	* Notes: this processes one channel at a time, but skips every other sample in
247	* the output buffer (pcm) for stereo interleaving
248	* this should fit in registers on ARM
249	*
250	* TODO: ARM5E version with saturating overlap/add (QADD)
251	* asm code with free pointer updates, better load scheduling
252	**************************************************************************************/
253	static void DecWindowOverlapLongStop(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
254	{
255	int i, in, w0, w1, f0, f1;
256	int buf1, over1;
257	short *pcm1;
258	const int wndPrev, wndCurr;
259
260	buf0 += (1024 >> 1);
261	buf1 = buf0 - 1;
262	pcm1 = pcm0 + (1024 - 1) * nChans;
263	over1 = over0 + 1024 - 1;
264
265	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
266	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
267
268	i = 448; /* 2 outputs, 2 overlaps per loop */
269	do {
270	/* Wn = 0 for n = (0, 1, ... 447) */
271	/* Wn = 1 for n = (576, 577, ... 1023) */
272	in = *buf0++;
273	f1 = in >> 1; /* scale since skipping multiply by Q31 */
274
275	in = *over0;
276	*pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT);
277	pcm0 += nChans;
278
279	in = *over1;
280	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
281	pcm1 -= nChans;
282
283	w0 = *wndCurr++;
284	w1 = *wndCurr++;
285	in = *buf1--;
286
287	*over1-- = MULSHIFT32(w0, in);
288	*over0++ = MULSHIFT32(w1, in);
289	} while (--i);
290
291	/* do 64 more loops - 2 outputs, 2 overlaps per loop */
292	do {
293	w0 = wndPrev++; / W[0], W[1], ...W[63] */
294	w1 = wndPrev++; / W[127], W[126], ... W[64] */
295	in = *buf0++;
296
297	f0 = MULSHIFT32(w0, in);
298	f1 = MULSHIFT32(w1, in);
299
300	in = *over0;
301	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
302	pcm0 += nChans;
303
304	in = *over1;
305	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
306	pcm1 -= nChans;
307
308	w0 = *wndCurr++;
309	w1 = *wndCurr++;
310	in = *buf1--;
311
312	*over1-- = MULSHIFT32(w0, in);
313	*over0++ = MULSHIFT32(w1, in);
314	} while (over0 < over1);
315	}
316
317	/**************************************************************************************
318	* Function: DecWindowOverlapShort
319	*
320	* Description: apply synthesis window, do overlap-add, clip to 16-bit PCM,
321	* for winSequence EIGHT-SHORT (does all 8 short blocks)
322	*
323	* Inputs: input buffer (output of type-IV DCT)
324	* overlap buffer (saved from last time)
325	* number of channels
326	* window type (sin or KBD) for input buffer
327	* window type (sin or KBD) for overlap buffer
328	*
329	* Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans
330	*
331	* Return: none
332	*
333	* Notes: this processes one channel at a time, but skips every other sample in
334	* the output buffer (pcm) for stereo interleaving
335	* this should fit in registers on ARM
336	*
337	* TODO: ARM5E version with saturating overlap/add (QADD)
338	* asm code with free pointer updates, better load scheduling
339	**************************************************************************************/
340	static void DecWindowOverlapShort(int buf0, int over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev)
341	{
342	int i, in, w0, w1, f0, f1;
343	int buf1, over1;
344	short *pcm1;
345	const int wndPrev, wndCurr;
346
347	wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
348	wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
349
350	/* pcm[0-447] = 0 + overlap[0-447] */
351	i = 448;
352	do {
353	f0 = *over0++;
354	f1 = *over0++;
355	*pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT);
356	pcm0 += nChans;
357	*pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT);
358	pcm0 += nChans;
359	i -= 2;
360	} while (i);
361
362	/* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
363	pcm1 = pcm0 + (128 - 1) * nChans;
364	over1 = over0 + 128 - 1;
365	buf0 += 64;
366	buf1 = buf0 - 1;
367	do {
368	w0 = wndPrev++; / W[0], W[1], ...W[63] */
369	w1 = wndPrev++; / W[127], W[126], ... W[64] */
370	in = *buf0++;
371
372	f0 = MULSHIFT32(w0, in);
373	f1 = MULSHIFT32(w1, in);
374
375	in = *over0;
376	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
377	pcm0 += nChans;
378
379	in = *over1;
380	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
381	pcm1 -= nChans;
382
383	w0 = *wndCurr++;
384	w1 = *wndCurr++;
385	in = *buf1--;
386
387	/* save over0/over1 for next short block, in the slots just vacated */
388	*over1-- = MULSHIFT32(w0, in);
389	*over0++ = MULSHIFT32(w1, in);
390	} while (over0 < over1);
391
392	/* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
393	* pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
394	* pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
395	*/
396	for (i = 0; i < 3; i++) {
397	pcm0 += 64 * nChans;
398	pcm1 = pcm0 + (128 - 1) * nChans;
399	over0 += 64;
400	over1 = over0 + 128 - 1;
401	buf0 += 64;
402	buf1 = buf0 - 1;
403	wndCurr -= 128;
404
405	do {
406	w0 = wndCurr++; / W[0], W[1], ...W[63] */
407	w1 = wndCurr++; / W[127], W[126], ... W[64] */
408	in = *buf0++;
409
410	f0 = MULSHIFT32(w0, in);
411	f1 = MULSHIFT32(w1, in);
412
413	in = (over0 - 128); / from last short block */
414	in += (over0 + 0); / from last full frame */
415	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
416	pcm0 += nChans;
417
418	in = (over1 - 128); / from last short block */
419	in += (over1 + 0); / from last full frame */
420	*pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT);
421	pcm1 -= nChans;
422
423	/* save over0/over1 for next short block, in the slots just vacated */
424	in = *buf1--;
425	*over1-- = MULSHIFT32(w0, in);
426	*over0++ = MULSHIFT32(w1, in);
427	} while (over0 < over1);
428	}
429
430	/* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023]
431	* over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
432	*/
433	pcm0 += 64 * nChans;
434	over0 -= 832; /* points at overlap[64] */
435	over1 = over0 + 128 - 1; /* points at overlap[191] */
436	buf0 += 64;
437	buf1 = buf0 - 1;
438	wndCurr -= 128;
439	do {
440	w0 = wndCurr++; / W[0], W[1], ...W[63] */
441	w1 = wndCurr++; / W[127], W[126], ... W[64] */
442	in = *buf0++;
443
444	f0 = MULSHIFT32(w0, in);
445	f1 = MULSHIFT32(w1, in);
446
447	in = (over0 + 768); / from last short block */
448	in += (over0 + 896); / from last full frame */
449	*pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT);
450	pcm0 += nChans;
451
452	in = (over1 + 768); / from last short block */
453	*(over1 - 128) = in + f1;
454
455	in = *buf1--;
456	over1-- = MULSHIFT32(w0, in); / save in overlap[128-191] */
457	over0++ = MULSHIFT32(w1, in); / save in overlap[64-127] */
458	} while (over0 < over1);
459
460	/* over0 now points at overlap[128] */
461
462	/* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
463	* over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
464	* over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
465	* over[448-576] = Wc[128-255] * block7[128-255]
466	*/
467	for (i = 0; i < 3; i++) {
468	over0 += 64;
469	over1 = over0 + 128 - 1;
470	buf0 += 64;
471	buf1 = buf0 - 1;
472	wndCurr -= 128;
473	do {
474	w0 = wndCurr++; / W[0], W[1], ...W[63] */
475	w1 = wndCurr++; / W[127], W[126], ... W[64] */
476	in = *buf0++;
477
478	f0 = MULSHIFT32(w0, in);
479	f1 = MULSHIFT32(w1, in);
480
481	/* from last short block */
482	*(over0 - 128) -= f0;
483	*(over1 - 128) += f1;
484
485	in = *buf1--;
486	*over1-- = MULSHIFT32(w0, in);
487	*over0++ = MULSHIFT32(w1, in);
488	} while (over0 < over1);
489	}
490
491	/* over[576-1024] = 0 */
492	i = 448;
493	over0 += 64;
494	do {
495	*over0++ = 0;
496	*over0++ = 0;
497	*over0++ = 0;
498	*over0++ = 0;
499	i -= 4;
500	} while (i);
501	}
502
503	#endif /* !AAC_ENABLE_SBR */
504
505	/**************************************************************************************
506	* Function: IMDCT
507	*
508	* Description: inverse transform and convert to 16-bit PCM
509	*
510	* Inputs: valid AACDecInfo struct
511	* index of current channel (0 for SCE/LFE, 0 or 1 for CPE)
512	* output channel (range = [0, nChans-1])
513	*
514	* Outputs: complete frame of decoded PCM, after inverse transform
515	*
516	* Return: 0 if successful, -1 if error
517	*
518	* Notes: If AAC_ENABLE_SBR is defined at compile time then window + overlap
519	* does NOT clip to 16-bit PCM and does NOT interleave channels
520	* If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap
521	* does clip to 16-bit PCM and interleaves channels
522	* If SBR is enabled at compile time, but we don't know whether it is
523	* actually used for this frame (e.g. the first frame of a stream),
524	* we need to produce both clipped 16-bit PCM in outbuf AND
525	* unclipped 32-bit PCM in the SBR input buffer. In this case we make
526	* a separate pass over the 32-bit PCM to produce 16-bit PCM output.
527	* This inflicts a slight performance hit when decoding non-SBR files.
528	**************************************************************************************/
529	int IMDCT(AACDecInfo aacDecInfo, int ch, int chOut, short outbuf)
530	{
531	int i;
532	PSInfoBase *psi;
533	ICSInfo *icsInfo;
534
535	/* validate pointers */
536	if (!aacDecInfo \|\| !aacDecInfo->psInfoBase) {
537	return -1;
538	}
539	psi = (PSInfoBase *)(aacDecInfo->psInfoBase);
540	icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]);
541	outbuf += chOut;
542
543	/* optimized type-IV DCT (operates inplace) */
544	if (icsInfo->winSequence == 2) {
545	/* 8 short blocks */
546	for (i = 0; i < 8; i++) {
547	DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]);
548	}
549	} else {
550	/* 1 long block */
551	DCT4(1, psi->coef[ch], psi->gbCurrent[ch]);
552	}
553
554	#ifdef AAC_ENABLE_SBR
555	/* window, overlap-add, don't clip to short (send to SBR decoder)
556	* store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer
557	*/
558	if (icsInfo->winSequence == 0) {
559	DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
560	} else if (icsInfo->winSequence == 1) {
561	DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
562	} else if (icsInfo->winSequence == 2) {
563	DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
564	} else if (icsInfo->winSequence == 3) {
565	DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]);
566	}
567
568	if (!aacDecInfo->sbrEnabled) {
569	for (i = 0; i < AAC_MAX_NSAMPS; i++) {
570	*outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT);
571	outbuf += aacDecInfo->nChans;
572	}
573	}
574
575	aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch];
576	aacDecInfo->rawSampleBytes = sizeof(int);
577	aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT;
578	#else
579	/* window, overlap-add, round to PCM - optimized for each window sequence */
580	if (icsInfo->winSequence == 0) {
581	DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
582	} else if (icsInfo->winSequence == 1) {
583	DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
584	} else if (icsInfo->winSequence == 2) {
585	DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
586	} else if (icsInfo->winSequence == 3) {
587	DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]);
588	}
589
590	aacDecInfo->rawSampleBuf[ch] = 0;
591	aacDecInfo->rawSampleBytes = 0;
592	aacDecInfo->rawSampleFBits = 0;
593	#endif
594
595	psi->prevWinShape[chOut] = icsInfo->winShape;
596
597	return 0;
598	}
599