blob: 4eb4f0b5b1a7c6a3ef6f3f4d905b0fa03daa75e8
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: imdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * imdct.c - inverse MDCT |
44 | **************************************************************************************/ |
45 | |
46 | #include "coder.h" |
47 | |
48 | #include "assembly.h" |
49 | |
50 | #include "aacdec.h" |
51 | |
/* rounding offset added to a sum just before it is shifted right by FBITS_OUT_IMDCT */
#define RND_VAL		(1 << (FBITS_OUT_IMDCT - 1))
53 | |
54 | #ifndef AAC_ENABLE_SBR |
55 | |
56 | /************************************************************************************** |
57 | * Function: DecWindowOverlap |
58 | * |
59 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
60 | * for winSequence LONG-LONG |
61 | * |
62 | * Inputs: input buffer (output of type-IV DCT) |
63 | * overlap buffer (saved from last time) |
64 | * number of channels |
65 | * window type (sin or KBD) for input buffer |
66 | * window type (sin or KBD) for overlap buffer |
67 | * |
68 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
69 | * |
70 | * Return: none |
71 | * |
72 | * Notes: this processes one channel at a time, but skips every other sample in |
73 | * the output buffer (pcm) for stereo interleaving |
74 | * this should fit in registers on ARM |
75 | * |
76 | * TODO: ARM5E version with saturating overlap/add (QADD) |
77 | * asm code with free pointer updates, better load scheduling |
78 | **************************************************************************************/ |
79 | static void DecWindowOverlap(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
80 | { |
81 | int in, w0, w1, f0, f1; |
82 | int *buf1, *over1; |
83 | short *pcm1; |
84 | const int *wndPrev, *wndCurr; |
85 | |
86 | buf0 += (1024 >> 1); |
87 | buf1 = buf0 - 1; |
88 | pcm1 = pcm0 + (1024 - 1) * nChans; |
89 | over1 = over0 + 1024 - 1; |
90 | |
91 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
92 | if (winTypeCurr == winTypePrev) { |
93 | /* cut window loads in half since current and overlap sections use same symmetric window */ |
94 | do { |
95 | w0 = *wndPrev++; |
96 | w1 = *wndPrev++; |
97 | in = *buf0++; |
98 | |
99 | f0 = MULSHIFT32(w0, in); |
100 | f1 = MULSHIFT32(w1, in); |
101 | |
102 | in = *over0; |
103 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
104 | pcm0 += nChans; |
105 | |
106 | in = *over1; |
107 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
108 | pcm1 -= nChans; |
109 | |
110 | in = *buf1--; |
111 | *over1-- = MULSHIFT32(w0, in); |
112 | *over0++ = MULSHIFT32(w1, in); |
113 | } while (over0 < over1); |
114 | } else { |
115 | /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */ |
116 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
117 | do { |
118 | w0 = *wndPrev++; |
119 | w1 = *wndPrev++; |
120 | in = *buf0++; |
121 | |
122 | f0 = MULSHIFT32(w0, in); |
123 | f1 = MULSHIFT32(w1, in); |
124 | |
125 | in = *over0; |
126 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
127 | pcm0 += nChans; |
128 | |
129 | in = *over1; |
130 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
131 | pcm1 -= nChans; |
132 | |
133 | w0 = *wndCurr++; |
134 | w1 = *wndCurr++; |
135 | in = *buf1--; |
136 | |
137 | *over1-- = MULSHIFT32(w0, in); |
138 | *over0++ = MULSHIFT32(w1, in); |
139 | } while (over0 < over1); |
140 | } |
141 | } |
142 | |
143 | /************************************************************************************** |
144 | * Function: DecWindowOverlapLongStart |
145 | * |
146 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
147 | * for winSequence LONG-START |
148 | * |
149 | * Inputs: input buffer (output of type-IV DCT) |
150 | * overlap buffer (saved from last time) |
151 | * number of channels |
152 | * window type (sin or KBD) for input buffer |
153 | * window type (sin or KBD) for overlap buffer |
154 | * |
155 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
156 | * |
157 | * Return: none |
158 | * |
159 | * Notes: this processes one channel at a time, but skips every other sample in |
160 | * the output buffer (pcm) for stereo interleaving |
161 | * this should fit in registers on ARM |
162 | * |
163 | * TODO: ARM5E version with saturating overlap/add (QADD) |
164 | * asm code with free pointer updates, better load scheduling |
165 | **************************************************************************************/ |
166 | static void DecWindowOverlapLongStart(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
167 | { |
168 | int i, in, w0, w1, f0, f1; |
169 | int *buf1, *over1; |
170 | short *pcm1; |
171 | const int *wndPrev, *wndCurr; |
172 | |
173 | buf0 += (1024 >> 1); |
174 | buf1 = buf0 - 1; |
175 | pcm1 = pcm0 + (1024 - 1) * nChans; |
176 | over1 = over0 + 1024 - 1; |
177 | |
178 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
179 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
180 | do { |
181 | w0 = *wndPrev++; |
182 | w1 = *wndPrev++; |
183 | in = *buf0++; |
184 | |
185 | f0 = MULSHIFT32(w0, in); |
186 | f1 = MULSHIFT32(w1, in); |
187 | |
188 | in = *over0; |
189 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
190 | pcm0 += nChans; |
191 | |
192 | in = *over1; |
193 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
194 | pcm1 -= nChans; |
195 | |
196 | in = *buf1--; |
197 | |
198 | *over1-- = 0; /* Wn = 0 for n = (2047, 2046, ... 1600) */ |
199 | *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */ |
200 | } while (--i); |
201 | |
202 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
203 | |
204 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
205 | do { |
206 | w0 = *wndPrev++; |
207 | w1 = *wndPrev++; |
208 | in = *buf0++; |
209 | |
210 | f0 = MULSHIFT32(w0, in); |
211 | f1 = MULSHIFT32(w1, in); |
212 | |
213 | in = *over0; |
214 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
215 | pcm0 += nChans; |
216 | |
217 | in = *over1; |
218 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
219 | pcm1 -= nChans; |
220 | |
221 | w0 = *wndCurr++; /* W[0], W[1], ... --> W[255], W[254], ... */ |
222 | w1 = *wndCurr++; /* W[127], W[126], ... --> W[128], W[129], ... */ |
223 | in = *buf1--; |
224 | |
225 | *over1-- = MULSHIFT32(w0, in); /* Wn = short window for n = (1599, 1598, ... , 1536) */ |
226 | *over0++ = MULSHIFT32(w1, in); /* Wn = short window for n = (1472, 1473, ... , 1535) */ |
227 | } while (over0 < over1); |
228 | } |
229 | |
230 | /************************************************************************************** |
231 | * Function: DecWindowOverlapLongStop |
232 | * |
233 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
234 | * for winSequence LONG-STOP |
235 | * |
236 | * Inputs: input buffer (output of type-IV DCT) |
237 | * overlap buffer (saved from last time) |
238 | * number of channels |
239 | * window type (sin or KBD) for input buffer |
240 | * window type (sin or KBD) for overlap buffer |
241 | * |
242 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
243 | * |
244 | * Return: none |
245 | * |
246 | * Notes: this processes one channel at a time, but skips every other sample in |
247 | * the output buffer (pcm) for stereo interleaving |
248 | * this should fit in registers on ARM |
249 | * |
250 | * TODO: ARM5E version with saturating overlap/add (QADD) |
251 | * asm code with free pointer updates, better load scheduling |
252 | **************************************************************************************/ |
253 | static void DecWindowOverlapLongStop(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
254 | { |
255 | int i, in, w0, w1, f0, f1; |
256 | int *buf1, *over1; |
257 | short *pcm1; |
258 | const int *wndPrev, *wndCurr; |
259 | |
260 | buf0 += (1024 >> 1); |
261 | buf1 = buf0 - 1; |
262 | pcm1 = pcm0 + (1024 - 1) * nChans; |
263 | over1 = over0 + 1024 - 1; |
264 | |
265 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
266 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
267 | |
268 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
269 | do { |
270 | /* Wn = 0 for n = (0, 1, ... 447) */ |
271 | /* Wn = 1 for n = (576, 577, ... 1023) */ |
272 | in = *buf0++; |
273 | f1 = in >> 1; /* scale since skipping multiply by Q31 */ |
274 | |
275 | in = *over0; |
276 | *pcm0 = CLIPTOSHORT((in + RND_VAL) >> FBITS_OUT_IMDCT); |
277 | pcm0 += nChans; |
278 | |
279 | in = *over1; |
280 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
281 | pcm1 -= nChans; |
282 | |
283 | w0 = *wndCurr++; |
284 | w1 = *wndCurr++; |
285 | in = *buf1--; |
286 | |
287 | *over1-- = MULSHIFT32(w0, in); |
288 | *over0++ = MULSHIFT32(w1, in); |
289 | } while (--i); |
290 | |
291 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
292 | do { |
293 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
294 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
295 | in = *buf0++; |
296 | |
297 | f0 = MULSHIFT32(w0, in); |
298 | f1 = MULSHIFT32(w1, in); |
299 | |
300 | in = *over0; |
301 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
302 | pcm0 += nChans; |
303 | |
304 | in = *over1; |
305 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
306 | pcm1 -= nChans; |
307 | |
308 | w0 = *wndCurr++; |
309 | w1 = *wndCurr++; |
310 | in = *buf1--; |
311 | |
312 | *over1-- = MULSHIFT32(w0, in); |
313 | *over0++ = MULSHIFT32(w1, in); |
314 | } while (over0 < over1); |
315 | } |
316 | |
317 | /************************************************************************************** |
318 | * Function: DecWindowOverlapShort |
319 | * |
320 | * Description: apply synthesis window, do overlap-add, clip to 16-bit PCM, |
321 | * for winSequence EIGHT-SHORT (does all 8 short blocks) |
322 | * |
323 | * Inputs: input buffer (output of type-IV DCT) |
324 | * overlap buffer (saved from last time) |
325 | * number of channels |
326 | * window type (sin or KBD) for input buffer |
327 | * window type (sin or KBD) for overlap buffer |
328 | * |
329 | * Outputs: one channel, one frame of 16-bit PCM, interleaved by nChans |
330 | * |
331 | * Return: none |
332 | * |
333 | * Notes: this processes one channel at a time, but skips every other sample in |
334 | * the output buffer (pcm) for stereo interleaving |
335 | * this should fit in registers on ARM |
336 | * |
337 | * TODO: ARM5E version with saturating overlap/add (QADD) |
338 | * asm code with free pointer updates, better load scheduling |
339 | **************************************************************************************/ |
340 | static void DecWindowOverlapShort(int *buf0, int *over0, short *pcm0, int nChans, int winTypeCurr, int winTypePrev) |
341 | { |
342 | int i, in, w0, w1, f0, f1; |
343 | int *buf1, *over1; |
344 | short *pcm1; |
345 | const int *wndPrev, *wndCurr; |
346 | |
347 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
348 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
349 | |
350 | /* pcm[0-447] = 0 + overlap[0-447] */ |
351 | i = 448; |
352 | do { |
353 | f0 = *over0++; |
354 | f1 = *over0++; |
355 | *pcm0 = CLIPTOSHORT((f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
356 | pcm0 += nChans; |
357 | *pcm0 = CLIPTOSHORT((f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
358 | pcm0 += nChans; |
359 | i -= 2; |
360 | } while (i); |
361 | |
362 | /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */ |
363 | pcm1 = pcm0 + (128 - 1) * nChans; |
364 | over1 = over0 + 128 - 1; |
365 | buf0 += 64; |
366 | buf1 = buf0 - 1; |
367 | do { |
368 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
369 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
370 | in = *buf0++; |
371 | |
372 | f0 = MULSHIFT32(w0, in); |
373 | f1 = MULSHIFT32(w1, in); |
374 | |
375 | in = *over0; |
376 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
377 | pcm0 += nChans; |
378 | |
379 | in = *over1; |
380 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
381 | pcm1 -= nChans; |
382 | |
383 | w0 = *wndCurr++; |
384 | w1 = *wndCurr++; |
385 | in = *buf1--; |
386 | |
387 | /* save over0/over1 for next short block, in the slots just vacated */ |
388 | *over1-- = MULSHIFT32(w0, in); |
389 | *over0++ = MULSHIFT32(w1, in); |
390 | } while (over0 < over1); |
391 | |
392 | /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703] |
393 | * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831] |
394 | * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959] |
395 | */ |
396 | for (i = 0; i < 3; i++) { |
397 | pcm0 += 64 * nChans; |
398 | pcm1 = pcm0 + (128 - 1) * nChans; |
399 | over0 += 64; |
400 | over1 = over0 + 128 - 1; |
401 | buf0 += 64; |
402 | buf1 = buf0 - 1; |
403 | wndCurr -= 128; |
404 | |
405 | do { |
406 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
407 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
408 | in = *buf0++; |
409 | |
410 | f0 = MULSHIFT32(w0, in); |
411 | f1 = MULSHIFT32(w1, in); |
412 | |
413 | in = *(over0 - 128); /* from last short block */ |
414 | in += *(over0 + 0); /* from last full frame */ |
415 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
416 | pcm0 += nChans; |
417 | |
418 | in = *(over1 - 128); /* from last short block */ |
419 | in += *(over1 + 0); /* from last full frame */ |
420 | *pcm1 = CLIPTOSHORT((in + f1 + RND_VAL) >> FBITS_OUT_IMDCT); |
421 | pcm1 -= nChans; |
422 | |
423 | /* save over0/over1 for next short block, in the slots just vacated */ |
424 | in = *buf1--; |
425 | *over1-- = MULSHIFT32(w0, in); |
426 | *over0++ = MULSHIFT32(w1, in); |
427 | } while (over0 < over1); |
428 | } |
429 | |
430 | /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023] |
431 | * over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127] |
432 | */ |
433 | pcm0 += 64 * nChans; |
434 | over0 -= 832; /* points at overlap[64] */ |
435 | over1 = over0 + 128 - 1; /* points at overlap[191] */ |
436 | buf0 += 64; |
437 | buf1 = buf0 - 1; |
438 | wndCurr -= 128; |
439 | do { |
440 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
441 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
442 | in = *buf0++; |
443 | |
444 | f0 = MULSHIFT32(w0, in); |
445 | f1 = MULSHIFT32(w1, in); |
446 | |
447 | in = *(over0 + 768); /* from last short block */ |
448 | in += *(over0 + 896); /* from last full frame */ |
449 | *pcm0 = CLIPTOSHORT((in - f0 + RND_VAL) >> FBITS_OUT_IMDCT); |
450 | pcm0 += nChans; |
451 | |
452 | in = *(over1 + 768); /* from last short block */ |
453 | *(over1 - 128) = in + f1; |
454 | |
455 | in = *buf1--; |
456 | *over1-- = MULSHIFT32(w0, in); /* save in overlap[128-191] */ |
457 | *over0++ = MULSHIFT32(w1, in); /* save in overlap[64-127] */ |
458 | } while (over0 < over1); |
459 | |
460 | /* over0 now points at overlap[128] */ |
461 | |
462 | /* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127] |
463 | * over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127] |
464 | * over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127] |
465 | * over[448-576] = Wc[128-255] * block7[128-255] |
466 | */ |
467 | for (i = 0; i < 3; i++) { |
468 | over0 += 64; |
469 | over1 = over0 + 128 - 1; |
470 | buf0 += 64; |
471 | buf1 = buf0 - 1; |
472 | wndCurr -= 128; |
473 | do { |
474 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
475 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
476 | in = *buf0++; |
477 | |
478 | f0 = MULSHIFT32(w0, in); |
479 | f1 = MULSHIFT32(w1, in); |
480 | |
481 | /* from last short block */ |
482 | *(over0 - 128) -= f0; |
483 | *(over1 - 128) += f1; |
484 | |
485 | in = *buf1--; |
486 | *over1-- = MULSHIFT32(w0, in); |
487 | *over0++ = MULSHIFT32(w1, in); |
488 | } while (over0 < over1); |
489 | } |
490 | |
491 | /* over[576-1024] = 0 */ |
492 | i = 448; |
493 | over0 += 64; |
494 | do { |
495 | *over0++ = 0; |
496 | *over0++ = 0; |
497 | *over0++ = 0; |
498 | *over0++ = 0; |
499 | i -= 4; |
500 | } while (i); |
501 | } |
502 | |
503 | #endif /* !AAC_ENABLE_SBR */ |
504 | |
505 | /************************************************************************************** |
506 | * Function: IMDCT |
507 | * |
508 | * Description: inverse transform and convert to 16-bit PCM |
509 | * |
510 | * Inputs: valid AACDecInfo struct |
511 | * index of current channel (0 for SCE/LFE, 0 or 1 for CPE) |
512 | * output channel (range = [0, nChans-1]) |
513 | * |
514 | * Outputs: complete frame of decoded PCM, after inverse transform |
515 | * |
516 | * Return: 0 if successful, -1 if error |
517 | * |
518 | * Notes: If AAC_ENABLE_SBR is defined at compile time then window + overlap |
519 | * does NOT clip to 16-bit PCM and does NOT interleave channels |
520 | * If AAC_ENABLE_SBR is NOT defined at compile time, then window + overlap |
521 | * does clip to 16-bit PCM and interleaves channels |
522 | * If SBR is enabled at compile time, but we don't know whether it is |
523 | * actually used for this frame (e.g. the first frame of a stream), |
524 | * we need to produce both clipped 16-bit PCM in outbuf AND |
525 | * unclipped 32-bit PCM in the SBR input buffer. In this case we make |
526 | * a separate pass over the 32-bit PCM to produce 16-bit PCM output. |
527 | * This inflicts a slight performance hit when decoding non-SBR files. |
528 | **************************************************************************************/ |
529 | int IMDCT(AACDecInfo *aacDecInfo, int ch, int chOut, short *outbuf) |
530 | { |
531 | int i; |
532 | PSInfoBase *psi; |
533 | ICSInfo *icsInfo; |
534 | |
535 | /* validate pointers */ |
536 | if (!aacDecInfo || !aacDecInfo->psInfoBase) { |
537 | return -1; |
538 | } |
539 | psi = (PSInfoBase *)(aacDecInfo->psInfoBase); |
540 | icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]); |
541 | outbuf += chOut; |
542 | |
543 | /* optimized type-IV DCT (operates inplace) */ |
544 | if (icsInfo->winSequence == 2) { |
545 | /* 8 short blocks */ |
546 | for (i = 0; i < 8; i++) { |
547 | DCT4(0, psi->coef[ch] + i * 128, psi->gbCurrent[ch]); |
548 | } |
549 | } else { |
550 | /* 1 long block */ |
551 | DCT4(1, psi->coef[ch], psi->gbCurrent[ch]); |
552 | } |
553 | |
554 | #ifdef AAC_ENABLE_SBR |
555 | /* window, overlap-add, don't clip to short (send to SBR decoder) |
556 | * store the decoded 32-bit samples in top half (second AAC_MAX_NSAMPS samples) of coef buffer |
557 | */ |
558 | if (icsInfo->winSequence == 0) { |
559 | DecWindowOverlapNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
560 | } else if (icsInfo->winSequence == 1) { |
561 | DecWindowOverlapLongStartNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
562 | } else if (icsInfo->winSequence == 2) { |
563 | DecWindowOverlapShortNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
564 | } else if (icsInfo->winSequence == 3) { |
565 | DecWindowOverlapLongStopNoClip(psi->coef[ch], psi->overlap[chOut], psi->sbrWorkBuf[ch], icsInfo->winShape, psi->prevWinShape[chOut]); |
566 | } |
567 | |
568 | if (!aacDecInfo->sbrEnabled) { |
569 | for (i = 0; i < AAC_MAX_NSAMPS; i++) { |
570 | *outbuf = CLIPTOSHORT((psi->sbrWorkBuf[ch][i] + RND_VAL) >> FBITS_OUT_IMDCT); |
571 | outbuf += aacDecInfo->nChans; |
572 | } |
573 | } |
574 | |
575 | aacDecInfo->rawSampleBuf[ch] = psi->sbrWorkBuf[ch]; |
576 | aacDecInfo->rawSampleBytes = sizeof(int); |
577 | aacDecInfo->rawSampleFBits = FBITS_OUT_IMDCT; |
578 | #else |
579 | /* window, overlap-add, round to PCM - optimized for each window sequence */ |
580 | if (icsInfo->winSequence == 0) { |
581 | DecWindowOverlap(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
582 | } else if (icsInfo->winSequence == 1) { |
583 | DecWindowOverlapLongStart(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
584 | } else if (icsInfo->winSequence == 2) { |
585 | DecWindowOverlapShort(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
586 | } else if (icsInfo->winSequence == 3) { |
587 | DecWindowOverlapLongStop(psi->coef[ch], psi->overlap[chOut], outbuf, aacDecInfo->nChans, icsInfo->winShape, psi->prevWinShape[chOut]); |
588 | } |
589 | |
590 | aacDecInfo->rawSampleBuf[ch] = 0; |
591 | aacDecInfo->rawSampleBytes = 0; |
592 | aacDecInfo->rawSampleFBits = 0; |
593 | #endif |
594 | |
595 | psi->prevWinShape[chOut] = icsInfo->winShape; |
596 | |
597 | return 0; |
598 | } |
599 |