/* path: root/audio_codec/libfaad/helixaac/sbrqmf.c
 * blob: 6a9464a418c9e250849f168dcebe036d38e32d38
 */
/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: sbrqmf.c,v 1.1.2.2 2005/05/19 21:00:01 jrecker Exp $
 *
 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks. You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 *
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 * http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

/**************************************************************************************
 * Fixed-point HE-AAC decoder
 * Jon Recker (jrecker@real.com)
 * February 2005
 *
 * sbrqmf.c - analysis and synthesis QMF filters for SBR
 **************************************************************************************/

#include "sbr.h"
#include "assembly.h"

/* PreMultiply64() table
 * format = Q30
 * reordered for sequential access: each group of four entries is
 *   (cos+sin), sin for the low-index sample pair, then
 *   (cos+sin), sin for the mirrored high-index sample pair
 *
 * nmdct = 64;
 * for (i = 0; i < 64/4; i++) {
 *   angle = (i + 0.25) * M_PI / nmdct;
 *   x = (cos(angle) + sin(angle));
 *   x = sin(angle);
 *
 *   angle = (nmdct/2 - 1 - i + 0.25) * M_PI / nmdct;
 *   x = (cos(angle) + sin(angle));
 *   x = sin(angle);
 * }
 */
static const int cos4sin4tab64[64] = {
    0x40c7d2bd, 0x00c90e90, 0x424ff28f, 0x3ff4e5e0, 0x43cdd89a, 0x03ecadcf, 0x454149fc, 0x3fc395f9,
    0x46aa0d6d, 0x070de172, 0x4807eb4b, 0x3f6af2e3, 0x495aada2, 0x0a2abb59, 0x4aa22036, 0x3eeb3347,
    0x4bde1089, 0x0d415013, 0x4d0e4de2, 0x3e44a5ef, 0x4e32a956, 0x104fb80e, 0x4f4af5d1, 0x3d77b192,
    0x50570819, 0x135410c3, 0x5156b6d9, 0x3c84d496, 0x5249daa2, 0x164c7ddd, 0x53304df6, 0x3b6ca4c4,
    0x5409ed4b, 0x19372a64, 0x54d69714, 0x3a2fcee8, 0x55962bc0, 0x1c1249d8, 0x56488dc5, 0x38cf1669,
    0x56eda1a0, 0x1edc1953, 0x57854ddd, 0x374b54ce, 0x580f7b19, 0x2192e09b, 0x588c1404, 0x35a5793c,
    0x58fb0568, 0x2434f332, 0x595c3e2a, 0x33de87de, 0x59afaf4c, 0x26c0b162, 0x59f54bee, 0x31f79948,
    0x5a2d0957, 0x29348937, 0x5a56deec, 0x2ff1d9c7, 0x5a72c63b, 0x2b8ef77d, 0x5a80baf6, 0x2dce88aa,
};

/* PostMultiply64() table
 * format = Q30
 * reordered for sequential access: 17 pairs of (cos+sin), sin
 *
 * for (i = 0; i <= (32/2); i++) {
 *   angle = i * M_PI / 64;
 *   x = (cos(angle) + sin(angle));
 *   x = sin(angle);
 * }
 */
static const int cos1sin1tab64[34] = {
    0x40000000, 0x00000000, 0x43103085, 0x0323ecbe, 0x45f704f7, 0x0645e9af, 0x48b2b335, 0x09640837,
    0x4b418bbe, 0x0c7c5c1e, 0x4da1fab5, 0x0f8cfcbe, 0x4fd288dc, 0x1294062f, 0x51d1dc80, 0x158f9a76,
    0x539eba45, 0x187de2a7, 0x553805f2, 0x1b5d100a, 0x569cc31b, 0x1e2b5d38, 0x57cc15bc, 0x20e70f32,
    0x58c542c5, 0x238e7673, 0x5987b08a, 0x261feffa, 0x5a12e720, 0x2899e64a, 0x5a6690ae, 0x2afad269,
    0x5a82799a, 0x2d413ccd,
};

92/**************************************************************************************
93 * Function: PreMultiply64
94 *
95 * Description: pre-twiddle stage of 64-point DCT-IV
96 *
97 * Inputs: buffer of 64 samples
98 *
99 * Outputs: processed samples in same buffer
100 *
101 * Return: none
102 *
103 * Notes: minimum 1 GB in, 2 GB out, gains 2 int bits
104 * gbOut = gbIn + 1
105 * output is limited to sqrt(2)/2 plus GB in full GB
106 * uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
107 **************************************************************************************/
108static void PreMultiply64(int *zbuf1)
109{
110 int i, ar1, ai1, ar2, ai2, z1, z2;
111 int t, cms2, cps2a, sin2a, cps2b, sin2b;
112 int *zbuf2;
113 const int *csptr;
114
115 zbuf2 = zbuf1 + 64 - 1;
116 csptr = cos4sin4tab64;
117
118 /* whole thing should fit in registers - verify that compiler does this */
119 for (i = 64 >> 2; i != 0; i--) {
120 /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
121 cps2a = *csptr++;
122 sin2a = *csptr++;
123 cps2b = *csptr++;
124 sin2b = *csptr++;
125
126 ar1 = *(zbuf1 + 0);
127 ai2 = *(zbuf1 + 1);
128 ai1 = *(zbuf2 + 0);
129 ar2 = *(zbuf2 - 1);
130
131 /* gain 2 ints bit from MULSHIFT32 by Q30
132 * max per-sample gain (ignoring implicit scaling) = MAX(sin(angle)+cos(angle)) = 1.414
133 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30), gain 2 from
134 * extra sign bits, and eat one in adding
135 */
136 t = MULSHIFT32(sin2a, ar1 + ai1);
137 z2 = MULSHIFT32(cps2a, ai1) - t;
138 cms2 = cps2a - 2 * sin2a;
139 z1 = MULSHIFT32(cms2, ar1) + t;
140 *zbuf1++ = z1; /* cos*ar1 + sin*ai1 */
141 *zbuf1++ = z2; /* cos*ai1 - sin*ar1 */
142
143 t = MULSHIFT32(sin2b, ar2 + ai2);
144 z2 = MULSHIFT32(cps2b, ai2) - t;
145 cms2 = cps2b - 2 * sin2b;
146 z1 = MULSHIFT32(cms2, ar2) + t;
147 *zbuf2-- = z2; /* cos*ai2 - sin*ar2 */
148 *zbuf2-- = z1; /* cos*ar2 + sin*ai2 */
149 }
150}
151
152/**************************************************************************************
153 * Function: PostMultiply64
154 *
155 * Description: post-twiddle stage of 64-point type-IV DCT
156 *
157 * Inputs: buffer of 64 samples
158 * number of output samples to calculate
159 *
160 * Outputs: processed samples in same buffer
161 *
162 * Return: none
163 *
164 * Notes: minimum 1 GB in, 2 GB out, gains 2 int bits
165 * gbOut = gbIn + 1
166 * output is limited to sqrt(2)/2 plus GB in full GB
167 * nSampsOut is rounded up to next multiple of 4, since we calculate
168 * 4 samples per loop
169 **************************************************************************************/
170static void PostMultiply64(int *fft1, int nSampsOut)
171{
172 int i, ar1, ai1, ar2, ai2;
173 int t, cms2, cps2, sin2;
174 int *fft2;
175 const int *csptr;
176
177 csptr = cos1sin1tab64;
178 fft2 = fft1 + 64 - 1;
179
180 /* load coeffs for first pass
181 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
182 */
183 cps2 = *csptr++;
184 sin2 = *csptr++;
185 cms2 = cps2 - 2 * sin2;
186
187 for (i = (nSampsOut + 3) >> 2; i != 0; i--) {
188 ar1 = *(fft1 + 0);
189 ai1 = *(fft1 + 1);
190 ar2 = *(fft2 - 1);
191 ai2 = *(fft2 + 0);
192
193 /* gain 2 int bits (multiplying by Q30), max gain = sqrt(2) */
194 t = MULSHIFT32(sin2, ar1 + ai1);
195 *fft2-- = t - MULSHIFT32(cps2, ai1);
196 *fft1++ = t + MULSHIFT32(cms2, ar1);
197
198 cps2 = *csptr++;
199 sin2 = *csptr++;
200
201 ai2 = -ai2;
202 t = MULSHIFT32(sin2, ar2 + ai2);
203 *fft2-- = t - MULSHIFT32(cps2, ai2);
204 cms2 = cps2 - 2 * sin2;
205 *fft1++ = t + MULSHIFT32(cms2, ar2);
206 }
207}
208
209/**************************************************************************************
210 * Function: QMFAnalysisConv
211 *
212 * Description: convolution kernel for analysis QMF
213 *
214 * Inputs: pointer to coefficient table, reordered for sequential access
215 * delay buffer of size 32*10 = 320 real-valued PCM samples
216 * index for delay ring buffer (range = [0, 9])
217 *
218 * Outputs: 64 consecutive 32-bit samples
219 *
220 * Return: none
221 *
222 * Notes: this is carefully written to be efficient on ARM
223 * use the assembly code version in sbrqmfak.s when building for ARM!
224 **************************************************************************************/
225#if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
226#ifdef __cplusplus
227extern "C"
228#endif
229void QMFAnalysisConv(int *cTab, int *delay, int dIdx, int *uBuf);
230#else
231void QMFAnalysisConv(int *cTab, int *delay, int dIdx, int *uBuf)
232{
233 int k, dOff;
234 int *cPtr0, *cPtr1;
235 U64 u64lo, u64hi;
236
237 dOff = dIdx * 32 + 31;
238 cPtr0 = cTab;
239 cPtr1 = cTab + 33 * 5 - 1;
240
241 /* special first pass since we need to flip sign to create cTab[384], cTab[512] */
242 u64lo.w64 = 0;
243 u64hi.w64 = 0;
244 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
245 dOff -= 32;
246 if (dOff < 0) {
247 dOff += 320;
248 }
249 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
250 dOff -= 32;
251 if (dOff < 0) {
252 dOff += 320;
253 }
254 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
255 dOff -= 32;
256 if (dOff < 0) {
257 dOff += 320;
258 }
259 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
260 dOff -= 32;
261 if (dOff < 0) {
262 dOff += 320;
263 }
264 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
265 dOff -= 32;
266 if (dOff < 0) {
267 dOff += 320;
268 }
269 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
270 dOff -= 32;
271 if (dOff < 0) {
272 dOff += 320;
273 }
274 u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);
275 dOff -= 32;
276 if (dOff < 0) {
277 dOff += 320;
278 }
279 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
280 dOff -= 32;
281 if (dOff < 0) {
282 dOff += 320;
283 }
284 u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);
285 dOff -= 32;
286 if (dOff < 0) {
287 dOff += 320;
288 }
289 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
290 dOff -= 32;
291 if (dOff < 0) {
292 dOff += 320;
293 }
294
295 uBuf[0] = u64lo.r.hi32;
296 uBuf[32] = u64hi.r.hi32;
297 uBuf++;
298 dOff--;
299
300 /* max gain for any sample in uBuf, after scaling by cTab, ~= 0.99
301 * so we can just sum the uBuf values with no overflow problems
302 */
303 for (k = 1; k <= 31; k++) {
304 u64lo.w64 = 0;
305 u64hi.w64 = 0;
306 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
307 dOff -= 32;
308 if (dOff < 0) {
309 dOff += 320;
310 }
311 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
312 dOff -= 32;
313 if (dOff < 0) {
314 dOff += 320;
315 }
316 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
317 dOff -= 32;
318 if (dOff < 0) {
319 dOff += 320;
320 }
321 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
322 dOff -= 32;
323 if (dOff < 0) {
324 dOff += 320;
325 }
326 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
327 dOff -= 32;
328 if (dOff < 0) {
329 dOff += 320;
330 }
331 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
332 dOff -= 32;
333 if (dOff < 0) {
334 dOff += 320;
335 }
336 u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);
337 dOff -= 32;
338 if (dOff < 0) {
339 dOff += 320;
340 }
341 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
342 dOff -= 32;
343 if (dOff < 0) {
344 dOff += 320;
345 }
346 u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);
347 dOff -= 32;
348 if (dOff < 0) {
349 dOff += 320;
350 }
351 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
352 dOff -= 32;
353 if (dOff < 0) {
354 dOff += 320;
355 }
356
357 uBuf[0] = u64lo.r.hi32;
358 uBuf[32] = u64hi.r.hi32;
359 uBuf++;
360 dOff--;
361 }
362}
363#endif
364
365/**************************************************************************************
366 * Function: QMFAnalysis
367 *
368 * Description: 32-subband analysis QMF (4.6.18.4.1)
369 *
370 * Inputs: 32 consecutive samples of decoded 32-bit PCM, format = Q(fBitsIn)
371 * delay buffer of size 32*10 = 320 PCM samples
372 * number of fraction bits in input PCM
373 * index for delay ring buffer (range = [0, 9])
374 * number of subbands to calculate (range = [0, 32])
375 *
376 * Outputs: qmfaBands complex subband samples, format = Q(FBITS_OUT_QMFA)
377 * updated delay buffer
378 * updated delay index
379 *
380 * Return: guard bit mask
381 *
382 * Notes: output stored as RE{X0}, IM{X0}, RE{X1}, IM{X1}, ... RE{X31}, IM{X31}
383 * output stored in int buffer of size 64*2 = 128
384 * (zero-filled from XBuf[2*qmfaBands] to XBuf[127])
385 **************************************************************************************/
386int QMFAnalysis(int *inbuf, int *delay, int *XBuf, int fBitsIn, int *delayIdx, int qmfaBands)
387{
388 int n, y, shift, gbMask;
389 int *delayPtr, *uBuf, *tBuf;
390
391 /* use XBuf[128] as temp buffer for reordering */
392 uBuf = XBuf; /* first 64 samples */
393 tBuf = XBuf + 64; /* second 64 samples */
394
395 /* overwrite oldest PCM with new PCM
396 * delay[n] has 1 GB after shifting (either << or >>)
397 */
398 delayPtr = delay + (*delayIdx * 32);
399 if (fBitsIn > FBITS_IN_QMFA) {
400 shift = MIN(fBitsIn - FBITS_IN_QMFA, 31);
401 for (n = 32; n != 0; n--) {
402 y = (*inbuf) >> shift;
403 inbuf++;
404 *delayPtr++ = y;
405 }
406 } else {
407 shift = MIN(FBITS_IN_QMFA - fBitsIn, 30);
408 for (n = 32; n != 0; n--) {
409 y = *inbuf++;
410 CLIP_2N_SHIFT30(y, shift);
411 *delayPtr++ = y;
412 }
413 }
414
415 QMFAnalysisConv((int *)cTabA, delay, *delayIdx, uBuf);
416
417 /* uBuf has at least 2 GB right now (1 from clipping to Q(FBITS_IN_QMFA), one from
418 * the scaling by cTab (MULSHIFT32(*delayPtr--, *cPtr++), with net gain of < 1.0)
419 * TODO - fuse with QMFAnalysisConv to avoid separate reordering
420 */
421 tBuf[2 * 0 + 0] = uBuf[0];
422 tBuf[2 * 0 + 1] = uBuf[1];
423 for (n = 1; n < 31; n++) {
424 tBuf[2 * n + 0] = -uBuf[64 - n];
425 tBuf[2 * n + 1] = uBuf[n + 1];
426 }
427 tBuf[2 * 31 + 1] = uBuf[32];
428 tBuf[2 * 31 + 0] = -uBuf[33];
429
430 /* fast in-place DCT-IV - only need 2*qmfaBands output samples */
431 PreMultiply64(tBuf); /* 2 GB in, 3 GB out */
432 FFT32C(tBuf); /* 3 GB in, 1 GB out */
433 PostMultiply64(tBuf, qmfaBands * 2); /* 1 GB in, 2 GB out */
434
435 /* TODO - roll into PostMultiply (if enough registers) */
436 gbMask = 0;
437 for (n = 0; n < qmfaBands; n++) {
438 XBuf[2 * n + 0] = tBuf[ n + 0]; /* implicit scaling of 2 in our output Q format */
439 gbMask |= FASTABS(XBuf[2 * n + 0]);
440 XBuf[2 * n + 1] = -tBuf[63 - n];
441 gbMask |= FASTABS(XBuf[2 * n + 1]);
442 }
443
444 /* fill top section with zeros for HF generation */
445 for (; n < 64; n++) {
446 XBuf[2 * n + 0] = 0;
447 XBuf[2 * n + 1] = 0;
448 }
449
450 *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);
451
452 /* minimum of 2 GB in output */
453 return gbMask;
454}
455
/* lose FBITS_LOST_DCT4_64 in DCT4, gain 6 for implicit scaling by 1/64, lose 1 for cTab multiply (Q31) */
#define FBITS_OUT_QMFS  (FBITS_IN_QMFS - FBITS_LOST_DCT4_64 + 6 - 1)
/* half of one output LSB, added before the final right shift by FBITS_OUT_QMFS */
#define RND_VAL         (1 << (FBITS_OUT_QMFS - 1))

460/**************************************************************************************
461 * Function: QMFSynthesisConv
462 *
463 * Description: final convolution kernel for synthesis QMF
464 *
465 * Inputs: pointer to coefficient table, reordered for sequential access
466 * delay buffer of size 64*10 = 640 complex samples (1280 ints)
467 * index for delay ring buffer (range = [0, 9])
468 * number of QMF subbands to process (range = [0, 64])
469 * number of channels
470 *
471 * Outputs: 64 consecutive 16-bit PCM samples, interleaved by factor of nChans
472 *
473 * Return: none
474 *
475 * Notes: this is carefully written to be efficient on ARM
476 * use the assembly code version in sbrqmfsk.s when building for ARM!
477 **************************************************************************************/
478#if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
479#ifdef __cplusplus
480extern "C"
481#endif
482void QMFSynthesisConv(int *cPtr, int *delay, int dIdx, short *outbuf, int nChans);
483#else
484void QMFSynthesisConv(int *cPtr, int *delay, int dIdx, short *outbuf, int nChans)
485{
486 int k, dOff0, dOff1;
487 U64 sum64;
488
489 dOff0 = (dIdx) * 128;
490 dOff1 = dOff0 - 1;
491 if (dOff1 < 0) {
492 dOff1 += 1280;
493 }
494
495 /* scaling note: total gain of coefs (cPtr[0]-cPtr[9] for any k) is < 2.0, so 1 GB in delay values is adequate */
496 for (k = 0; k <= 63; k++) {
497 sum64.w64 = 0;
498 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
499 dOff0 -= 256;
500 if (dOff0 < 0) {
501 dOff0 += 1280;
502 }
503 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
504 dOff1 -= 256;
505 if (dOff1 < 0) {
506 dOff1 += 1280;
507 }
508 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
509 dOff0 -= 256;
510 if (dOff0 < 0) {
511 dOff0 += 1280;
512 }
513 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
514 dOff1 -= 256;
515 if (dOff1 < 0) {
516 dOff1 += 1280;
517 }
518 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
519 dOff0 -= 256;
520 if (dOff0 < 0) {
521 dOff0 += 1280;
522 }
523 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
524 dOff1 -= 256;
525 if (dOff1 < 0) {
526 dOff1 += 1280;
527 }
528 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
529 dOff0 -= 256;
530 if (dOff0 < 0) {
531 dOff0 += 1280;
532 }
533 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
534 dOff1 -= 256;
535 if (dOff1 < 0) {
536 dOff1 += 1280;
537 }
538 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
539 dOff0 -= 256;
540 if (dOff0 < 0) {
541 dOff0 += 1280;
542 }
543 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
544 dOff1 -= 256;
545 if (dOff1 < 0) {
546 dOff1 += 1280;
547 }
548
549 dOff0++;
550 dOff1--;
551 *outbuf = CLIPTOSHORT((sum64.r.hi32 + RND_VAL) >> FBITS_OUT_QMFS);
552 outbuf += nChans;
553 }
554}
555#endif
556
557/**************************************************************************************
558 * Function: QMFSynthesis
559 *
560 * Description: 64-subband synthesis QMF (4.6.18.4.2)
561 *
562 * Inputs: 64 consecutive complex subband QMF samples, format = Q(FBITS_IN_QMFS)
563 * delay buffer of size 64*10 = 640 complex samples (1280 ints)
564 * index for delay ring buffer (range = [0, 9])
565 * number of QMF subbands to process (range = [0, 64])
566 * number of channels
567 *
568 * Outputs: 64 consecutive 16-bit PCM samples, interleaved by factor of nChans
569 * updated delay buffer
570 * updated delay index
571 *
572 * Return: none
573 *
574 * Notes: assumes MIN_GBITS_IN_QMFS guard bits in input, either from
575 * QMFAnalysis (if upsampling only) or from MapHF (if SBR on)
576 **************************************************************************************/
577void QMFSynthesis(int *inbuf, int *delay, int *delayIdx, int qmfsBands, short *outbuf, int nChans)
578{
579 int n, a0, a1, b0, b1, dOff0, dOff1, dIdx;
580 int *tBufLo, *tBufHi;
581
582 dIdx = *delayIdx;
583 tBufLo = delay + dIdx * 128 + 0;
584 tBufHi = delay + dIdx * 128 + 127;
585
586 /* reorder inputs to DCT-IV, only use first qmfsBands (complex) samples
587 * TODO - fuse with PreMultiply64 to avoid separate reordering steps
588 */
589 for (n = 0; n < qmfsBands >> 1; n++) {
590 a0 = *inbuf++;
591 b0 = *inbuf++;
592 a1 = *inbuf++;
593 b1 = *inbuf++;
594 *tBufLo++ = a0;
595 *tBufLo++ = a1;
596 *tBufHi-- = b0;
597 *tBufHi-- = b1;
598 }
599 if (qmfsBands & 0x01) {
600 a0 = *inbuf++;
601 b0 = *inbuf++;
602 *tBufLo++ = a0;
603 *tBufHi-- = b0;
604 *tBufLo++ = 0;
605 *tBufHi-- = 0;
606 n++;
607 }
608 for (; n < 32; n++) {
609 *tBufLo++ = 0;
610 *tBufHi-- = 0;
611 *tBufLo++ = 0;
612 *tBufHi-- = 0;
613 }
614
615 tBufLo = delay + dIdx * 128 + 0;
616 tBufHi = delay + dIdx * 128 + 64;
617
618 /* 2 GB in, 3 GB out */
619 PreMultiply64(tBufLo);
620 PreMultiply64(tBufHi);
621
622 /* 3 GB in, 1 GB out */
623 FFT32C(tBufLo);
624 FFT32C(tBufHi);
625
626 /* 1 GB in, 2 GB out */
627 PostMultiply64(tBufLo, 64);
628 PostMultiply64(tBufHi, 64);
629
630 /* could fuse with PostMultiply64 to avoid separate pass */
631 dOff0 = dIdx * 128;
632 dOff1 = dIdx * 128 + 64;
633 for (n = 32; n != 0; n--) {
634 a0 = (*tBufLo++);
635 a1 = (*tBufLo++);
636 b0 = (*tBufHi++);
637 b1 = -(*tBufHi++);
638
639 delay[dOff0++] = (b0 - a0);
640 delay[dOff0++] = (b1 - a1);
641 delay[dOff1++] = (b0 + a0);
642 delay[dOff1++] = (b1 + a1);
643 }
644
645 QMFSynthesisConv((int *)cTabS, delay, dIdx, outbuf, nChans);
646
647 *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);
648}
649