summaryrefslogtreecommitdiff
path: root/audio_codec/libraac/sbrqmf.c (plain)
blob: 9e8e790c84e80a1607b90f1a19e7cde9f8a86b4f
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: sbrqmf.c,v 1.2 2005/05/19 20:45:20 jrecker Exp $
3 *
4 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file,
7 * are subject to the current version of the RealNetworks Public
8 * Source License (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the current version of the RealNetworks Community
11 * Source License (the "RCSL") available at
12 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13 * will apply. You may also obtain the license terms directly from
14 * RealNetworks. You may not use this file except in compliance with
15 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
17 * the rights, obligations and limitations governing use of the
18 * contents of the file.
19 *
20 * This file is part of the Helix DNA Technology. RealNetworks is the
21 * developer of the Original Code and owns the copyrights in the
22 * portions it created.
23 *
24 * This file, and the files included with this file, is distributed
25 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29 * ENJOYMENT OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * http://www.helixcommunity.org/content/tck
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point HE-AAC decoder
40 * Jon Recker (jrecker@real.com)
41 * February 2005
42 *
43 * sbrqmf.c - analysis and synthesis QMF filters for SBR
44 **************************************************************************************/
45#include "sbr.h"
46#include "assembly.h"
47#include <stdio.h>
48
49
/* Twiddle factors consumed by PreMultiply64()
 *   format = Q30
 *   stored in the exact order the pre-twiddle loop reads them, so each row
 *   below is one loop iteration (i = 0 .. 15, nmdct = 64):
 *
 *     a = (i + 0.25) * M_PI / nmdct;
 *     b = (nmdct/2 - 1 - i + 0.25) * M_PI / nmdct;
 *
 *     row[i] = { cos(a) + sin(a), sin(a), cos(b) + sin(b), sin(b) }
 */
static const int cos4sin4tab64[64] = {
    /* cos(a)+sin(a)  sin(a)      cos(b)+sin(b)  sin(b) */
    0x40c7d2bd, 0x00c90e90, 0x424ff28f, 0x3ff4e5e0,
    0x43cdd89a, 0x03ecadcf, 0x454149fc, 0x3fc395f9,
    0x46aa0d6d, 0x070de172, 0x4807eb4b, 0x3f6af2e3,
    0x495aada2, 0x0a2abb59, 0x4aa22036, 0x3eeb3347,
    0x4bde1089, 0x0d415013, 0x4d0e4de2, 0x3e44a5ef,
    0x4e32a956, 0x104fb80e, 0x4f4af5d1, 0x3d77b192,
    0x50570819, 0x135410c3, 0x5156b6d9, 0x3c84d496,
    0x5249daa2, 0x164c7ddd, 0x53304df6, 0x3b6ca4c4,
    0x5409ed4b, 0x19372a64, 0x54d69714, 0x3a2fcee8,
    0x55962bc0, 0x1c1249d8, 0x56488dc5, 0x38cf1669,
    0x56eda1a0, 0x1edc1953, 0x57854ddd, 0x374b54ce,
    0x580f7b19, 0x2192e09b, 0x588c1404, 0x35a5793c,
    0x58fb0568, 0x2434f332, 0x595c3e2a, 0x33de87de,
    0x59afaf4c, 0x26c0b162, 0x59f54bee, 0x31f79948,
    0x5a2d0957, 0x29348937, 0x5a56deec, 0x2ff1d9c7,
    0x5a72c63b, 0x2b8ef77d, 0x5a80baf6, 0x2dce88aa,
};
74
/* Twiddle factors consumed by PostMultiply64()
 *   format = Q30
 *   stored in the order the post-twiddle loop reads them: one
 *   (cos + sin, sin) pair per angle, 17 pairs in total
 *
 *     for (i = 0; i <= 32/2; i++) {
 *         angle = i * M_PI / 64;
 *         pair[i] = { cos(angle) + sin(angle), sin(angle) };
 *     }
 */
static const int cos1sin1tab64[34] = {
    /* cos+sin     sin */
    0x40000000, 0x00000000,
    0x43103085, 0x0323ecbe,
    0x45f704f7, 0x0645e9af,
    0x48b2b335, 0x09640837,
    0x4b418bbe, 0x0c7c5c1e,
    0x4da1fab5, 0x0f8cfcbe,
    0x4fd288dc, 0x1294062f,
    0x51d1dc80, 0x158f9a76,
    0x539eba45, 0x187de2a7,
    0x553805f2, 0x1b5d100a,
    0x569cc31b, 0x1e2b5d38,
    0x57cc15bc, 0x20e70f32,
    0x58c542c5, 0x238e7673,
    0x5987b08a, 0x261feffa,
    0x5a12e720, 0x2899e64a,
    0x5a6690ae, 0x2afad269,
    0x5a82799a, 0x2d413ccd,
};
92
93/**************************************************************************************
94 * Function: PreMultiply64
95 *
96 * Description: pre-twiddle stage of 64-point DCT-IV
97 *
98 * Inputs: buffer of 64 samples
99 *
100 * Outputs: processed samples in same buffer
101 *
102 * Return: none
103 *
104 * Notes: minimum 1 GB in, 2 GB out, gains 2 int bits
105 * gbOut = gbIn + 1
106 * output is limited to sqrt(2)/2 plus GB in full GB
107 * uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
108 **************************************************************************************/
109static void PreMultiply64(int *zbuf1)
110{
111 int i, ar1, ai1, ar2, ai2, z1, z2;
112 int t, cms2, cps2a, sin2a, cps2b, sin2b;
113 int *zbuf2;
114 const int *csptr;
115
116 zbuf2 = zbuf1 + 64 - 1;
117 csptr = cos4sin4tab64;
118
119 /* whole thing should fit in registers - verify that compiler does this */
120 for (i = 64 >> 2; i != 0; i--) {
121 /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
122 cps2a = *csptr++;
123 sin2a = *csptr++;
124 cps2b = *csptr++;
125 sin2b = *csptr++;
126
127 ar1 = *(zbuf1 + 0);
128 ai2 = *(zbuf1 + 1);
129 ai1 = *(zbuf2 + 0);
130 ar2 = *(zbuf2 - 1);
131
132 /* gain 2 ints bit from MULSHIFT32 by Q30
133 * max per-sample gain (ignoring implicit scaling) = MAX(sin(angle)+cos(angle)) = 1.414
134 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30), gain 2 from
135 * extra sign bits, and eat one in adding
136 */
137 t = MULSHIFT32(sin2a, ar1 + ai1);
138 z2 = MULSHIFT32(cps2a, ai1) - t;
139 cms2 = cps2a - 2 * sin2a;
140 z1 = MULSHIFT32(cms2, ar1) + t;
141 *zbuf1++ = z1; /* cos*ar1 + sin*ai1 */
142 *zbuf1++ = z2; /* cos*ai1 - sin*ar1 */
143
144 t = MULSHIFT32(sin2b, ar2 + ai2);
145 z2 = MULSHIFT32(cps2b, ai2) - t;
146 cms2 = cps2b - 2 * sin2b;
147 z1 = MULSHIFT32(cms2, ar2) + t;
148 *zbuf2-- = z2; /* cos*ai2 - sin*ar2 */
149 *zbuf2-- = z1; /* cos*ar2 + sin*ai2 */
150 }
151}
152
153/**************************************************************************************
154 * Function: PostMultiply64
155 *
156 * Description: post-twiddle stage of 64-point type-IV DCT
157 *
158 * Inputs: buffer of 64 samples
159 * number of output samples to calculate
160 *
161 * Outputs: processed samples in same buffer
162 *
163 * Return: none
164 *
165 * Notes: minimum 1 GB in, 2 GB out, gains 2 int bits
166 * gbOut = gbIn + 1
167 * output is limited to sqrt(2)/2 plus GB in full GB
168 * nSampsOut is rounded up to next multiple of 4, since we calculate
169 * 4 samples per loop
170 **************************************************************************************/
171static void PostMultiply64(int *fft1, int nSampsOut)
172{
173 int i, ar1, ai1, ar2, ai2;
174 int t, cms2, cps2, sin2;
175 int *fft2;
176 const int *csptr;
177
178 csptr = cos1sin1tab64;
179 fft2 = fft1 + 64 - 1;
180
181 /* load coeffs for first pass
182 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
183 */
184 cps2 = *csptr++;
185 sin2 = *csptr++;
186 cms2 = cps2 - 2 * sin2;
187
188 for (i = (nSampsOut + 3) >> 2; i != 0; i--) {
189 ar1 = *(fft1 + 0);
190 ai1 = *(fft1 + 1);
191 ar2 = *(fft2 - 1);
192 ai2 = *(fft2 + 0);
193
194 /* gain 2 int bits (multiplying by Q30), max gain = sqrt(2) */
195 t = MULSHIFT32(sin2, ar1 + ai1);
196 *fft2-- = t - MULSHIFT32(cps2, ai1);
197 *fft1++ = t + MULSHIFT32(cms2, ar1);
198
199 cps2 = *csptr++;
200 sin2 = *csptr++;
201
202 ai2 = -ai2;
203 t = MULSHIFT32(sin2, ar2 + ai2);
204 *fft2-- = t - MULSHIFT32(cps2, ai2);
205 cms2 = cps2 - 2 * sin2;
206 *fft1++ = t + MULSHIFT32(cms2, ar2);
207 }
208}
209
210/**************************************************************************************
211 * Function: QMFAnalysisConv
212 *
213 * Description: convolution kernel for analysis QMF
214 *
215 * Inputs: pointer to coefficient table, reordered for sequential access
216 * delay buffer of size 32*10 = 320 real-valued PCM samples
217 * index for delay ring buffer (range = [0, 9])
218 *
219 * Outputs: 64 consecutive 32-bit samples
220 *
221 * Return: none
222 *
223 * Notes: this is carefully written to be efficient on ARM
224 * use the assembly code version in sbrqmfak.s when building for ARM!
225 **************************************************************************************/
226
227#ifdef __cplusplus
228extern "C"
229#endif
230void QMFAnalysisConv(int *cTab, int *delay, int dIdx, int *uBuf)
231{
232 int k, dOff;
233 int *cPtr0, *cPtr1;
234 U64 u64lo, u64hi;
235
236 dOff = dIdx * 32 + 31;
237 cPtr0 = cTab;
238 cPtr1 = cTab + 33 * 5 - 1;
239
240 /* special first pass since we need to flip sign to create cTab[384], cTab[512] */
241 u64lo.w64 = 0;
242 u64hi.w64 = 0;
243 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
244 dOff -= 32;
245 if (dOff < 0) {
246 dOff += 320;
247 }
248 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
249 dOff -= 32;
250 if (dOff < 0) {
251 dOff += 320;
252 }
253 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
254 dOff -= 32;
255 if (dOff < 0) {
256 dOff += 320;
257 }
258 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
259 dOff -= 32;
260 if (dOff < 0) {
261 dOff += 320;
262 }
263 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
264 dOff -= 32;
265 if (dOff < 0) {
266 dOff += 320;
267 }
268 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
269 dOff -= 32;
270 if (dOff < 0) {
271 dOff += 320;
272 }
273 u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);
274 dOff -= 32;
275 if (dOff < 0) {
276 dOff += 320;
277 }
278 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
279 dOff -= 32;
280 if (dOff < 0) {
281 dOff += 320;
282 }
283 u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);
284 dOff -= 32;
285 if (dOff < 0) {
286 dOff += 320;
287 }
288 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
289 dOff -= 32;
290 if (dOff < 0) {
291 dOff += 320;
292 }
293
294 uBuf[0] = u64lo.r.hi32;
295 uBuf[32] = u64hi.r.hi32;
296 uBuf++;
297 dOff--;
298
299 /* max gain for any sample in uBuf, after scaling by cTab, ~= 0.99
300 * so we can just sum the uBuf values with no overflow problems
301 */
302 for (k = 1; k <= 31; k++) {
303 u64lo.w64 = 0;
304 u64hi.w64 = 0;
305 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
306 dOff -= 32;
307 if (dOff < 0) {
308 dOff += 320;
309 }
310 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
311 dOff -= 32;
312 if (dOff < 0) {
313 dOff += 320;
314 }
315 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
316 dOff -= 32;
317 if (dOff < 0) {
318 dOff += 320;
319 }
320 u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);
321 dOff -= 32;
322 if (dOff < 0) {
323 dOff += 320;
324 }
325 u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);
326 dOff -= 32;
327 if (dOff < 0) {
328 dOff += 320;
329 }
330 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
331 dOff -= 32;
332 if (dOff < 0) {
333 dOff += 320;
334 }
335 u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);
336 dOff -= 32;
337 if (dOff < 0) {
338 dOff += 320;
339 }
340 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
341 dOff -= 32;
342 if (dOff < 0) {
343 dOff += 320;
344 }
345 u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);
346 dOff -= 32;
347 if (dOff < 0) {
348 dOff += 320;
349 }
350 u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);
351 dOff -= 32;
352 if (dOff < 0) {
353 dOff += 320;
354 }
355
356 uBuf[0] = u64lo.r.hi32;
357 uBuf[32] = u64hi.r.hi32;
358 uBuf++;
359 dOff--;
360 }
361}
362
363/**************************************************************************************
364 * Function: QMFAnalysis
365 *
366 * Description: 32-subband analysis QMF (4.6.18.4.1)
367 *
368 * Inputs: 32 consecutive samples of decoded 32-bit PCM, format = Q(fBitsIn)
369 * delay buffer of size 32*10 = 320 PCM samples
370 * number of fraction bits in input PCM
371 * index for delay ring buffer (range = [0, 9])
372 * number of subbands to calculate (range = [0, 32])
373 *
374 * Outputs: qmfaBands complex subband samples, format = Q(FBITS_OUT_QMFA)
375 * updated delay buffer
376 * updated delay index
377 *
378 * Return: guard bit mask
379 *
380 * Notes: output stored as RE{X0}, IM{X0}, RE{X1}, IM{X1}, ... RE{X31}, IM{X31}
381 * output stored in int buffer of size 64*2 = 128
382 * (zero-filled from XBuf[2*qmfaBands] to XBuf[127])
383 **************************************************************************************/
384int QMFAnalysis(int *inbuf, int *delay, int *XBuf, int fBitsIn, int *delayIdx, int qmfaBands)
385{
386 int n, y, shift, gbMask;
387 int *delayPtr, *uBuf, *tBuf;
388
389 /* use XBuf[128] as temp buffer for reordering */
390 uBuf = XBuf; /* first 64 samples */
391 tBuf = XBuf + 64; /* second 64 samples */
392
393 /* overwrite oldest PCM with new PCM
394 * delay[n] has 1 GB after shifting (either << or >>)
395 */
396 delayPtr = delay + (*delayIdx * 32);
397 if (fBitsIn > FBITS_IN_QMFA) {
398 shift = MIN(fBitsIn - FBITS_IN_QMFA, 31);
399 for (n = 32; n != 0; n--) {
400 y = (*inbuf) >> shift;
401 inbuf++;
402 *delayPtr++ = y;
403 }
404 } else {
405 shift = MIN(FBITS_IN_QMFA - fBitsIn, 30);
406 for (n = 32; n != 0; n--) {
407 y = *inbuf++;
408 CLIP_2N_SHIFT30(y, shift);
409 *delayPtr++ = y;
410 }
411 }
412
413 QMFAnalysisConv((int *)cTabA, delay, *delayIdx, uBuf);
414
415 /* uBuf has at least 2 GB right now (1 from clipping to Q(FBITS_IN_QMFA), one from
416 * the scaling by cTab (MULSHIFT32(*delayPtr--, *cPtr++), with net gain of < 1.0)
417 * TODO - fuse with QMFAnalysisConv to avoid separate reordering
418 */
419 tBuf[2 * 0 + 0] = uBuf[0];
420 tBuf[2 * 0 + 1] = uBuf[1];
421 for (n = 1; n < 31; n++) {
422 tBuf[2 * n + 0] = -uBuf[64 - n];
423 tBuf[2 * n + 1] = uBuf[n + 1];
424 }
425 tBuf[2 * 31 + 1] = uBuf[32];
426 tBuf[2 * 31 + 0] = -uBuf[33];
427
428 /* fast in-place DCT-IV - only need 2*qmfaBands output samples */
429 PreMultiply64(tBuf); /* 2 GB in, 3 GB out */
430 FFT32C(tBuf); /* 3 GB in, 1 GB out */
431 PostMultiply64(tBuf, qmfaBands * 2); /* 1 GB in, 2 GB out */
432
433 /* TODO - roll into PostMultiply (if enough registers) */
434 gbMask = 0;
435 for (n = 0; n < qmfaBands; n++) {
436 XBuf[2 * n + 0] = tBuf[ n + 0]; /* implicit scaling of 2 in our output Q format */
437 gbMask |= FASTABS(XBuf[2 * n + 0]);
438 XBuf[2 * n + 1] = -tBuf[63 - n];
439 gbMask |= FASTABS(XBuf[2 * n + 1]);
440 }
441
442 /* fill top section with zeros for HF generation */
443 for (; n < 64; n++) {
444 XBuf[2 * n + 0] = 0;
445 XBuf[2 * n + 1] = 0;
446 }
447
448 *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);
449
450 /* minimum of 2 GB in output */
451 return gbMask;
452}
453
454/* lose FBITS_LOST_DCT4_64 in DCT4, gain 6 for implicit scaling by 1/64, lose 1 for cTab multiply (Q31) */
455#define FBITS_OUT_QMFS (FBITS_IN_QMFS - FBITS_LOST_DCT4_64 + 6 - 1)
456#define RND_VAL (1 << (FBITS_OUT_QMFS-1))
457
458/**************************************************************************************
459 * Function: QMFSynthesisConv
460 *
461 * Description: final convolution kernel for synthesis QMF
462 *
463 * Inputs: pointer to coefficient table, reordered for sequential access
464 * delay buffer of size 64*10 = 640 complex samples (1280 ints)
465 * index for delay ring buffer (range = [0, 9])
466 * number of QMF subbands to process (range = [0, 64])
467 * number of channels
468 *
469 * Outputs: 64 consecutive 16-bit PCM samples, interleaved by factor of nChans
470 *
471 * Return: none
472 *
473 * Notes: this is carefully written to be efficient on ARM
474 * use the assembly code version in sbrqmfsk.s when building for ARM!
475 **************************************************************************************/
476
477#ifdef __cplusplus
478extern "C"
479#endif
480void QMFSynthesisConv(int *cPtr, int *delay, int dIdx, short *outbuf, int nChans)
481{
482 int k, dOff0, dOff1;
483 U64 sum64;
484
485 dOff0 = (dIdx) * 128;
486 dOff1 = dOff0 - 1;
487 if (dOff1 < 0) {
488 dOff1 += 1280;
489 }
490
491 /* scaling note: total gain of coefs (cPtr[0]-cPtr[9] for any k) is < 2.0, so 1 GB in delay values is adequate */
492 for (k = 0; k <= 63; k++) {
493 sum64.w64 = 0;
494 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
495 dOff0 -= 256;
496 if (dOff0 < 0) {
497 dOff0 += 1280;
498 }
499 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
500 dOff1 -= 256;
501 if (dOff1 < 0) {
502 dOff1 += 1280;
503 }
504 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
505 dOff0 -= 256;
506 if (dOff0 < 0) {
507 dOff0 += 1280;
508 }
509 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
510 dOff1 -= 256;
511 if (dOff1 < 0) {
512 dOff1 += 1280;
513 }
514 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
515 dOff0 -= 256;
516 if (dOff0 < 0) {
517 dOff0 += 1280;
518 }
519 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
520 dOff1 -= 256;
521 if (dOff1 < 0) {
522 dOff1 += 1280;
523 }
524 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
525 dOff0 -= 256;
526 if (dOff0 < 0) {
527 dOff0 += 1280;
528 }
529 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
530 dOff1 -= 256;
531 if (dOff1 < 0) {
532 dOff1 += 1280;
533 }
534 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);
535 dOff0 -= 256;
536 if (dOff0 < 0) {
537 dOff0 += 1280;
538 }
539 sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);
540 dOff1 -= 256;
541 if (dOff1 < 0) {
542 dOff1 += 1280;
543 }
544
545 dOff0++;
546 dOff1--;
547 *outbuf = CLIPTOSHORT((sum64.r.hi32 + RND_VAL) >> FBITS_OUT_QMFS);
548 outbuf += nChans;
549 }
550}
551
552/**************************************************************************************
553 * Function: QMFSynthesis
554 *
555 * Description: 64-subband synthesis QMF (4.6.18.4.2)
556 *
557 * Inputs: 64 consecutive complex subband QMF samples, format = Q(FBITS_IN_QMFS)
558 * delay buffer of size 64*10 = 640 complex samples (1280 ints)
559 * index for delay ring buffer (range = [0, 9])
560 * number of QMF subbands to process (range = [0, 64])
561 * number of channels
562 *
563 * Outputs: 64 consecutive 16-bit PCM samples, interleaved by factor of nChans
564 * updated delay buffer
565 * updated delay index
566 *
567 * Return: none
568 *
569 * Notes: assumes MIN_GBITS_IN_QMFS guard bits in input, either from
570 * QMFAnalysis (if upsampling only) or from MapHF (if SBR on)
571 **************************************************************************************/
572void QMFSynthesis(int *inbuf, int *delay, int *delayIdx, int qmfsBands, short *outbuf, int nChans)
573{
574 int n, a0, a1, b0, b1, dOff0, dOff1, dIdx;
575 int *tBufLo, *tBufHi;
576
577 dIdx = *delayIdx;
578 tBufLo = delay + dIdx * 128 + 0;
579 tBufHi = delay + dIdx * 128 + 127;
580
581 /* reorder inputs to DCT-IV, only use first qmfsBands (complex) samples
582 * TODO - fuse with PreMultiply64 to avoid separate reordering steps
583 */
584 for (n = 0; n < qmfsBands >> 1; n++) {
585 a0 = *inbuf++;
586 b0 = *inbuf++;
587 a1 = *inbuf++;
588 b1 = *inbuf++;
589 *tBufLo++ = a0;
590 *tBufLo++ = a1;
591 *tBufHi-- = b0;
592 *tBufHi-- = b1;
593 }
594 if (qmfsBands & 0x01) {
595 a0 = *inbuf++;
596 b0 = *inbuf++;
597 *tBufLo++ = a0;
598 *tBufHi-- = b0;
599 *tBufLo++ = 0;
600 *tBufHi-- = 0;
601 n++;
602 }
603 for (; n < 32; n++) {
604 *tBufLo++ = 0;
605 *tBufHi-- = 0;
606 *tBufLo++ = 0;
607 *tBufHi-- = 0;
608 }
609
610 tBufLo = delay + dIdx * 128 + 0;
611 tBufHi = delay + dIdx * 128 + 64;
612
613 /* 2 GB in, 3 GB out */
614 PreMultiply64(tBufLo);
615 PreMultiply64(tBufHi);
616
617 /* 3 GB in, 1 GB out */
618 FFT32C(tBufLo);
619 FFT32C(tBufHi);
620
621 /* 1 GB in, 2 GB out */
622 PostMultiply64(tBufLo, 64);
623 PostMultiply64(tBufHi, 64);
624
625 /* could fuse with PostMultiply64 to avoid separate pass */
626 dOff0 = dIdx * 128;
627 dOff1 = dIdx * 128 + 64;
628 for (n = 32; n != 0; n--) {
629 a0 = (*tBufLo++);
630 a1 = (*tBufLo++);
631 b0 = (*tBufHi++);
632 b1 = -(*tBufHi++);
633
634 delay[dOff0++] = (b0 - a0);
635 delay[dOff0++] = (b1 - a1);
636 delay[dOff1++] = (b0 + a0);
637 delay[dOff1++] = (b1 + a1);
638 }
639
640 QMFSynthesisConv((int *)cTabS, delay, dIdx, outbuf, nChans);
641
642 *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);
643}
644