platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: sbrhfgen.c,v 1.2 2005/05/19 20:45:20 jrecker Exp $
3  *
4  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5  *
6  * The contents of this file, and the files included with this file,
7  * are subject to the current version of the RealNetworks Public
8  * Source License (the "RPSL") available at
9  * http://www.helixcommunity.org/content/rpsl unless you have licensed
10  * the file under the current version of the RealNetworks Community
11  * Source License (the "RCSL") available at
12  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13  * will apply. You may also obtain the license terms directly from
14  * RealNetworks.  You may not use this file except in compliance with
15  * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
17  * the rights, obligations and limitations governing use of the
18  * contents of the file.
19  *
20  * This file is part of the Helix DNA Technology. RealNetworks is the
21  * developer of the Original Code and owns the copyrights in the
22  * portions it created.
23  *
24  * This file, and the files included with this file, is distributed
25  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29  * ENJOYMENT OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  *    http://www.helixcommunity.org/content/tck
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point HE-AAC decoder
40  * Jon Recker (jrecker@real.com)
41  * February 2005
42  *
43  * sbrhfgen.c - high frequency generation for SBR
44  **************************************************************************************/
45
46 #include "sbr.h"
47 #include "assembly.h"
48
/* LP coefficients are kept in Q29, which covers the range (-4, 4) */
#define FBITS_LPCOEFS   29

/* 16.0 in the Q format produced by MULSHIFT32 of two Q29 values:
 *   Q(29 + 29 - 32) = Q26, so 16.0 = 16 * 2^26
 */
#define MAG_16          (16 * (1 << (2 * FBITS_LPCOEFS - 32)))

/* 1.0 / (1.0 + 1e-6) in Q31 */
#define RELAX_COEF      0x7ffff79c

/* Target chirp factors (table 4.158), indexed as
 *   newBWTab[previous invfMode][current invfMode], format = Q31
 *
 *   0x4ccccccd ~ 0.60    0x60000000 = 0.75
 *   0x73333333 ~ 0.90    0x7d70a3d7 ~ 0.98
 *
 * sample file which uses all of these: al_sbr_sr_64_2_fsaac32.aac
 */
static const int newBWTab[4][4] = {
    /* curr:  0           1           2           3        */
    { 0x00000000, 0x4ccccccd, 0x73333333, 0x7d70a3d7 },    /* prev = 0 */
    { 0x4ccccccd, 0x60000000, 0x73333333, 0x7d70a3d7 },    /* prev = 1 */
    { 0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7 },    /* prev = 2 */
    { 0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7 },    /* prev = 3 */
};
62
63 /**************************************************************************************
64  * Function:    CVKernel1
65  *
66  * Description: kernel of covariance matrix calculation for p01, p11, p12, p22
67  *
68  * Inputs:      buffer of low-freq samples, starting at time index = 0,
69  *                freq index = patch subband
70  *
71  * Outputs:     64-bit accumulators for p01re, p01im, p12re, p12im, p11re, p22re
72  *                stored in accBuf
73  *
74  * Return:      none
75  *
76  * Notes:       this is carefully written to be efficient on ARM
77  *              use the assembly code version in sbrcov.s when building for ARM!
78  **************************************************************************************/
79
80 #ifdef __cplusplus
81 extern "C"
82 #endif
83
84 void CVKernel1(int *XBuf, int *accBuf)
85 {
86     U64 p01re, p01im, p12re, p12im, p11re, p22re;
87     int n, x0re, x0im, x1re, x1im;
88
89     x0re = XBuf[0];
90     x0im = XBuf[1];
91     XBuf += (2 * 64);
92     x1re = XBuf[0];
93     x1im = XBuf[1];
94     XBuf += (2 * 64);
95
96     p01re.w64 = p01im.w64 = 0;
97     p12re.w64 = p12im.w64 = 0;
98     p11re.w64 = 0;
99     p22re.w64 = 0;
100
101     p12re.w64 = MADD64(p12re.w64,  x1re, x0re);
102     p12re.w64 = MADD64(p12re.w64,  x1im, x0im);
103     p12im.w64 = MADD64(p12im.w64,  x0re, x1im);
104     p12im.w64 = MADD64(p12im.w64, -x0im, x1re);
105     p22re.w64 = MADD64(p22re.w64,  x0re, x0re);
106     p22re.w64 = MADD64(p22re.w64,  x0im, x0im);
107     for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) {
108         /* 4 input, 3*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
109         x0re = x1re;
110         x0im = x1im;
111         x1re = XBuf[0];
112         x1im = XBuf[1];
113
114         p01re.w64 = MADD64(p01re.w64,  x1re, x0re);
115         p01re.w64 = MADD64(p01re.w64,  x1im, x0im);
116         p01im.w64 = MADD64(p01im.w64,  x0re, x1im);
117         p01im.w64 = MADD64(p01im.w64, -x0im, x1re);
118         p11re.w64 = MADD64(p11re.w64,  x0re, x0re);
119         p11re.w64 = MADD64(p11re.w64,  x0im, x0im);
120
121         XBuf += (2 * 64);
122     }
123     /* these can be derived by slight changes to account for boundary conditions */
124     p12re.w64 += p01re.w64;
125     p12re.w64 = MADD64(p12re.w64, x1re, -x0re);
126     p12re.w64 = MADD64(p12re.w64, x1im, -x0im);
127     p12im.w64 += p01im.w64;
128     p12im.w64 = MADD64(p12im.w64, x0re, -x1im);
129     p12im.w64 = MADD64(p12im.w64, x0im,  x1re);
130     p22re.w64 += p11re.w64;
131     p22re.w64 = MADD64(p22re.w64, x0re, -x0re);
132     p22re.w64 = MADD64(p22re.w64, x0im, -x0im);
133
134     accBuf[0]  = p01re.r.lo32;
135     accBuf[1]  = p01re.r.hi32;
136     accBuf[2]  = p01im.r.lo32;
137     accBuf[3]  = p01im.r.hi32;
138     accBuf[4]  = p11re.r.lo32;
139     accBuf[5]  = p11re.r.hi32;
140     accBuf[6]  = p12re.r.lo32;
141     accBuf[7]  = p12re.r.hi32;
142     accBuf[8]  = p12im.r.lo32;
143     accBuf[9]  = p12im.r.hi32;
144     accBuf[10] = p22re.r.lo32;
145     accBuf[11] = p22re.r.hi32;
146 }
147
148 /**************************************************************************************
149  * Function:    CalcCovariance1
150  *
151  * Description: calculate covariance matrix for p01, p12, p11, p22 (4.6.18.6.2)
152  *
153  * Inputs:      buffer of low-freq samples, starting at time index 0,
154  *                freq index = patch subband
155  *
156  * Outputs:     complex covariance elements p01re, p01im, p12re, p12im, p11re, p22re
157  *                (p11im = p22im = 0)
158  *              format = integer (Q0) * 2^N (N is the returned scalefactor and may be negative)
159  *
160  * Return:      scalefactor N
161  *
162  * Notes:       outputs are normalized to have 1 GB (sign in at least top 2 bits)
163  **************************************************************************************/
164 static int CalcCovariance1(int *XBuf, int *p01reN, int *p01imN, int *p12reN, int *p12imN, int *p11reN, int *p22reN)
165 {
166     int accBuf[2 * 6];
167     int n, z, s, loShift, hiShift, gbMask;
168     U64 p01re, p01im, p12re, p12im, p11re, p22re;
169
170     CVKernel1(XBuf, accBuf);
171     p01re.r.lo32 = accBuf[0];
172     p01re.r.hi32 = accBuf[1];
173     p01im.r.lo32 = accBuf[2];
174     p01im.r.hi32 = accBuf[3];
175     p11re.r.lo32 = accBuf[4];
176     p11re.r.hi32 = accBuf[5];
177     p12re.r.lo32 = accBuf[6];
178     p12re.r.hi32 = accBuf[7];
179     p12im.r.lo32 = accBuf[8];
180     p12im.r.hi32 = accBuf[9];
181     p22re.r.lo32 = accBuf[10];
182     p22re.r.hi32 = accBuf[11];
183
184     /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits
185      * want to scale them down to integers (32-bit signed, Q0)
186      *   with scale factor of 2^n, n >= 0
187      * leave 2 GB's for calculating determinant, so take top 30 non-zero bits
188      */
189     gbMask  = ((p01re.r.hi32) ^(p01re.r.hi32 >> 31)) | ((p01im.r.hi32) ^(p01im.r.hi32 >> 31));
190     gbMask |= ((p12re.r.hi32) ^(p12re.r.hi32 >> 31)) | ((p12im.r.hi32) ^(p12im.r.hi32 >> 31));
191     gbMask |= ((p11re.r.hi32) ^(p11re.r.hi32 >> 31)) | ((p22re.r.hi32) ^(p22re.r.hi32 >> 31));
192     if (gbMask == 0) {
193         s = p01re.r.hi32 >> 31;
194         gbMask  = (p01re.r.lo32 ^ s) - s;
195         s = p01im.r.hi32 >> 31;
196         gbMask |= (p01im.r.lo32 ^ s) - s;
197         s = p12re.r.hi32 >> 31;
198         gbMask |= (p12re.r.lo32 ^ s) - s;
199         s = p12im.r.hi32 >> 31;
200         gbMask |= (p12im.r.lo32 ^ s) - s;
201         s = p11re.r.hi32 >> 31;
202         gbMask |= (p11re.r.lo32 ^ s) - s;
203         s = p22re.r.hi32 >> 31;
204         gbMask |= (p22re.r.lo32 ^ s) - s;
205         z = 32 + CLZ(gbMask);
206     } else {
207         gbMask  = FASTABS(p01re.r.hi32) | FASTABS(p01im.r.hi32);
208         gbMask |= FASTABS(p12re.r.hi32) | FASTABS(p12im.r.hi32);
209         gbMask |= FASTABS(p11re.r.hi32) | FASTABS(p22re.r.hi32);
210         z = CLZ(gbMask);
211     }
212
213     n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */
214     if (n <= 30) {
215         loShift = (30 - n);
216         *p01reN = p01re.r.lo32 << loShift;
217         *p01imN = p01im.r.lo32 << loShift;
218         *p12reN = p12re.r.lo32 << loShift;
219         *p12imN = p12im.r.lo32 << loShift;
220         *p11reN = p11re.r.lo32 << loShift;
221         *p22reN = p22re.r.lo32 << loShift;
222         return -(loShift + 2 * FBITS_OUT_QMFA);
223     } else if (n < 32 + 30) {
224         loShift = (n - 30);
225         hiShift = 32 - loShift;
226         *p01reN = (p01re.r.hi32 << hiShift) | (p01re.r.lo32 >> loShift);
227         *p01imN = (p01im.r.hi32 << hiShift) | (p01im.r.lo32 >> loShift);
228         *p12reN = (p12re.r.hi32 << hiShift) | (p12re.r.lo32 >> loShift);
229         *p12imN = (p12im.r.hi32 << hiShift) | (p12im.r.lo32 >> loShift);
230         *p11reN = (p11re.r.hi32 << hiShift) | (p11re.r.lo32 >> loShift);
231         *p22reN = (p22re.r.hi32 << hiShift) | (p22re.r.lo32 >> loShift);
232         return (loShift - 2 * FBITS_OUT_QMFA);
233     } else {
234         hiShift = n - (32 + 30);
235         *p01reN = p01re.r.hi32 >> hiShift;
236         *p01imN = p01im.r.hi32 >> hiShift;
237         *p12reN = p12re.r.hi32 >> hiShift;
238         *p12imN = p12im.r.hi32 >> hiShift;
239         *p11reN = p11re.r.hi32 >> hiShift;
240         *p22reN = p22re.r.hi32 >> hiShift;
241         return (32 - 2 * FBITS_OUT_QMFA - hiShift);
242     }
243
244     return 0;
245 }
246
247 /**************************************************************************************
248  * Function:    CVKernel2
249  *
250  * Description: kernel of covariance matrix calculation for p02
251  *
252  * Inputs:      buffer of low-freq samples, starting at time index = 0,
253  *                freq index = patch subband
254  *
255  * Outputs:     64-bit accumulators for p02re, p02im stored in accBuf
256  *
257  * Return:      none
258  *
259  * Notes:       this is carefully written to be efficient on ARM
260  *              use the assembly code version in sbrcov.s when building for ARM!
261  **************************************************************************************/
262
263 #ifdef __cplusplus
264 extern "C"
265 #endif
266
267 void CVKernel2(int *XBuf, int *accBuf)
268 {
269     U64 p02re, p02im;
270     int n, x0re, x0im, x1re, x1im, x2re, x2im;
271
272     p02re.w64 = p02im.w64 = 0;
273
274     x0re = XBuf[0];
275     x0im = XBuf[1];
276     XBuf += (2 * 64);
277     x1re = XBuf[0];
278     x1im = XBuf[1];
279     XBuf += (2 * 64);
280
281     for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) {
282         /* 6 input, 2*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
283         x2re = XBuf[0];
284         x2im = XBuf[1];
285
286         p02re.w64 = MADD64(p02re.w64,  x2re, x0re);
287         p02re.w64 = MADD64(p02re.w64,  x2im, x0im);
288         p02im.w64 = MADD64(p02im.w64,  x0re, x2im);
289         p02im.w64 = MADD64(p02im.w64, -x0im, x2re);
290
291         x0re = x1re;
292         x0im = x1im;
293         x1re = x2re;
294         x1im = x2im;
295         XBuf += (2 * 64);
296     }
297
298     accBuf[0] = p02re.r.lo32;
299     accBuf[1] = p02re.r.hi32;
300     accBuf[2] = p02im.r.lo32;
301     accBuf[3] = p02im.r.hi32;
302 }
303
304 /**************************************************************************************
305  * Function:    CalcCovariance2
306  *
307  * Description: calculate covariance matrix for p02 (4.6.18.6.2)
308  *
309  * Inputs:      buffer of low-freq samples, starting at time index = 0,
310  *                freq index = patch subband
311  *
312  * Outputs:     complex covariance element p02re, p02im
313  *              format = integer (Q0) * 2^N (N is the returned scalefactor and may be negative)
314  *
315  * Return:      scalefactor N
316  *
317  * Notes:       outputs are normalized to have 1 GB (sign in at least top 2 bits)
318  **************************************************************************************/
319 static int CalcCovariance2(int *XBuf, int *p02reN, int *p02imN)
320 {
321     U64 p02re, p02im;
322     int n, z, s, loShift, hiShift, gbMask;
323     int accBuf[2 * 2];
324
325     CVKernel2(XBuf, accBuf);
326     p02re.r.lo32 = accBuf[0];
327     p02re.r.hi32 = accBuf[1];
328     p02im.r.lo32 = accBuf[2];
329     p02im.r.hi32 = accBuf[3];
330
331     /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits
332      * want to scale them down to integers (32-bit signed, Q0)
333      *   with scale factor of 2^n, n >= 0
334      * leave 1 GB for calculating determinant, so take top 30 non-zero bits
335      */
336     gbMask  = ((p02re.r.hi32) ^(p02re.r.hi32 >> 31)) | ((p02im.r.hi32) ^(p02im.r.hi32 >> 31));
337     if (gbMask == 0) {
338         s = p02re.r.hi32 >> 31;
339         gbMask  = (p02re.r.lo32 ^ s) - s;
340         s = p02im.r.hi32 >> 31;
341         gbMask |= (p02im.r.lo32 ^ s) - s;
342         z = 32 + CLZ(gbMask);
343     } else {
344         gbMask  = FASTABS(p02re.r.hi32) | FASTABS(p02im.r.hi32);
345         z = CLZ(gbMask);
346     }
347     n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */
348
349     if (n <= 30) {
350         loShift = (30 - n);
351         *p02reN = p02re.r.lo32 << loShift;
352         *p02imN = p02im.r.lo32 << loShift;
353         return -(loShift + 2 * FBITS_OUT_QMFA);
354     } else if (n < 32 + 30) {
355         loShift = (n - 30);
356         hiShift = 32 - loShift;
357         *p02reN = (p02re.r.hi32 << hiShift) | (p02re.r.lo32 >> loShift);
358         *p02imN = (p02im.r.hi32 << hiShift) | (p02im.r.lo32 >> loShift);
359         return (loShift - 2 * FBITS_OUT_QMFA);
360     } else {
361         hiShift = n - (32 + 30);
362         *p02reN = p02re.r.hi32 >> hiShift;
363         *p02imN = p02im.r.hi32 >> hiShift;
364         return (32 - 2 * FBITS_OUT_QMFA - hiShift);
365     }
366
367     return 0;
368 }
369
370 /**************************************************************************************
371  * Function:    CalcLPCoefs
372  *
373  * Description: calculate linear prediction coefficients for one subband (4.6.18.6.2)
374  *
375  * Inputs:      buffer of low-freq samples, starting at time index = 0,
376  *                freq index = patch subband
377  *              number of guard bits in input sample buffer
378  *
379  * Outputs:     complex LP coefficients a0re, a0im, a1re, a1im, format = Q29
380  *
381  * Return:      none
382  *
383  * Notes:       output coefficients (a0re, a0im, a1re, a1im) clipped to range (-4, 4)
384  *              if the complex coefficients have magnitude >= 4.0, they are all
385  *                set to 0 (see spec)
386  **************************************************************************************/
387 static void CalcLPCoefs(int *XBuf, int *a0re, int *a0im, int *a1re, int *a1im, int gb)
388 {
389     int zFlag, n1, n2, nd, d, dInv, tre, tim;
390     int p01re, p01im, p02re, p02im, p12re, p12im, p11re, p22re;
391
392     /* pre-scale to avoid overflow - probably never happens in practice (see QMFA)
393      *   max bit growth per accumulator = 38*2 = 76 mul-adds (X * X)
394      *   using 64-bit MADD, so if X has n guard bits, X*X has 2n+1 guard bits
395      *   gain 1 extra sign bit per multiply, so ensure ceil(log2(76/2) / 2) = 3 guard bits on inputs
396      */
397     if (gb < 3) {
398         nd = 3 - gb;
399         for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
400             XBuf[0] >>= nd;
401             XBuf[1] >>= nd;
402             XBuf += (2 * 64);
403         }
404         XBuf -= (2 * 64 * (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2));
405     }
406
407     /* calculate covariance elements */
408     n1 = CalcCovariance1(XBuf, &p01re, &p01im, &p12re, &p12im, &p11re, &p22re);
409     n2 = CalcCovariance2(XBuf, &p02re, &p02im);
410
411     /* normalize everything to larger power of 2 scalefactor, call it n1 */
412     if (n1 < n2) {
413         nd = MIN(n2 - n1, 31);
414         p01re >>= nd;
415         p01im >>= nd;
416         p12re >>= nd;
417         p12im >>= nd;
418         p11re >>= nd;
419         p22re >>= nd;
420         n1 = n2;
421     } else if (n1 > n2) {
422         nd = MIN(n1 - n2, 31);
423         p02re >>= nd;
424         p02im >>= nd;
425     }
426
427     /* calculate determinant of covariance matrix (at least 1 GB in pXX) */
428     d = MULSHIFT32(p12re, p12re) + MULSHIFT32(p12im, p12im);
429     d = MULSHIFT32(d, RELAX_COEF) << 1;
430     d = MULSHIFT32(p11re, p22re) - d;
431     ASSERT(d >= 0); /* should never be < 0 */
432
433     zFlag = 0;
434     *a0re = *a0im = 0;
435     *a1re = *a1im = 0;
436     if (d > 0) {
437         /* input =   Q31  d    = Q(-2*n1 - 32 + nd) = Q31 * 2^(31 + 2*n1 + 32 - nd)
438          * inverse = Q29  dInv = Q29 * 2^(-31 - 2*n1 - 32 + nd) = Q(29 + 31 + 2*n1 + 32 - nd)
439          *
440          * numerator has same Q format as d, since it's sum of normalized squares
441          * so num * inverse = Q(-2*n1 - 32) * Q(29 + 31 + 2*n1 + 32 - nd)
442          *                  = Q(29 + 31 - nd), drop low 32 in MULSHIFT32
443          *                  = Q(29 + 31 - 32 - nd) = Q(28 - nd)
444          */
445         nd = CLZ(d) - 1;
446         d <<= nd;
447         dInv = InvRNormalized(d);
448
449         /* 1 GB in pXX */
450         tre = MULSHIFT32(p01re, p12re) - MULSHIFT32(p01im, p12im) - MULSHIFT32(p02re, p11re);
451         tre = MULSHIFT32(tre, dInv);
452         tim = MULSHIFT32(p01re, p12im) + MULSHIFT32(p01im, p12re) - MULSHIFT32(p02im, p11re);
453         tim = MULSHIFT32(tim, dInv);
454
455         /* if d is extremely small, just set coefs to 0 (would have poor precision anyway) */
456         if (nd > 28 || (FASTABS(tre) >> (28 - nd)) >= 4 || (FASTABS(tim) >> (28 - nd)) >= 4) {
457             zFlag = 1;
458         } else {
459             *a1re = tre << (FBITS_LPCOEFS - 28 + nd);   /* i.e. convert Q(28 - nd) to Q(29) */
460             *a1im = tim << (FBITS_LPCOEFS - 28 + nd);
461         }
462     }
463
464     if (p11re) {
465         /* input =   Q31  p11re = Q(-n1 + nd) = Q31 * 2^(31 + n1 - nd)
466          * inverse = Q29  dInv  = Q29 * 2^(-31 - n1 + nd) = Q(29 + 31 + n1 - nd)
467          *
468          * numerator is Q(-n1 - 3)
469          * so num * inverse = Q(-n1 - 3) * Q(29 + 31 + n1 - nd)
470          *                  = Q(29 + 31 - 3 - nd), drop low 32 in MULSHIFT32
471          *                  = Q(29 + 31 - 3 - 32 - nd) = Q(25 - nd)
472          */
473         nd = CLZ(p11re) - 1;    /* assume positive */
474         p11re <<= nd;
475         dInv = InvRNormalized(p11re);
476
477         /* a1re, a1im = Q29, so scaled by (n1 + 3) */
478         tre = (p01re >> 3) + MULSHIFT32(p12re, *a1re) + MULSHIFT32(p12im, *a1im);
479         tre = -MULSHIFT32(tre, dInv);
480         tim = (p01im >> 3) - MULSHIFT32(p12im, *a1re) + MULSHIFT32(p12re, *a1im);
481         tim = -MULSHIFT32(tim, dInv);
482
483         if (nd > 25 || (FASTABS(tre) >> (25 - nd)) >= 4 || (FASTABS(tim) >> (25 - nd)) >= 4) {
484             zFlag = 1;
485         } else {
486             *a0re = tre << (FBITS_LPCOEFS - 25 + nd);   /* i.e. convert Q(25 - nd) to Q(29) */
487             *a0im = tim << (FBITS_LPCOEFS - 25 + nd);
488         }
489     }
490
491     /* see 4.6.18.6.2 - if magnitude of a0 or a1 >= 4 then a0 = a1 = 0
492      * i.e. a0re < 4, a0im < 4, a1re < 4, a1im < 4
493      * Q29*Q29 = Q26
494      */
495     if (zFlag || MULSHIFT32(*a0re, *a0re) + MULSHIFT32(*a0im, *a0im) >= MAG_16 || MULSHIFT32(*a1re, *a1re) + MULSHIFT32(*a1im, *a1im) >= MAG_16) {
496         *a0re = *a0im = 0;
497         *a1re = *a1im = 0;
498     }
499
500     /* no need to clip - we never changed the XBuf data, just used it to calculate a0 and a1 */
501     if (gb < 3) {
502         nd = 3 - gb;
503         for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
504             XBuf[0] <<= nd;
505             XBuf[1] <<= nd;
506             XBuf += (2 * 64);
507         }
508     }
509 }
510
511 /**************************************************************************************
512  * Function:    GenerateHighFreq
513  *
514  * Description: generate high frequencies with SBR (4.6.18.6)
515  *
516  * Inputs:      initialized PSInfoSBR struct
517  *              initialized SBRGrid struct for this channel
518  *              initialized SBRFreq struct for this SCE/CPE block
519  *              initialized SBRChan struct for this channel
520  *              index of current channel (0 for SCE, 0 or 1 for CPE)
521  *
522  * Outputs:     new high frequency samples starting at frequency kStart
523  *
524  * Return:      none
525  **************************************************************************************/
526 void GenerateHighFreq(PSInfoSBR *psi, SBRGrid *sbrGrid, SBRFreq *sbrFreq, SBRChan *sbrChan, int ch)
527 {
528     int band, newBW, c, t, gb, gbMask, gbIdx;
529     int currPatch, p, x, k, g, i, iStart, iEnd, bw, bwsq;
530     int a0re, a0im, a1re, a1im;
531     int x1re, x1im, x2re, x2im;
532     int ACCre, ACCim;
533     int *XBufLo, *XBufHi;
534
535     /* calculate array of chirp factors */
536     for (band = 0; band < sbrFreq->numNoiseFloorBands; band++) {
537         c = sbrChan->chirpFact[band];   /* previous (bwArray') */
538         newBW = newBWTab[sbrChan->invfMode[0][band]][sbrChan->invfMode[1][band]];
539
540         /* weighted average of new and old (can't overflow - total gain = 1.0) */
541         if (newBW < c) {
542             t = MULSHIFT32(newBW, 0x60000000) + MULSHIFT32(0x20000000, c);    /* new is smaller: 0.75*new + 0.25*old */
543         } else {
544             t = MULSHIFT32(newBW, 0x74000000) + MULSHIFT32(0x0c000000, c);    /* new is larger: 0.90625*new + 0.09375*old */
545         }
546         t <<= 1;
547
548         if (t < 0x02000000) { /* below 0.015625, clip to 0 */
549             t = 0;
550         }
551         if (t > 0x7f800000) { /* clip to 0.99609375 */
552             t = 0x7f800000;
553         }
554
555         /* save curr as prev for next time */
556         sbrChan->chirpFact[band] = t;
557         sbrChan->invfMode[0][band] = sbrChan->invfMode[1][band];
558     }
559
560     iStart = sbrGrid->envTimeBorder[0] + HF_ADJ;
561     iEnd =   sbrGrid->envTimeBorder[sbrGrid->numEnv] + HF_ADJ;
562
563     /* generate new high freqs from low freqs, patches, and chirp factors */
564     k = sbrFreq->kStart;
565     g = 0;
566     bw = sbrChan->chirpFact[g];
567     bwsq = MULSHIFT32(bw, bw) << 1;
568
569     gbMask = (sbrChan->gbMask[0] | sbrChan->gbMask[1]); /* older 32 | newer 8 */
570     gb = CLZ(gbMask) - 1;
571
572     for (currPatch = 0; currPatch < sbrFreq->numPatches; currPatch++) {
573         for (x = 0; x < sbrFreq->patchNumSubbands[currPatch]; x++) {
574             /* map k to corresponding noise floor band */
575             if (k >= sbrFreq->freqNoise[g + 1]) {
576                 g++;
577                 bw = sbrChan->chirpFact[g];     /* Q31 */
578                 bwsq = MULSHIFT32(bw, bw) << 1; /* Q31 */
579             }
580
581             p = sbrFreq->patchStartSubband[currPatch] + x;  /* low QMF band */
582             XBufHi = psi->XBuf[iStart][k];
583             if (bw) {
584                 CalcLPCoefs(psi->XBuf[0][p], &a0re, &a0im, &a1re, &a1im, gb);
585
586                 a0re = MULSHIFT32(bw, a0re);    /* Q31 * Q29 = Q28 */
587                 a0im = MULSHIFT32(bw, a0im);
588                 a1re = MULSHIFT32(bwsq, a1re);
589                 a1im = MULSHIFT32(bwsq, a1im);
590
591                 XBufLo = psi->XBuf[iStart - 2][p];
592
593                 x2re = XBufLo[0];   /* RE{XBuf[n-2]} */
594                 x2im = XBufLo[1];   /* IM{XBuf[n-2]} */
595                 XBufLo += (64 * 2);
596
597                 x1re = XBufLo[0];   /* RE{XBuf[n-1]} */
598                 x1im = XBufLo[1];   /* IM{XBuf[n-1]} */
599                 XBufLo += (64 * 2);
600
601                 for (i = iStart; i < iEnd; i++) {
602                     /* a0re/im, a1re/im are Q28 with at least 1 GB,
603                      *   so the summing for AACre/im is fine (1 GB in, plus 1 from MULSHIFT32)
604                      */
605                     ACCre = MULSHIFT32(x2re, a1re) - MULSHIFT32(x2im, a1im);
606                     ACCim = MULSHIFT32(x2re, a1im) + MULSHIFT32(x2im, a1re);
607                     x2re = x1re;
608                     x2im = x1im;
609
610                     ACCre += MULSHIFT32(x1re, a0re) - MULSHIFT32(x1im, a0im);
611                     ACCim += MULSHIFT32(x1re, a0im) + MULSHIFT32(x1im, a0re);
612                     x1re = XBufLo[0];   /* RE{XBuf[n]} */
613                     x1im = XBufLo[1];   /* IM{XBuf[n]} */
614                     XBufLo += (64 * 2);
615
616                     /* lost 4 fbits when scaling by a0re/im, a1re/im (Q28) */
617                     CLIP_2N_SHIFT30(ACCre, 4);
618                     ACCre += x1re;
619                     CLIP_2N_SHIFT30(ACCim, 4);
620                     ACCim += x1im;
621
622                     XBufHi[0] = ACCre;
623                     XBufHi[1] = ACCim;
624                     XBufHi += (64 * 2);
625
626                     /* update guard bit masks */
627                     gbMask  = FASTABS(ACCre);
628                     gbMask |= FASTABS(ACCim);
629                     gbIdx = (i >> 5) & 0x01;    /* 0 if i < 32, 1 if i >= 32 */
630                     sbrChan->gbMask[gbIdx] |= gbMask;
631                 }
632             } else {
633                 XBufLo = (int *)psi->XBuf[iStart][p];
634                 for (i = iStart; i < iEnd; i++) {
635                     XBufHi[0] = XBufLo[0];
636                     XBufHi[1] = XBufLo[1];
637                     XBufLo += (64 * 2);
638                     XBufHi += (64 * 2);
639                 }
640             }
641             k++;    /* high QMF band */
642         }
643     }
644 }
645
646
647