blob: 0ab7dad74c029165ce1a2bc66f46d870306445c2
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: sbrhfgen.c,v 1.1.2.2 2005/05/19 21:00:01 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * sbrhfgen.c - high frequency generation for SBR |
44 | **************************************************************************************/ |
45 | |
46 | #include "sbr.h" |
47 | #include "assembly.h" |
48 | |
49 | #define FBITS_LPCOEFS 29 /* Q29 for range of (-4, 4) */ |
50 | #define MAG_16 (16 * (1 << (32 - (2*(32-FBITS_LPCOEFS))))) /* i.e. 16 in Q26 format */ |
51 | #define RELAX_COEF 0x7ffff79c /* 1.0 / (1.0 + 1e-6), Q31 */ |
52 | |
53 | /* newBWTab[prev invfMode][curr invfMode], format = Q31 (table 4.158) |
54 | * sample file which uses all of these: al_sbr_sr_64_2_fsaac32.aac |
55 | */ |
56 | static const int newBWTab[4][4] = { |
57 | {0x00000000, 0x4ccccccd, 0x73333333, 0x7d70a3d7}, |
58 | {0x4ccccccd, 0x60000000, 0x73333333, 0x7d70a3d7}, |
59 | {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7}, |
60 | {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7}, |
61 | }; |
62 | |
63 | /************************************************************************************** |
64 | * Function: CVKernel1 |
65 | * |
66 | * Description: kernel of covariance matrix calculation for p01, p11, p12, p22 |
67 | * |
68 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
69 | * freq index = patch subband |
70 | * |
71 | * Outputs: 64-bit accumulators for p01re, p01im, p12re, p12im, p11re, p22re |
72 | * stored in accBuf |
73 | * |
74 | * Return: none |
75 | * |
76 | * Notes: this is carefully written to be efficient on ARM |
77 | * use the assembly code version in sbrcov.s when building for ARM! |
78 | **************************************************************************************/ |
#if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void CVKernel1(int *XBuf, int *accBuf);
#else
/* C implementation of the covariance kernel (ARM assembly version in sbrcov.s is
 * disabled above via #if 0). Accumulates six 64-bit lag products over one subband:
 *   p01 = sum X[n+1] * conj(X[n])   (complex)
 *   p12 = sum X[n]   * conj(X[n-1]) (complex, derived from p01 + boundary terms)
 *   p11 = sum |X[n]|^2, p22 = sum |X[n-1]|^2 (real)
 * XBuf walks one QMF subband through time: consecutive time samples of the same
 * subband are 2*64 ints apart (64 complex bins per slot, interleaved re/im).
 * Results are written to accBuf as 6 (lo32, hi32) pairs in the order
 * p01re, p01im, p11re, p12re, p12im, p22re - CalcCovariance1 must unpack in
 * exactly this order.
 */
void CVKernel1(int *XBuf, int *accBuf)
{
    U64 p01re, p01im, p12re, p12im, p11re, p22re;
    int n, x0re, x0im, x1re, x1im;

    /* prime the 2-tap delay line with X[0] and X[1] */
    x0re = XBuf[0];
    x0im = XBuf[1];
    XBuf += (2 * 64);
    x1re = XBuf[0];
    x1im = XBuf[1];
    XBuf += (2 * 64);

    p01re.w64 = p01im.w64 = 0;
    p12re.w64 = p12im.w64 = 0;
    p11re.w64 = 0;
    p22re.w64 = 0;

    /* seed p12/p22 with the leading boundary term (the lag-1 sums start one
     * sample earlier than the lag products accumulated in the main loop) */
    p12re.w64 = MADD64(p12re.w64, x1re, x0re);
    p12re.w64 = MADD64(p12re.w64, x1im, x0im);
    p12im.w64 = MADD64(p12im.w64, x0re, x1im);
    p12im.w64 = MADD64(p12im.w64, -x0im, x1re);
    p22re.w64 = MADD64(p22re.w64, x0re, x0re);
    p22re.w64 = MADD64(p22re.w64, x0im, x0im);
    for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) {
        /* 4 input, 3*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
        x0re = x1re;
        x0im = x1im;
        x1re = XBuf[0];
        x1im = XBuf[1];

        /* p01 += X[n] * conj(X[n-1]); p11 += |X[n-1]|^2 */
        p01re.w64 = MADD64(p01re.w64, x1re, x0re);
        p01re.w64 = MADD64(p01re.w64, x1im, x0im);
        p01im.w64 = MADD64(p01im.w64, x0re, x1im);
        p01im.w64 = MADD64(p01im.w64, -x0im, x1re);
        p11re.w64 = MADD64(p11re.w64, x0re, x0re);
        p11re.w64 = MADD64(p11re.w64, x0im, x0im);

        XBuf += (2 * 64);
    }
    /* these can be derived by slight changes to account for boundary conditions:
     * p12/p22 equal p01/p11 shifted by one sample, so reuse the loop sums and
     * subtract the trailing term that doesn't belong */
    p12re.w64 += p01re.w64;
    p12re.w64 = MADD64(p12re.w64, x1re, -x0re);
    p12re.w64 = MADD64(p12re.w64, x1im, -x0im);
    p12im.w64 += p01im.w64;
    p12im.w64 = MADD64(p12im.w64, x0re, -x1im);
    p12im.w64 = MADD64(p12im.w64, x0im, x1re);
    p22re.w64 += p11re.w64;
    p22re.w64 = MADD64(p22re.w64, x0re, -x0re);
    p22re.w64 = MADD64(p22re.w64, x0im, -x0im);

    /* pack 64-bit accumulators as (lo32, hi32) pairs */
    accBuf[0] = p01re.r.lo32;
    accBuf[1] = p01re.r.hi32;
    accBuf[2] = p01im.r.lo32;
    accBuf[3] = p01im.r.hi32;
    accBuf[4] = p11re.r.lo32;
    accBuf[5] = p11re.r.hi32;
    accBuf[6] = p12re.r.lo32;
    accBuf[7] = p12re.r.hi32;
    accBuf[8] = p12im.r.lo32;
    accBuf[9] = p12im.r.hi32;
    accBuf[10] = p22re.r.lo32;
    accBuf[11] = p22re.r.hi32;
}
#endif
149 | |
150 | /************************************************************************************** |
151 | * Function: CalcCovariance1 |
152 | * |
153 | * Description: calculate covariance matrix for p01, p12, p11, p22 (4.6.18.6.2) |
154 | * |
155 | * Inputs: buffer of low-freq samples, starting at time index 0, |
156 | * freq index = patch subband |
157 | * |
158 | * Outputs: complex covariance elements p01re, p01im, p12re, p12im, p11re, p22re |
159 | * (p11im = p22im = 0) |
160 | * format = integer (Q0) * 2^N, with scalefactor N >= 0 |
161 | * |
162 | * Return: scalefactor N |
163 | * |
164 | * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits) |
165 | **************************************************************************************/ |
166 | static int CalcCovariance1(int *XBuf, int *p01reN, int *p01imN, int *p12reN, int *p12imN, int *p11reN, int *p22reN) |
167 | { |
168 | int accBuf[2 * 6]; |
169 | int n, z, s, loShift, hiShift, gbMask; |
170 | U64 p01re, p01im, p12re, p12im, p11re, p22re; |
171 | |
172 | CVKernel1(XBuf, accBuf); |
173 | p01re.r.lo32 = accBuf[0]; |
174 | p01re.r.hi32 = accBuf[1]; |
175 | p01im.r.lo32 = accBuf[2]; |
176 | p01im.r.hi32 = accBuf[3]; |
177 | p11re.r.lo32 = accBuf[4]; |
178 | p11re.r.hi32 = accBuf[5]; |
179 | p12re.r.lo32 = accBuf[6]; |
180 | p12re.r.hi32 = accBuf[7]; |
181 | p12im.r.lo32 = accBuf[8]; |
182 | p12im.r.hi32 = accBuf[9]; |
183 | p22re.r.lo32 = accBuf[10]; |
184 | p22re.r.hi32 = accBuf[11]; |
185 | |
186 | /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits |
187 | * want to scale them down to integers (32-bit signed, Q0) |
188 | * with scale factor of 2^n, n >= 0 |
189 | * leave 2 GB's for calculating determinant, so take top 30 non-zero bits |
190 | */ |
191 | gbMask = ((p01re.r.hi32) ^(p01re.r.hi32 >> 31)) | ((p01im.r.hi32) ^(p01im.r.hi32 >> 31)); |
192 | gbMask |= ((p12re.r.hi32) ^(p12re.r.hi32 >> 31)) | ((p12im.r.hi32) ^(p12im.r.hi32 >> 31)); |
193 | gbMask |= ((p11re.r.hi32) ^(p11re.r.hi32 >> 31)) | ((p22re.r.hi32) ^(p22re.r.hi32 >> 31)); |
194 | if (gbMask == 0) { |
195 | s = p01re.r.hi32 >> 31; |
196 | gbMask = (p01re.r.lo32 ^ s) - s; |
197 | s = p01im.r.hi32 >> 31; |
198 | gbMask |= (p01im.r.lo32 ^ s) - s; |
199 | s = p12re.r.hi32 >> 31; |
200 | gbMask |= (p12re.r.lo32 ^ s) - s; |
201 | s = p12im.r.hi32 >> 31; |
202 | gbMask |= (p12im.r.lo32 ^ s) - s; |
203 | s = p11re.r.hi32 >> 31; |
204 | gbMask |= (p11re.r.lo32 ^ s) - s; |
205 | s = p22re.r.hi32 >> 31; |
206 | gbMask |= (p22re.r.lo32 ^ s) - s; |
207 | z = 32 + CLZ(gbMask); |
208 | } else { |
209 | gbMask = FASTABS(p01re.r.hi32) | FASTABS(p01im.r.hi32); |
210 | gbMask |= FASTABS(p12re.r.hi32) | FASTABS(p12im.r.hi32); |
211 | gbMask |= FASTABS(p11re.r.hi32) | FASTABS(p22re.r.hi32); |
212 | z = CLZ(gbMask); |
213 | } |
214 | |
215 | n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */ |
216 | if (n <= 30) { |
217 | loShift = (30 - n); |
218 | *p01reN = p01re.r.lo32 << loShift; |
219 | *p01imN = p01im.r.lo32 << loShift; |
220 | *p12reN = p12re.r.lo32 << loShift; |
221 | *p12imN = p12im.r.lo32 << loShift; |
222 | *p11reN = p11re.r.lo32 << loShift; |
223 | *p22reN = p22re.r.lo32 << loShift; |
224 | return -(loShift + 2 * FBITS_OUT_QMFA); |
225 | } else if (n < 32 + 30) { |
226 | loShift = (n - 30); |
227 | hiShift = 32 - loShift; |
228 | *p01reN = (p01re.r.hi32 << hiShift) | (p01re.r.lo32 >> loShift); |
229 | *p01imN = (p01im.r.hi32 << hiShift) | (p01im.r.lo32 >> loShift); |
230 | *p12reN = (p12re.r.hi32 << hiShift) | (p12re.r.lo32 >> loShift); |
231 | *p12imN = (p12im.r.hi32 << hiShift) | (p12im.r.lo32 >> loShift); |
232 | *p11reN = (p11re.r.hi32 << hiShift) | (p11re.r.lo32 >> loShift); |
233 | *p22reN = (p22re.r.hi32 << hiShift) | (p22re.r.lo32 >> loShift); |
234 | return (loShift - 2 * FBITS_OUT_QMFA); |
235 | } else { |
236 | hiShift = n - (32 + 30); |
237 | *p01reN = p01re.r.hi32 >> hiShift; |
238 | *p01imN = p01im.r.hi32 >> hiShift; |
239 | *p12reN = p12re.r.hi32 >> hiShift; |
240 | *p12imN = p12im.r.hi32 >> hiShift; |
241 | *p11reN = p11re.r.hi32 >> hiShift; |
242 | *p22reN = p22re.r.hi32 >> hiShift; |
243 | return (32 - 2 * FBITS_OUT_QMFA - hiShift); |
244 | } |
245 | |
246 | return 0; |
247 | } |
248 | |
249 | /************************************************************************************** |
250 | * Function: CVKernel2 |
251 | * |
252 | * Description: kernel of covariance matrix calculation for p02 |
253 | * |
254 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
255 | * freq index = patch subband |
256 | * |
257 | * Outputs: 64-bit accumulators for p02re, p02im stored in accBuf |
258 | * |
259 | * Return: none |
260 | * |
261 | * Notes: this is carefully written to be efficient on ARM |
262 | * use the assembly code version in sbrcov.s when building for ARM! |
263 | **************************************************************************************/ |
264 | #if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__)) |
265 | #ifdef __cplusplus |
266 | extern "C" |
267 | #endif |
268 | void CVKernel2(int *XBuf, int *accBuf); |
269 | #else |
270 | void CVKernel2(int *XBuf, int *accBuf) |
271 | { |
272 | U64 p02re, p02im; |
273 | int n, x0re, x0im, x1re, x1im, x2re, x2im; |
274 | |
275 | p02re.w64 = p02im.w64 = 0; |
276 | |
277 | x0re = XBuf[0]; |
278 | x0im = XBuf[1]; |
279 | XBuf += (2 * 64); |
280 | x1re = XBuf[0]; |
281 | x1im = XBuf[1]; |
282 | XBuf += (2 * 64); |
283 | |
284 | for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) { |
285 | /* 6 input, 2*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */ |
286 | x2re = XBuf[0]; |
287 | x2im = XBuf[1]; |
288 | |
289 | p02re.w64 = MADD64(p02re.w64, x2re, x0re); |
290 | p02re.w64 = MADD64(p02re.w64, x2im, x0im); |
291 | p02im.w64 = MADD64(p02im.w64, x0re, x2im); |
292 | p02im.w64 = MADD64(p02im.w64, -x0im, x2re); |
293 | |
294 | x0re = x1re; |
295 | x0im = x1im; |
296 | x1re = x2re; |
297 | x1im = x2im; |
298 | XBuf += (2 * 64); |
299 | } |
300 | |
301 | accBuf[0] = p02re.r.lo32; |
302 | accBuf[1] = p02re.r.hi32; |
303 | accBuf[2] = p02im.r.lo32; |
304 | accBuf[3] = p02im.r.hi32; |
305 | } |
306 | #endif |
307 | |
308 | /************************************************************************************** |
309 | * Function: CalcCovariance2 |
310 | * |
311 | * Description: calculate covariance matrix for p02 (4.6.18.6.2) |
312 | * |
313 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
314 | * freq index = patch subband |
315 | * |
316 | * Outputs: complex covariance element p02re, p02im |
317 | * format = integer (Q0) * 2^N, with scalefactor N >= 0 |
318 | * |
319 | * Return: scalefactor N |
320 | * |
321 | * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits) |
322 | **************************************************************************************/ |
323 | static int CalcCovariance2(int *XBuf, int *p02reN, int *p02imN) |
324 | { |
325 | U64 p02re, p02im; |
326 | int n, z, s, loShift, hiShift, gbMask; |
327 | int accBuf[2 * 2]; |
328 | |
329 | CVKernel2(XBuf, accBuf); |
330 | p02re.r.lo32 = accBuf[0]; |
331 | p02re.r.hi32 = accBuf[1]; |
332 | p02im.r.lo32 = accBuf[2]; |
333 | p02im.r.hi32 = accBuf[3]; |
334 | |
335 | /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits |
336 | * want to scale them down to integers (32-bit signed, Q0) |
337 | * with scale factor of 2^n, n >= 0 |
338 | * leave 1 GB for calculating determinant, so take top 30 non-zero bits |
339 | */ |
340 | gbMask = ((p02re.r.hi32) ^(p02re.r.hi32 >> 31)) | ((p02im.r.hi32) ^(p02im.r.hi32 >> 31)); |
341 | if (gbMask == 0) { |
342 | s = p02re.r.hi32 >> 31; |
343 | gbMask = (p02re.r.lo32 ^ s) - s; |
344 | s = p02im.r.hi32 >> 31; |
345 | gbMask |= (p02im.r.lo32 ^ s) - s; |
346 | z = 32 + CLZ(gbMask); |
347 | } else { |
348 | gbMask = FASTABS(p02re.r.hi32) | FASTABS(p02im.r.hi32); |
349 | z = CLZ(gbMask); |
350 | } |
351 | n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */ |
352 | |
353 | if (n <= 30) { |
354 | loShift = (30 - n); |
355 | *p02reN = p02re.r.lo32 << loShift; |
356 | *p02imN = p02im.r.lo32 << loShift; |
357 | return -(loShift + 2 * FBITS_OUT_QMFA); |
358 | } else if (n < 32 + 30) { |
359 | loShift = (n - 30); |
360 | hiShift = 32 - loShift; |
361 | *p02reN = (p02re.r.hi32 << hiShift) | (p02re.r.lo32 >> loShift); |
362 | *p02imN = (p02im.r.hi32 << hiShift) | (p02im.r.lo32 >> loShift); |
363 | return (loShift - 2 * FBITS_OUT_QMFA); |
364 | } else { |
365 | hiShift = n - (32 + 30); |
366 | *p02reN = p02re.r.hi32 >> hiShift; |
367 | *p02imN = p02im.r.hi32 >> hiShift; |
368 | return (32 - 2 * FBITS_OUT_QMFA - hiShift); |
369 | } |
370 | |
371 | return 0; |
372 | } |
373 | |
374 | /************************************************************************************** |
375 | * Function: CalcLPCoefs |
376 | * |
377 | * Description: calculate linear prediction coefficients for one subband (4.6.18.6.2) |
378 | * |
379 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
380 | * freq index = patch subband |
381 | * number of guard bits in input sample buffer |
382 | * |
383 | * Outputs: complex LP coefficients a0re, a0im, a1re, a1im, format = Q29 |
384 | * |
385 |  * Return:      ERR_AAC_NONE (out-of-range coefficients are zeroed per spec, |
386 | * |
387 | * Notes: output coefficients (a0re, a0im, a1re, a1im) clipped to range (-4, 4) |
388 |  *              if the complex coefficients have magnitude >= 4.0, they are all |
389 | * set to 0 (see spec) |
390 | **************************************************************************************/ |
/* Solve the 2x2 normal equations for the complex LP coefficients a0, a1 (Q29)
 * of one low-band subband. XBuf is temporarily scaled down to guarantee 3 guard
 * bits, then restored before return. Out-of-range or ill-conditioned results
 * are zeroed per the spec, so this always returns ERR_AAC_NONE. */
static int CalcLPCoefs(int *XBuf, int *a0re, int *a0im, int *a1re, int *a1im, int gb)
{
    int zFlag, n1, n2, nd, d, dInv, tre, tim;
    int p01re, p01im, p02re, p02im, p12re, p12im, p11re, p22re;

    /* pre-scale to avoid overflow - probably never happens in practice (see QMFA)
     * max bit growth per accumulator = 38*2 = 76 mul-adds (X * X)
     * using 64-bit MADD, so if X has n guard bits, X*X has 2n+1 guard bits
     * gain 1 extra sign bit per multiply, so ensure ceil(log2(76/2) / 2) = 3 guard bits on inputs
     */
    if (gb < 3) {
        nd = 3 - gb;
        for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
            XBuf[0] >>= nd;
            XBuf[1] >>= nd;
            XBuf += (2 * 64);   /* next time sample of this subband */
        }
        XBuf -= (2 * 64 * (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2));  /* rewind */
    }

    /* calculate covariance elements */
    n1 = CalcCovariance1(XBuf, &p01re, &p01im, &p12re, &p12im, &p11re, &p22re);
    n2 = CalcCovariance2(XBuf, &p02re, &p02im);

    /* normalize everything to larger power of 2 scalefactor, call it n1 */
    if (n1 < n2) {
        nd = MIN(n2 - n1, 31);  /* clamp: shifting an int by >= 32 is undefined */
        p01re >>= nd;
        p01im >>= nd;
        p12re >>= nd;
        p12im >>= nd;
        p11re >>= nd;
        p22re >>= nd;
        n1 = n2;
    } else if (n1 > n2) {
        nd = MIN(n1 - n2, 31);
        p02re >>= nd;
        p02im >>= nd;
    }

    /* calculate determinant of covariance matrix (at least 1 GB in pXX)
     * d = p11*p22 - |p12|^2 / (1 + 1e-6), the RELAX_COEF keeping d slightly
     * positive for a (numerically) singular matrix */
    d = MULSHIFT32(p12re, p12re) + MULSHIFT32(p12im, p12im);
    d = MULSHIFT32(d, RELAX_COEF) << 1;
    d = MULSHIFT32(p11re, p22re) - d;
    /* NOTE(review): mathematically d >= 0 for a true covariance matrix, but the
     * original ASSERT below was observed to fire on some streams (presumably
     * from fixed-point rounding); it is treated as non-fatal - when d <= 0 the
     * a1 coefficients simply stay 0. Left disabled. */
    //ASSERT(d >= 0,ERR_AAC_SBR_BITSTREAM); /* should never be < 0 */

    zFlag = 0;
    *a0re = *a0im = 0;
    *a1re = *a1im = 0;
    if (d > 0) {
        /* input = Q31 d = Q(-2*n1 - 32 + nd) = Q31 * 2^(31 + 2*n1 + 32 - nd)
         * inverse = Q29 dInv = Q29 * 2^(-31 - 2*n1 - 32 + nd) = Q(29 + 31 + 2*n1 + 32 - nd)
         *
         * numerator has same Q format as d, since it's sum of normalized squares
         * so num * inverse = Q(-2*n1 - 32) * Q(29 + 31 + 2*n1 + 32 - nd)
         *                  = Q(29 + 31 - nd), drop low 32 in MULSHIFT32
         *                  = Q(29 + 31 - 32 - nd) = Q(28 - nd)
         */
        nd = CLZ(d) - 1;        /* normalize d to [0.5, 1.0) before inversion */
        d <<= nd;
        dInv = InvRNormalized(d);

        /* 1 GB in pXX */
        tre = MULSHIFT32(p01re, p12re) - MULSHIFT32(p01im, p12im) - MULSHIFT32(p02re, p11re);
        tre = MULSHIFT32(tre, dInv);
        tim = MULSHIFT32(p01re, p12im) + MULSHIFT32(p01im, p12re) - MULSHIFT32(p02im, p11re);
        tim = MULSHIFT32(tim, dInv);

        /* if d is extremely small, just set coefs to 0 (would have poor precision anyway)
         * also zero if |a1| would reach 4.0 (out of Q29 range) */
        if (nd > 28 || (FASTABS(tre) >> (28 - nd)) >= 4 || (FASTABS(tim) >> (28 - nd)) >= 4) {
            zFlag = 1;
        } else {
            *a1re = tre << (FBITS_LPCOEFS - 28 + nd);   /* i.e. convert Q(28 - nd) to Q(29) */
            *a1im = tim << (FBITS_LPCOEFS - 28 + nd);
        }
    }

    if (p11re) {
        /* input = Q31 p11re = Q(-n1 + nd) = Q31 * 2^(31 + n1 - nd)
         * inverse = Q29 dInv = Q29 * 2^(-31 - n1 + nd) = Q(29 + 31 + n1 - nd)
         *
         * numerator is Q(-n1 - 3)
         * so num * inverse = Q(-n1 - 3) * Q(29 + 31 + n1 - nd)
         *                  = Q(29 + 31 - 3 - nd), drop low 32 in MULSHIFT32
         *                  = Q(29 + 31 - 3 - 32 - nd) = Q(25 - nd)
         */
        nd = CLZ(p11re) - 1;    /* assume positive */
        p11re <<= nd;
        dInv = InvRNormalized(p11re);

        /* a1re, a1im = Q29, so scaled by (n1 + 3) */
        tre = (p01re >> 3) + MULSHIFT32(p12re, *a1re) + MULSHIFT32(p12im, *a1im);
        tre = -MULSHIFT32(tre, dInv);
        tim = (p01im >> 3) - MULSHIFT32(p12im, *a1re) + MULSHIFT32(p12re, *a1im);
        tim = -MULSHIFT32(tim, dInv);

        if (nd > 25 || (FASTABS(tre) >> (25 - nd)) >= 4 || (FASTABS(tim) >> (25 - nd)) >= 4) {
            zFlag = 1;
        } else {
            *a0re = tre << (FBITS_LPCOEFS - 25 + nd);   /* i.e. convert Q(25 - nd) to Q(29) */
            *a0im = tim << (FBITS_LPCOEFS - 25 + nd);
        }
    }

    /* see 4.6.18.6.2 - if magnitude of a0 or a1 >= 4 then a0 = a1 = 0
     * i.e. a0re < 4, a0im < 4, a1re < 4, a1im < 4
     * Q29*Q29 = Q26 (MAG_16 is 16.0 in Q26 = |a|^2 limit for |a| = 4.0)
     */
    if (zFlag || MULSHIFT32(*a0re, *a0re) + MULSHIFT32(*a0im, *a0im) >= MAG_16 || MULSHIFT32(*a1re, *a1re) + MULSHIFT32(*a1im, *a1im) >= MAG_16) {
        *a0re = *a0im = 0;
        *a1re = *a1im = 0;
    }

    /* no need to clip - we never changed the XBuf data, just used it to calculate a0 and a1 */
    if (gb < 3) {
        nd = 3 - gb;
        for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
            XBuf[0] <<= nd;     /* undo the pre-scale, restoring the caller's buffer */
            XBuf[1] <<= nd;
            XBuf += (2 * 64);
        }
    }
    return ERR_AAC_NONE;
}
516 | |
517 | /************************************************************************************** |
518 | * Function: GenerateHighFreq |
519 | * |
520 | * Description: generate high frequencies with SBR (4.6.18.6) |
521 | * |
522 | * Inputs: initialized PSInfoSBR struct |
523 | * initialized SBRGrid struct for this channel |
524 | * initialized SBRFreq struct for this SCE/CPE block |
525 | * initialized SBRChan struct for this channel |
526 | * index of current channel (0 for SCE, 0 or 1 for CPE) |
527 | * |
528 | * Outputs: new high frequency samples starting at frequency kStart |
529 | * |
530 |  * Return:      error code (ERR_AAC_NONE if OK) |
531 | **************************************************************************************/ |
/* Generate the high-band QMF samples (4.6.18.6): smooth the per-noise-band
 * chirp factors, then for each patch subband either run the 2-tap complex LPC
 * filter (bw != 0) on the source low band or copy it verbatim (bw == 0).
 * Returns ERR_AAC_NONE, or the error propagated from CalcLPCoefs. */
int GenerateHighFreq(PSInfoSBR *psi, SBRGrid *sbrGrid, SBRFreq *sbrFreq, SBRChan *sbrChan, int ch)
{
    int band, newBW, c, t, gb, gbMask, gbIdx;
    int currPatch, p, x, k, g, i, iStart, iEnd, bw, bwsq;
    int a0re, a0im, a1re, a1im;
    int x1re, x1im, x2re, x2im;
    int ACCre, ACCim;
    int *XBufLo, *XBufHi;
    int err = ERR_AAC_NONE;
    /* calculate array of chirp factors */
    for (band = 0; band < sbrFreq->numNoiseFloorBands; band++) {
        c = sbrChan->chirpFact[band];   /* previous (bwArray') */
        newBW = newBWTab[sbrChan->invfMode[0][band]][sbrChan->invfMode[1][band]];

        /* weighted average of new and old (can't overflow - total gain = 1.0) */
        if (newBW < c) {
            t = MULSHIFT32(newBW, 0x60000000) + MULSHIFT32(0x20000000, c);  /* new is smaller: 0.75*new + 0.25*old */
        } else {
            t = MULSHIFT32(newBW, 0x74000000) + MULSHIFT32(0x0c000000, c);  /* new is larger: 0.90625*new + 0.09375*old */
        }
        t <<= 1;    /* MULSHIFT32 dropped one bit: restore Q31 */

        if (t < 0x02000000) {   /* below 0.015625, clip to 0 */
            t = 0;
        }
        if (t > 0x7f800000) {   /* clip to 0.99609375 */
            t = 0x7f800000;
        }

        /* save curr as prev for next time */
        sbrChan->chirpFact[band] = t;
        sbrChan->invfMode[0][band] = sbrChan->invfMode[1][band];
    }

    /* time span of the current SBR frame, offset by HF_ADJ history samples */
    iStart = sbrGrid->envTimeBorder[0] + HF_ADJ;
    iEnd = sbrGrid->envTimeBorder[sbrGrid->numEnv] + HF_ADJ;

    /* generate new high freqs from low freqs, patches, and chirp factors */
    k = sbrFreq->kStart;        /* first high QMF band to synthesize */
    g = 0;                      /* current noise floor band */
    bw = sbrChan->chirpFact[g];
    bwsq = MULSHIFT32(bw, bw) << 1;

    gbMask = (sbrChan->gbMask[0] | sbrChan->gbMask[1]);     /* older 32 | newer 8 */
    gb = CLZ(gbMask) - 1;       /* guard bits available in XBuf for CalcLPCoefs */

    for (currPatch = 0; currPatch < sbrFreq->numPatches; currPatch++) {
        for (x = 0; x < sbrFreq->patchNumSubbands[currPatch]; x++) {
            /* map k to corresponding noise floor band */
            if (k >= sbrFreq->freqNoise[g + 1]) {
                g++;
                bw = sbrChan->chirpFact[g];         /* Q31 */
                bwsq = MULSHIFT32(bw, bw) << 1;     /* Q31 */
            }

            p = sbrFreq->patchStartSubband[currPatch] + x;  /* low QMF band */
            XBufHi = psi->XBuf[iStart][k];
            if (bw) {
                /* inverse filtering active: estimate LP coefficients from the
                 * source band, then run X[k][n] = X[p][n] + a0'*X[p][n-1] + a1'*X[p][n-2] */
                err = CalcLPCoefs(psi->XBuf[0][p], &a0re, &a0im, &a1re, &a1im, gb);
                if (err) {
                    return err;
                }
                a0re = MULSHIFT32(bw, a0re);    /* Q31 * Q29 = Q28 */
                a0im = MULSHIFT32(bw, a0im);
                a1re = MULSHIFT32(bwsq, a1re);
                a1im = MULSHIFT32(bwsq, a1im);

                XBufLo = psi->XBuf[iStart - 2][p];

                x2re = XBufLo[0];   /* RE{XBuf[n-2]} */
                x2im = XBufLo[1];   /* IM{XBuf[n-2]} */
                XBufLo += (64 * 2);

                x1re = XBufLo[0];   /* RE{XBuf[n-1]} */
                x1im = XBufLo[1];   /* IM{XBuf[n-1]} */
                XBufLo += (64 * 2);

                for (i = iStart; i < iEnd; i++) {
                    /* a0re/im, a1re/im are Q28 with at least 1 GB,
                     * so the summing for AACre/im is fine (1 GB in, plus 1 from MULSHIFT32)
                     */
                    /* complex multiply: ACC = a1 * X[n-2] */
                    ACCre = MULSHIFT32(x2re, a1re) - MULSHIFT32(x2im, a1im);
                    ACCim = MULSHIFT32(x2re, a1im) + MULSHIFT32(x2im, a1re);
                    x2re = x1re;
                    x2im = x1im;

                    /* ACC += a0 * X[n-1] */
                    ACCre += MULSHIFT32(x1re, a0re) - MULSHIFT32(x1im, a0im);
                    ACCim += MULSHIFT32(x1re, a0im) + MULSHIFT32(x1im, a0re);
                    x1re = XBufLo[0];   /* RE{XBuf[n]} */
                    x1im = XBufLo[1];   /* IM{XBuf[n]} */
                    XBufLo += (64 * 2);

                    /* lost 4 fbits when scaling by a0re/im, a1re/im (Q28) */
                    CLIP_2N_SHIFT30(ACCre, 4);
                    ACCre += x1re;
                    CLIP_2N_SHIFT30(ACCim, 4);
                    ACCim += x1im;

                    XBufHi[0] = ACCre;
                    XBufHi[1] = ACCim;
                    XBufHi += (64 * 2);

                    /* update guard bit masks */
                    gbMask = FASTABS(ACCre);
                    gbMask |= FASTABS(ACCim);
                    gbIdx = (i >> 5) & 0x01;    /* 0 if i < 32, 1 if i >= 32 */
                    sbrChan->gbMask[gbIdx] |= gbMask;
                }
            } else {
                /* bw == 0: plain patch copy, no inverse filtering */
                XBufLo = (int *)psi->XBuf[iStart][p];
                for (i = iStart; i < iEnd; i++) {
                    XBufHi[0] = XBufLo[0];
                    XBufHi[1] = XBufLo[1];
                    XBufLo += (64 * 2);
                    XBufHi += (64 * 2);
                }
            }
            k++;    /* high QMF band */
        }
    }
    return ERR_AAC_NONE;
}
654 | |
655 | |
656 |