blob: 0ab7dad74c029165ce1a2bc66f46d870306445c2
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: sbrhfgen.c,v 1.1.2.2 2005/05/19 21:00:01 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * sbrhfgen.c - high frequency generation for SBR |
44 | **************************************************************************************/ |
45 | |
46 | #include "sbr.h" |
47 | #include "assembly.h" |
48 | |
49 | #define FBITS_LPCOEFS 29 /* Q29 for range of (-4, 4) */ |
50 | #define MAG_16 (16 * (1 << (32 - (2*(32-FBITS_LPCOEFS))))) /* i.e. 16 in Q26 format */ |
51 | #define RELAX_COEF 0x7ffff79c /* 1.0 / (1.0 + 1e-6), Q31 */ |
52 | |
53 | /* newBWTab[prev invfMode][curr invfMode], format = Q31 (table 4.158) |
54 | * sample file which uses all of these: al_sbr_sr_64_2_fsaac32.aac |
55 | */ |
56 | static const int newBWTab[4][4] = { |
57 | {0x00000000, 0x4ccccccd, 0x73333333, 0x7d70a3d7}, |
58 | {0x4ccccccd, 0x60000000, 0x73333333, 0x7d70a3d7}, |
59 | {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7}, |
60 | {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7}, |
61 | }; |
62 | |
63 | /************************************************************************************** |
64 | * Function: CVKernel1 |
65 | * |
66 | * Description: kernel of covariance matrix calculation for p01, p11, p12, p22 |
67 | * |
68 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
69 | * freq index = patch subband |
70 | * |
71 | * Outputs: 64-bit accumulators for p01re, p01im, p12re, p12im, p11re, p22re |
72 | * stored in accBuf |
73 | * |
74 | * Return: none |
75 | * |
76 | * Notes: this is carefully written to be efficient on ARM |
77 | * use the assembly code version in sbrcov.s when building for ARM! |
78 | **************************************************************************************/ |
#if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void CVKernel1(int *XBuf, int *accBuf);
#else
/* C implementation of the covariance kernel (ARM assembly version in sbrcov.s is
 * disabled above via #if 0). Accumulates six 64-bit lag products over one subband:
 *   p01 = sum X[n+1] * conj(X[n])   (complex)
 *   p12 = sum X[n]   * conj(X[n-1]) (complex, derived from p01 + boundary terms)
 *   p11 = sum |X[n]|^2, p22 = sum |X[n-1]|^2 (real)
 * XBuf walks one QMF subband through time: consecutive time samples of the same
 * subband are 2*64 ints apart (64 complex bins per slot, interleaved re/im).
 * Results are written to accBuf as 6 (lo32, hi32) pairs in the order
 * p01re, p01im, p11re, p12re, p12im, p22re - CalcCovariance1 must unpack in
 * exactly this order.
 */
void CVKernel1(int *XBuf, int *accBuf)
{
    U64 p01re, p01im, p12re, p12im, p11re, p22re;
    int n, x0re, x0im, x1re, x1im;

    /* prime the 2-tap delay line with X[0] and X[1] */
    x0re = XBuf[0];
    x0im = XBuf[1];
    XBuf += (2 * 64);
    x1re = XBuf[0];
    x1im = XBuf[1];
    XBuf += (2 * 64);

    p01re.w64 = p01im.w64 = 0;
    p12re.w64 = p12im.w64 = 0;
    p11re.w64 = 0;
    p22re.w64 = 0;

    /* seed p12/p22 with the leading boundary term (the lag-1 sums start one
     * sample earlier than the lag products accumulated in the main loop) */
    p12re.w64 = MADD64(p12re.w64, x1re, x0re);
    p12re.w64 = MADD64(p12re.w64, x1im, x0im);
    p12im.w64 = MADD64(p12im.w64, x0re, x1im);
    p12im.w64 = MADD64(p12im.w64, -x0im, x1re);
    p22re.w64 = MADD64(p22re.w64, x0re, x0re);
    p22re.w64 = MADD64(p22re.w64, x0im, x0im);
    for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) {
        /* 4 input, 3*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
        x0re = x1re;
        x0im = x1im;
        x1re = XBuf[0];
        x1im = XBuf[1];

        /* p01 += X[n] * conj(X[n-1]); p11 += |X[n-1]|^2 */
        p01re.w64 = MADD64(p01re.w64, x1re, x0re);
        p01re.w64 = MADD64(p01re.w64, x1im, x0im);
        p01im.w64 = MADD64(p01im.w64, x0re, x1im);
        p01im.w64 = MADD64(p01im.w64, -x0im, x1re);
        p11re.w64 = MADD64(p11re.w64, x0re, x0re);
        p11re.w64 = MADD64(p11re.w64, x0im, x0im);

        XBuf += (2 * 64);
    }
    /* these can be derived by slight changes to account for boundary conditions:
     * p12/p22 equal p01/p11 shifted by one sample, so reuse the loop sums and
     * subtract the trailing term that doesn't belong */
    p12re.w64 += p01re.w64;
    p12re.w64 = MADD64(p12re.w64, x1re, -x0re);
    p12re.w64 = MADD64(p12re.w64, x1im, -x0im);
    p12im.w64 += p01im.w64;
    p12im.w64 = MADD64(p12im.w64, x0re, -x1im);
    p12im.w64 = MADD64(p12im.w64, x0im, x1re);
    p22re.w64 += p11re.w64;
    p22re.w64 = MADD64(p22re.w64, x0re, -x0re);
    p22re.w64 = MADD64(p22re.w64, x0im, -x0im);

    /* pack 64-bit accumulators as (lo32, hi32) pairs */
    accBuf[0] = p01re.r.lo32;
    accBuf[1] = p01re.r.hi32;
    accBuf[2] = p01im.r.lo32;
    accBuf[3] = p01im.r.hi32;
    accBuf[4] = p11re.r.lo32;
    accBuf[5] = p11re.r.hi32;
    accBuf[6] = p12re.r.lo32;
    accBuf[7] = p12re.r.hi32;
    accBuf[8] = p12im.r.lo32;
    accBuf[9] = p12im.r.hi32;
    accBuf[10] = p22re.r.lo32;
    accBuf[11] = p22re.r.hi32;
}
#endif
149 | |
150 | /************************************************************************************** |
151 | * Function: CalcCovariance1 |
152 | * |
153 | * Description: calculate covariance matrix for p01, p12, p11, p22 (4.6.18.6.2) |
154 | * |
155 | * Inputs: buffer of low-freq samples, starting at time index 0, |
156 | * freq index = patch subband |
157 | * |
158 | * Outputs: complex covariance elements p01re, p01im, p12re, p12im, p11re, p22re |
159 | * (p11im = p22im = 0) |
160 | * format = integer (Q0) * 2^N, with scalefactor N >= 0 |
161 | * |
162 | * Return: scalefactor N |
163 | * |
164 | * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits) |
165 | **************************************************************************************/ |
166 | static int CalcCovariance1(int *XBuf, int *p01reN, int *p01imN, int *p12reN, int *p12imN, int *p11reN, int *p22reN) |
167 | { |
168 | int accBuf[2 * 6]; |
169 | int n, z, s, loShift, hiShift, gbMask; |
170 | U64 p01re, p01im, p12re, p12im, p11re, p22re; |
171 | |
172 | CVKernel1(XBuf, accBuf); |
173 | p01re.r.lo32 = accBuf[0]; |
174 | p01re.r.hi32 = accBuf[1]; |
175 | p01im.r.lo32 = accBuf[2]; |
176 | p01im.r.hi32 = accBuf[3]; |
177 | p11re.r.lo32 = accBuf[4]; |
178 | p11re.r.hi32 = accBuf[5]; |
179 | p12re.r.lo32 = accBuf[6]; |
180 | p12re.r.hi32 = accBuf[7]; |
181 | p12im.r.lo32 = accBuf[8]; |
182 | p12im.r.hi32 = accBuf[9]; |
183 | p22re.r.lo32 = accBuf[10]; |
184 | p22re.r.hi32 = accBuf[11]; |
185 | |
186 | /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits |
187 | * want to scale them down to integers (32-bit signed, Q0) |
188 | * with scale factor of 2^n, n >= 0 |
189 | * leave 2 GB's for calculating determinant, so take top 30 non-zero bits |
190 | */ |
191 | gbMask = ((p01re.r.hi32) ^(p01re.r.hi32 >> 31)) | ((p01im.r.hi32) ^(p01im.r.hi32 >> 31)); |
192 | gbMask |= ((p12re.r.hi32) ^(p12re.r.hi32 >> 31)) | ((p12im.r.hi32) ^(p12im.r.hi32 >> 31)); |
193 | gbMask |= ((p11re.r.hi32) ^(p11re.r.hi32 >> 31)) | ((p22re.r.hi32) ^(p22re.r.hi32 >> 31)); |
194 | if (gbMask == 0) { |
195 | s = p01re.r.hi32 >> 31; |
196 | gbMask = (p01re.r.lo32 ^ s) - s; |
197 | s = p01im.r.hi32 >> 31; |
198 | gbMask |= (p01im.r.lo32 ^ s) - s; |
199 | s = p12re.r.hi32 >> 31; |
200 | gbMask |= (p12re.r.lo32 ^ s) - s; |
201 | s = p12im.r.hi32 >> 31; |
202 | gbMask |= (p12im.r.lo32 ^ s) - s; |
203 | s = p11re.r.hi32 >> 31; |
204 | gbMask |= (p11re.r.lo32 ^ s) - s; |
205 | s = p22re.r.hi32 >> 31; |
206 | gbMask |= (p22re.r.lo32 ^ s) - s; |
207 | z = 32 + CLZ(gbMask); |
208 | } else { |
209 | gbMask = FASTABS(p01re.r.hi32) | FASTABS(p01im.r.hi32); |
210 | gbMask |= FASTABS(p12re.r.hi32) | FASTABS(p12im.r.hi32); |
211 | gbMask |= FASTABS(p11re.r.hi32) | FASTABS(p22re.r.hi32); |
212 | z = CLZ(gbMask); |
213 | } |
214 | |
215 | n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */ |
216 | if (n <= 30) { |
217 | loShift = (30 - n); |
218 | *p01reN = p01re.r.lo32 << loShift; |
219 | *p01imN = p01im.r.lo32 << loShift; |
220 | *p12reN = p12re.r.lo32 << loShift; |
221 | *p12imN = p12im.r.lo32 << loShift; |
222 | *p11reN = p11re.r.lo32 << loShift; |
223 | *p22reN = p22re.r.lo32 << loShift; |
224 | return -(loShift + 2 * FBITS_OUT_QMFA); |
225 | } else if (n < 32 + 30) { |
226 | loShift = (n - 30); |
227 | hiShift = 32 - loShift; |
228 | *p01reN = (p01re.r.hi32 << hiShift) | (p01re.r.lo32 >> loShift); |
229 | *p01imN = (p01im.r.hi32 << hiShift) | (p01im.r.lo32 >> loShift); |
230 | *p12reN = (p12re.r.hi32 << hiShift) | (p12re.r.lo32 >> loShift); |
231 | *p12imN = (p12im.r.hi32 << hiShift) | (p12im.r.lo32 >> loShift); |
232 | *p11reN = (p11re.r.hi32 << hiShift) | (p11re.r.lo32 >> loShift); |
233 | *p22reN = (p22re.r.hi32 << hiShift) | (p22re.r.lo32 >> loShift); |
234 | return (loShift - 2 * FBITS_OUT_QMFA); |
235 | } else { |
236 | hiShift = n - (32 + 30); |
237 | *p01reN = p01re.r.hi32 >> hiShift; |
238 | *p01imN = p01im.r.hi32 >> hiShift; |
239 | *p12reN = p12re.r.hi32 >> hiShift; |
240 | *p12imN = p12im.r.hi32 >> hiShift; |
241 | *p11reN = p11re.r.hi32 >> hiShift; |
242 | *p22reN = p22re.r.hi32 >> hiShift; |
243 | return (32 - 2 * FBITS_OUT_QMFA - hiShift); |
244 | } |
245 | |
246 | return 0; |
247 | } |
248 | |
249 | /************************************************************************************** |
250 | * Function: CVKernel2 |
251 | * |
252 | * Description: kernel of covariance matrix calculation for p02 |
253 | * |
254 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
255 | * freq index = patch subband |
256 | * |
257 | * Outputs: 64-bit accumulators for p02re, p02im stored in accBuf |
258 | * |
259 | * Return: none |
260 | * |
261 | * Notes: this is carefully written to be efficient on ARM |
262 | * use the assembly code version in sbrcov.s when building for ARM! |
263 | **************************************************************************************/ |
264 | #if 0// (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__)) |
265 | #ifdef __cplusplus |
266 | extern "C" |
267 | #endif |
268 | void CVKernel2(int *XBuf, int *accBuf); |
269 | #else |
270 | void CVKernel2(int *XBuf, int *accBuf) |
271 | { |
272 | U64 p02re, p02im; |
273 | int n, x0re, x0im, x1re, x1im, x2re, x2im; |
274 | |
275 | p02re.w64 = p02im.w64 = 0; |
276 | |
277 | x0re = XBuf[0]; |
278 | x0im = XBuf[1]; |
279 | XBuf += (2 * 64); |
280 | x1re = XBuf[0]; |
281 | x1im = XBuf[1]; |
282 | XBuf += (2 * 64); |
283 | |
284 | for (n = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6); n != 0; n--) { |
285 | /* 6 input, 2*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */ |
286 | x2re = XBuf[0]; |
287 | x2im = XBuf[1]; |
288 | |
289 | p02re.w64 = MADD64(p02re.w64, x2re, x0re); |
290 | p02re.w64 = MADD64(p02re.w64, x2im, x0im); |
291 | p02im.w64 = MADD64(p02im.w64, x0re, x2im); |
292 | p02im.w64 = MADD64(p02im.w64, -x0im, x2re); |
293 | |
294 | x0re = x1re; |
295 | x0im = x1im; |
296 | x1re = x2re; |
297 | x1im = x2im; |
298 | XBuf += (2 * 64); |
299 | } |
300 | |
301 | accBuf[0] = p02re.r.lo32; |
302 | accBuf[1] = p02re.r.hi32; |
303 | accBuf[2] = p02im.r.lo32; |
304 | accBuf[3] = p02im.r.hi32; |
305 | } |
306 | #endif |
307 | |
308 | /************************************************************************************** |
309 | * Function: CalcCovariance2 |
310 | * |
311 | * Description: calculate covariance matrix for p02 (4.6.18.6.2) |
312 | * |
313 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
314 | * freq index = patch subband |
315 | * |
316 | * Outputs: complex covariance element p02re, p02im |
317 | * format = integer (Q0) * 2^N, with scalefactor N >= 0 |
318 | * |
319 | * Return: scalefactor N |
320 | * |
321 | * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits) |
322 | **************************************************************************************/ |
323 | static int CalcCovariance2(int *XBuf, int *p02reN, int *p02imN) |
324 | { |
325 | U64 p02re, p02im; |
326 | int n, z, s, loShift, hiShift, gbMask; |
327 | int accBuf[2 * 2]; |
328 | |
329 | CVKernel2(XBuf, accBuf); |
330 | p02re.r.lo32 = accBuf[0]; |
331 | p02re.r.hi32 = accBuf[1]; |
332 | p02im.r.lo32 = accBuf[2]; |
333 | p02im.r.hi32 = accBuf[3]; |
334 | |
335 | /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits |
336 | * want to scale them down to integers (32-bit signed, Q0) |
337 | * with scale factor of 2^n, n >= 0 |
338 | * leave 1 GB for calculating determinant, so take top 30 non-zero bits |
339 | */ |
340 | gbMask = ((p02re.r.hi32) ^(p02re.r.hi32 >> 31)) | ((p02im.r.hi32) ^(p02im.r.hi32 >> 31)); |
341 | if (gbMask == 0) { |
342 | s = p02re.r.hi32 >> 31; |
343 | gbMask = (p02re.r.lo32 ^ s) - s; |
344 | s = p02im.r.hi32 >> 31; |
345 | gbMask |= (p02im.r.lo32 ^ s) - s; |
346 | z = 32 + CLZ(gbMask); |
347 | } else { |
348 | gbMask = FASTABS(p02re.r.hi32) | FASTABS(p02im.r.hi32); |
349 | z = CLZ(gbMask); |
350 | } |
351 | n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */ |
352 | |
353 | if (n <= 30) { |
354 | loShift = (30 - n); |
355 | *p02reN = p02re.r.lo32 << loShift; |
356 | *p02imN = p02im.r.lo32 << loShift; |
357 | return -(loShift + 2 * FBITS_OUT_QMFA); |
358 | } else if (n < 32 + 30) { |
359 | loShift = (n - 30); |
360 | hiShift = 32 - loShift; |
361 | *p02reN = (p02re.r.hi32 << hiShift) | (p02re.r.lo32 >> loShift); |
362 | *p02imN = (p02im.r.hi32 << hiShift) | (p02im.r.lo32 >> loShift); |
363 | return (loShift - 2 * FBITS_OUT_QMFA); |
364 | } else { |
365 | hiShift = n - (32 + 30); |
366 | *p02reN = p02re.r.hi32 >> hiShift; |
367 | *p02imN = p02im.r.hi32 >> hiShift; |
368 | return (32 - 2 * FBITS_OUT_QMFA - hiShift); |
369 | } |
370 | |
371 | return 0; |
372 | } |
373 | |
374 | /************************************************************************************** |
375 | * Function: CalcLPCoefs |
376 | * |
377 | * Description: calculate linear prediction coefficients for one subband (4.6.18.6.2) |
378 | * |
379 | * Inputs: buffer of low-freq samples, starting at time index = 0, |
380 | * freq index = patch subband |
381 | * number of guard bits in input sample buffer |
382 | * |
383 | * Outputs: complex LP coefficients a0re, a0im, a1re, a1im, format = Q29 |
384 | * |
385 |  * Return:      ERR_AAC_NONE (out-of-range coefficients are zeroed per spec, |
386 | * |
387 | * Notes: output coefficients (a0re, a0im, a1re, a1im) clipped to range (-4, 4) |
388 |  *              if the complex coefficients have magnitude >= 4.0, they are all |
389 | * set to 0 (see spec) |
390 | **************************************************************************************/ |
/* Solve the 2x2 normal equations for the complex LP coefficients a0, a1 (Q29)
 * of one low-band subband. XBuf is temporarily scaled down to guarantee 3 guard
 * bits, then restored before return. Out-of-range or ill-conditioned results
 * are zeroed per the spec, so this always returns ERR_AAC_NONE. */
static int CalcLPCoefs(int *XBuf, int *a0re, int *a0im, int *a1re, int *a1im, int gb)
{
    int zFlag, n1, n2, nd, d, dInv, tre, tim;
    int p01re, p01im, p02re, p02im, p12re, p12im, p11re, p22re;

    /* pre-scale to avoid overflow - probably never happens in practice (see QMFA)
     * max bit growth per accumulator = 38*2 = 76 mul-adds (X * X)
     * using 64-bit MADD, so if X has n guard bits, X*X has 2n+1 guard bits
     * gain 1 extra sign bit per multiply, so ensure ceil(log2(76/2) / 2) = 3 guard bits on inputs
     */
    if (gb < 3) {
        nd = 3 - gb;
        for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
            XBuf[0] >>= nd;
            XBuf[1] >>= nd;
            XBuf += (2 * 64);   /* next time sample of this subband */
        }
        XBuf -= (2 * 64 * (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2));  /* rewind */
    }

    /* calculate covariance elements */
    n1 = CalcCovariance1(XBuf, &p01re, &p01im, &p12re, &p12im, &p11re, &p22re);
    n2 = CalcCovariance2(XBuf, &p02re, &p02im);

    /* normalize everything to larger power of 2 scalefactor, call it n1 */
    if (n1 < n2) {
        nd = MIN(n2 - n1, 31);  /* clamp: shifting an int by >= 32 is undefined */
        p01re >>= nd;
        p01im >>= nd;
        p12re >>= nd;
        p12im >>= nd;
        p11re >>= nd;
        p22re >>= nd;
        n1 = n2;
    } else if (n1 > n2) {
        nd = MIN(n1 - n2, 31);
        p02re >>= nd;
        p02im >>= nd;
    }

    /* calculate determinant of covariance matrix (at least 1 GB in pXX)
     * d = p11*p22 - |p12|^2 / (1 + 1e-6), the RELAX_COEF keeping d slightly
     * positive for a (numerically) singular matrix */
    d = MULSHIFT32(p12re, p12re) + MULSHIFT32(p12im, p12im);
    d = MULSHIFT32(d, RELAX_COEF) << 1;
    d = MULSHIFT32(p11re, p22re) - d;
    /* NOTE(review): mathematically d >= 0 for a true covariance matrix, but the
     * original ASSERT below was observed to fire on some streams (presumably
     * from fixed-point rounding); it is treated as non-fatal - when d <= 0 the
     * a1 coefficients simply stay 0. Left disabled. */
    //ASSERT(d >= 0,ERR_AAC_SBR_BITSTREAM); /* should never be < 0 */

    zFlag = 0;
    *a0re = *a0im = 0;
    *a1re = *a1im = 0;
    if (d > 0) {
        /* input = Q31 d = Q(-2*n1 - 32 + nd) = Q31 * 2^(31 + 2*n1 + 32 - nd)
         * inverse = Q29 dInv = Q29 * 2^(-31 - 2*n1 - 32 + nd) = Q(29 + 31 + 2*n1 + 32 - nd)
         *
         * numerator has same Q format as d, since it's sum of normalized squares
         * so num * inverse = Q(-2*n1 - 32) * Q(29 + 31 + 2*n1 + 32 - nd)
         *                  = Q(29 + 31 - nd), drop low 32 in MULSHIFT32
         *                  = Q(29 + 31 - 32 - nd) = Q(28 - nd)
         */
        nd = CLZ(d) - 1;        /* normalize d to [0.5, 1.0) before inversion */
        d <<= nd;
        dInv = InvRNormalized(d);

        /* 1 GB in pXX */
        tre = MULSHIFT32(p01re, p12re) - MULSHIFT32(p01im, p12im) - MULSHIFT32(p02re, p11re);
        tre = MULSHIFT32(tre, dInv);
        tim = MULSHIFT32(p01re, p12im) + MULSHIFT32(p01im, p12re) - MULSHIFT32(p02im, p11re);
        tim = MULSHIFT32(tim, dInv);

        /* if d is extremely small, just set coefs to 0 (would have poor precision anyway)
         * also zero if |a1| would reach 4.0 (out of Q29 range) */
        if (nd > 28 || (FASTABS(tre) >> (28 - nd)) >= 4 || (FASTABS(tim) >> (28 - nd)) >= 4) {
            zFlag = 1;
        } else {
            *a1re = tre << (FBITS_LPCOEFS - 28 + nd);   /* i.e. convert Q(28 - nd) to Q(29) */
            *a1im = tim << (FBITS_LPCOEFS - 28 + nd);
        }
    }

    if (p11re) {
        /* input = Q31 p11re = Q(-n1 + nd) = Q31 * 2^(31 + n1 - nd)
         * inverse = Q29 dInv = Q29 * 2^(-31 - n1 + nd) = Q(29 + 31 + n1 - nd)
         *
         * numerator is Q(-n1 - 3)
         * so num * inverse = Q(-n1 - 3) * Q(29 + 31 + n1 - nd)
         *                  = Q(29 + 31 - 3 - nd), drop low 32 in MULSHIFT32
         *                  = Q(29 + 31 - 3 - 32 - nd) = Q(25 - nd)
         */
        nd = CLZ(p11re) - 1;    /* assume positive */
        p11re <<= nd;
        dInv = InvRNormalized(p11re);

        /* a1re, a1im = Q29, so scaled by (n1 + 3) */
        tre = (p01re >> 3) + MULSHIFT32(p12re, *a1re) + MULSHIFT32(p12im, *a1im);
        tre = -MULSHIFT32(tre, dInv);
        tim = (p01im >> 3) - MULSHIFT32(p12im, *a1re) + MULSHIFT32(p12re, *a1im);
        tim = -MULSHIFT32(tim, dInv);

        if (nd > 25 || (FASTABS(tre) >> (25 - nd)) >= 4 || (FASTABS(tim) >> (25 - nd)) >= 4) {
            zFlag = 1;
        } else {
            *a0re = tre << (FBITS_LPCOEFS - 25 + nd);   /* i.e. convert Q(25 - nd) to Q(29) */
            *a0im = tim << (FBITS_LPCOEFS - 25 + nd);
        }
    }

    /* see 4.6.18.6.2 - if magnitude of a0 or a1 >= 4 then a0 = a1 = 0
     * i.e. a0re < 4, a0im < 4, a1re < 4, a1im < 4
     * Q29*Q29 = Q26 (MAG_16 is 16.0 in Q26 = |a|^2 limit for |a| = 4.0)
     */
    if (zFlag || MULSHIFT32(*a0re, *a0re) + MULSHIFT32(*a0im, *a0im) >= MAG_16 || MULSHIFT32(*a1re, *a1re) + MULSHIFT32(*a1im, *a1im) >= MAG_16) {
        *a0re = *a0im = 0;
        *a1re = *a1im = 0;
    }

    /* no need to clip - we never changed the XBuf data, just used it to calculate a0 and a1 */
    if (gb < 3) {
        nd = 3 - gb;
        for (n1 = (NUM_TIME_SLOTS * SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
            XBuf[0] <<= nd;     /* undo the pre-scale, restoring the caller's buffer */
            XBuf[1] <<= nd;
            XBuf += (2 * 64);
        }
    }
    return ERR_AAC_NONE;
}
516 | |
517 | /************************************************************************************** |
518 | * Function: GenerateHighFreq |
519 | * |
520 | * Description: generate high frequencies with SBR (4.6.18.6) |
521 | * |
522 | * Inputs: initialized PSInfoSBR struct |
523 | * initialized SBRGrid struct for this channel |
524 | * initialized SBRFreq struct for this SCE/CPE block |
525 | * initialized SBRChan struct for this channel |
526 | * index of current channel (0 for SCE, 0 or 1 for CPE) |
527 | * |
528 | * Outputs: new high frequency samples starting at frequency kStart |
529 | * |
530 |  * Return:      error code (ERR_AAC_NONE if OK) |
531 | **************************************************************************************/ |
/* Generate the high-band QMF samples (4.6.18.6): smooth the per-noise-band
 * chirp factors, then for each patch subband either run the 2-tap complex LPC
 * filter (bw != 0) on the source low band or copy it verbatim (bw == 0).
 * Returns ERR_AAC_NONE, or the error propagated from CalcLPCoefs. */
int GenerateHighFreq(PSInfoSBR *psi, SBRGrid *sbrGrid, SBRFreq *sbrFreq, SBRChan *sbrChan, int ch)
{
    int band, newBW, c, t, gb, gbMask, gbIdx;
    int currPatch, p, x, k, g, i, iStart, iEnd, bw, bwsq;
    int a0re, a0im, a1re, a1im;
    int x1re, x1im, x2re, x2im;
    int ACCre, ACCim;
    int *XBufLo, *XBufHi;
    int err = ERR_AAC_NONE;
    /* calculate array of chirp factors */
    for (band = 0; band < sbrFreq->numNoiseFloorBands; band++) {
        c = sbrChan->chirpFact[band];   /* previous (bwArray') */
        newBW = newBWTab[sbrChan->invfMode[0][band]][sbrChan->invfMode[1][band]];

        /* weighted average of new and old (can't overflow - total gain = 1.0) */
        if (newBW < c) {
            t = MULSHIFT32(newBW, 0x60000000) + MULSHIFT32(0x20000000, c);  /* new is smaller: 0.75*new + 0.25*old */
        } else {
            t = MULSHIFT32(newBW, 0x74000000) + MULSHIFT32(0x0c000000, c);  /* new is larger: 0.90625*new + 0.09375*old */
        }
        t <<= 1;    /* MULSHIFT32 dropped one bit: restore Q31 */

        if (t < 0x02000000) {   /* below 0.015625, clip to 0 */
            t = 0;
        }
        if (t > 0x7f800000) {   /* clip to 0.99609375 */
            t = 0x7f800000;
        }

        /* save curr as prev for next time */
        sbrChan->chirpFact[band] = t;
        sbrChan->invfMode[0][band] = sbrChan->invfMode[1][band];
    }

    /* time span of the current SBR frame, offset by HF_ADJ history samples */
    iStart = sbrGrid->envTimeBorder[0] + HF_ADJ;
    iEnd = sbrGrid->envTimeBorder[sbrGrid->numEnv] + HF_ADJ;

    /* generate new high freqs from low freqs, patches, and chirp factors */
    k = sbrFreq->kStart;        /* first high QMF band to synthesize */
    g = 0;                      /* current noise floor band */
    bw = sbrChan->chirpFact[g];
    bwsq = MULSHIFT32(bw, bw) << 1;

    gbMask = (sbrChan->gbMask[0] | sbrChan->gbMask[1]);     /* older 32 | newer 8 */
    gb = CLZ(gbMask) - 1;       /* guard bits available in XBuf for CalcLPCoefs */

    for (currPatch = 0; currPatch < sbrFreq->numPatches; currPatch++) {
        for (x = 0; x < sbrFreq->patchNumSubbands[currPatch]; x++) {
            /* map k to corresponding noise floor band */
            if (k >= sbrFreq->freqNoise[g + 1]) {
                g++;
                bw = sbrChan->chirpFact[g];         /* Q31 */
                bwsq = MULSHIFT32(bw, bw) << 1;     /* Q31 */
            }

            p = sbrFreq->patchStartSubband[currPatch] + x;  /* low QMF band */
            XBufHi = psi->XBuf[iStart][k];
            if (bw) {
                /* inverse filtering active: estimate LP coefficients from the
                 * source band, then run X[k][n] = X[p][n] + a0'*X[p][n-1] + a1'*X[p][n-2] */
                err = CalcLPCoefs(psi->XBuf[0][p], &a0re, &a0im, &a1re, &a1im, gb);
                if (err) {
                    return err;
                }
                a0re = MULSHIFT32(bw, a0re);    /* Q31 * Q29 = Q28 */
                a0im = MULSHIFT32(bw, a0im);
                a1re = MULSHIFT32(bwsq, a1re);
                a1im = MULSHIFT32(bwsq, a1im);

                XBufLo = psi->XBuf[iStart - 2][p];

                x2re = XBufLo[0];   /* RE{XBuf[n-2]} */
                x2im = XBufLo[1];   /* IM{XBuf[n-2]} */
                XBufLo += (64 * 2);

                x1re = XBufLo[0];   /* RE{XBuf[n-1]} */
                x1im = XBufLo[1];   /* IM{XBuf[n-1]} */
                XBufLo += (64 * 2);

                for (i = iStart; i < iEnd; i++) {
                    /* a0re/im, a1re/im are Q28 with at least 1 GB,
                     * so the summing for AACre/im is fine (1 GB in, plus 1 from MULSHIFT32)
                     */
                    /* complex multiply: ACC = a1 * X[n-2] */
                    ACCre = MULSHIFT32(x2re, a1re) - MULSHIFT32(x2im, a1im);
                    ACCim = MULSHIFT32(x2re, a1im) + MULSHIFT32(x2im, a1re);
                    x2re = x1re;
                    x2im = x1im;

                    /* ACC += a0 * X[n-1] */
                    ACCre += MULSHIFT32(x1re, a0re) - MULSHIFT32(x1im, a0im);
                    ACCim += MULSHIFT32(x1re, a0im) + MULSHIFT32(x1im, a0re);
                    x1re = XBufLo[0];   /* RE{XBuf[n]} */
                    x1im = XBufLo[1];   /* IM{XBuf[n]} */
                    XBufLo += (64 * 2);

                    /* lost 4 fbits when scaling by a0re/im, a1re/im (Q28) */
                    CLIP_2N_SHIFT30(ACCre, 4);
                    ACCre += x1re;
                    CLIP_2N_SHIFT30(ACCim, 4);
                    ACCim += x1im;

                    XBufHi[0] = ACCre;
                    XBufHi[1] = ACCim;
                    XBufHi += (64 * 2);

                    /* update guard bit masks */
                    gbMask = FASTABS(ACCre);
                    gbMask |= FASTABS(ACCim);
                    gbIdx = (i >> 5) & 0x01;    /* 0 if i < 32, 1 if i >= 32 */
                    sbrChan->gbMask[gbIdx] |= gbMask;
                }
            } else {
                /* bw == 0: plain patch copy, no inverse filtering */
                XBufLo = (int *)psi->XBuf[iStart][p];
                for (i = iStart; i < iEnd; i++) {
                    XBufHi[0] = XBufLo[0];
                    XBufHi[1] = XBufLo[1];
                    XBufLo += (64 * 2);
                    XBufHi += (64 * 2);
                }
            }
            k++;    /* high QMF band */
        }
    }
    return ERR_AAC_NONE;
}
654 | |
655 | |
656 |