blob: 23845937c6bdd09c0e3255372f14e7d1a87c8096
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: pns.c,v 1.2 2005/03/10 17:01:56 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * pns.c - perceptual noise substitution |
44 | **************************************************************************************/ |
45 | |
46 | #include "coder.h" |
47 | #ifndef __MW__ |
48 | #include "assembly.h" |
49 | #else |
50 | #include "assembly_mw.h" |
51 | #endif /* __MW__ */ |
52 | |
/**************************************************************************************
 * Function:    Get32BitVal
 *
 * Description: advance the pseudo-random sequence by one step
 *
 * Inputs:      pointer to the previous value of the sequence (the seed, first time
 *                through)
 *
 * Outputs:     *last overwritten with the newly generated value
 *
 * Return:      the newly generated value (the same number that was stored in *last)
 *
 * Notes:       linear congruential generator: next = 1664525 * prev + 1013904223
 *              the arithmetic is done on unsigned int, so it wraps instead of
 *                overflowing (mod 2^32 when unsigned int is 32 bits wide)
 **************************************************************************************/
static unsigned int Get32BitVal(unsigned int *last)
{
	/* same coefficients as the MPEG reference code (classic LCG)
	 * unsigned multiply and add give well-defined wraparound in C
	 */
	*last = (*last * 1664525U) + 1013904223U;

	return *last;
}
78 | |
/* Q30 table of pow(2, i/4.0) for i = [0,1,2,3]
 * indexed with the low two bits of a scalefactor (see ScaleNoiseVector)
 */
static const int pow14[4] = {
	0x40000000,		/* 2^(0/4) = 1.0000 */
	0x4c1bf829,		/* 2^(1/4) ~ 1.1892 */
	0x5a82799a,		/* 2^(2/4) ~ 1.4142 */
	0x6ba27e65		/* 2^(3/4) ~ 1.6818 */
};

/* number of Newton iterations performed by InvRootR */
#define NUM_ITER_INVSQRT	4

/* fixed-point constants used by InvRootR */
#define X0_COEF_2	0xc0000000	/* Q29: -2.0 (slope of the initial guess) */
#define X0_OFF_2	0x60000000	/* Q29:  3.0 (offset of the initial guess) */
#define Q26_3		0x0c000000	/* Q26:  3.0 (the "3" in 3 - r*xn^2) */
89 | |
90 | /************************************************************************************** |
91 | * Function: InvRootR |
92 | * |
93 | * Description: use Newton's method to solve for x = 1/sqrt(r) |
94 | * |
95 | * Inputs: r in Q30 format, range = [0.25, 1] (normalize inputs to this range) |
96 | * |
97 | * Outputs: none |
98 | * |
99 | * Return: x = Q29, range = (1, 2) |
100 | * |
101 | * Notes: guaranteed to converge and not overflow for any r in this range |
102 | * |
103 | * xn+1 = xn - f(xn)/f'(xn) |
104 | * f(x) = 1/sqrt(r) - x = 0 (find root) |
105 | * = 1/x^2 - r |
106 | * f'(x) = -2/x^3 |
107 | * |
108 | * so xn+1 = xn/2 * (3 - r*xn^2) |
109 | * |
110 | * NUM_ITER_INVSQRT = 3, maxDiff = 1.3747e-02 |
111 | * NUM_ITER_INVSQRT = 4, maxDiff = 3.9832e-04 |
112 | **************************************************************************************/ |
113 | static int InvRootR(int r) |
114 | { |
115 | int i, xn, t; |
116 | |
117 | /* use linear equation for initial guess |
118 | * x0 = -2*r + 3 (so x0 always >= correct answer in range [0.25, 1)) |
119 | * xn = Q29 (at every step) |
120 | */ |
121 | xn = (MULSHIFT32(r, X0_COEF_2) << 2) + X0_OFF_2; |
122 | |
123 | for (i = 0; i < NUM_ITER_INVSQRT; i++) { |
124 | t = MULSHIFT32(xn, xn); /* Q26 = Q29*Q29 */ |
125 | t = Q26_3 - (MULSHIFT32(r, t) << 2); /* Q26 = Q26 - (Q31*Q26 << 1) */ |
126 | xn = MULSHIFT32(xn, t) << (6 - 1); /* Q29 = (Q29*Q26 << 6), and -1 for division by 2 */ |
127 | } |
128 | |
129 | /* clip to range (1.0, 2.0) |
130 | * (because of rounding, this can converge to xn slightly > 2.0 when r is near 0.25) |
131 | */ |
132 | if (xn >> 30) { |
133 | xn = (1 << 30) - 1; |
134 | } |
135 | |
136 | return xn; |
137 | } |
138 | |
139 | /************************************************************************************** |
140 | * Function: ScaleNoiseVector |
141 | * |
142 | * Description: apply scaling to vector of noise coefficients for one scalefactor band |
143 | * |
144 | * Inputs: unscaled coefficients |
145 | * number of coefficients in vector (one scalefactor band of coefs) |
146 | * scalefactor for this band (i.e. noise energy) |
147 | * |
148 | * Outputs: nVals coefficients in Q(FBITS_OUT_DQ_OFF) |
149 | * |
150 | * Return: guard bit mask (OR of abs value of all noise coefs) |
151 | **************************************************************************************/ |
152 | static int ScaleNoiseVector(int *coef, int nVals, int sf) |
153 | { |
154 | int i, c, spec, energy, sq, scalef, scalei, invSqrtEnergy, z, gbMask; |
155 | |
156 | energy = 0; |
157 | for (i = 0; i < nVals; i++) { |
158 | spec = coef[i]; |
159 | |
160 | /* max nVals = max SFB width = 96, so energy can gain < 2^7 bits in accumulation */ |
161 | sq = (spec * spec) >> 8; /* spec*spec range = (-2^30, 2^30) */ |
162 | energy += sq; |
163 | } |
164 | |
165 | /* unless nVals == 1 (or the number generator is broken...), this should not happen */ |
166 | if (energy == 0) { |
167 | return 0; /* coef[i] must = 0 for i = [0, nVals-1], so gbMask = 0 */ |
168 | } |
169 | |
170 | /* pow(2, sf/4) * pow(2, FBITS_OUT_DQ_OFF) */ |
171 | scalef = pow14[sf & 0x3]; |
172 | scalei = (sf >> 2) + FBITS_OUT_DQ_OFF; |
173 | |
174 | /* energy has implied factor of 2^-8 since we shifted the accumulator |
175 | * normalize energy to range [0.25, 1.0), calculate 1/sqrt(1), and denormalize |
176 | * i.e. divide input by 2^(30-z) and convert to Q30 |
177 | * output of 1/sqrt(i) now has extra factor of 2^((30-z)/2) |
178 | * for energy > 0, z is an even number between 0 and 28 |
179 | * final scaling of invSqrtEnergy: |
180 | * 2^(15 - z/2) to compensate for implicit 2^(30-z) factor in input |
181 | * +4 to compensate for implicit 2^-8 factor in input |
182 | */ |
183 | z = CLZ(energy) - 2; /* energy has at least 2 leading zeros (see acc loop) */ |
184 | z &= 0xfffffffe; /* force even */ |
185 | invSqrtEnergy = InvRootR(energy << z); /* energy << z must be in range [0x10000000, 0x40000000] */ |
186 | scalei -= (15 - z / 2 + 4); /* nInt = 1/sqrt(energy) in Q29 */ |
187 | |
188 | /* normalize for final scaling */ |
189 | z = CLZ(invSqrtEnergy) - 1; |
190 | invSqrtEnergy <<= z; |
191 | scalei -= (z - 3 - 2); /* -2 for scalef, z-3 for invSqrtEnergy */ |
192 | scalef = MULSHIFT32(scalef, invSqrtEnergy); /* scalef (input) = Q30, invSqrtEnergy = Q29 * 2^z */ |
193 | gbMask = 0; |
194 | |
195 | if (scalei < 0) { |
196 | scalei = -scalei; |
197 | if (scalei > 31) { |
198 | scalei = 31; |
199 | } |
200 | for (i = 0; i < nVals; i++) { |
201 | c = MULSHIFT32(coef[i], scalef) >> scalei; |
202 | gbMask |= FASTABS(c); |
203 | coef[i] = c; |
204 | } |
205 | } else { |
206 | /* for scalei <= 16, no clipping possible (coef[i] is < 2^15 before scaling) |
207 | * for scalei > 16, just saturate exponent (rare) |
208 | * scalef is close to full-scale (since we normalized invSqrtEnergy) |
209 | * remember, we are just producing noise here |
210 | */ |
211 | if (scalei > 16) { |
212 | scalei = 16; |
213 | } |
214 | for (i = 0; i < nVals; i++) { |
215 | c = MULSHIFT32(coef[i] << scalei, scalef); |
216 | coef[i] = c; |
217 | gbMask |= FASTABS(c); |
218 | } |
219 | } |
220 | |
221 | return gbMask; |
222 | } |
223 | |
/**************************************************************************************
 * Function:    GenerateNoiseVector
 *
 * Description: fill one scalefactor band with unscaled pseudo-random coefficients
 *
 * Inputs:      pointer to the generator state (seed / last value produced)
 *              number of coefficients to generate
 *
 * Outputs:     coef[0 .. nVals-1], each the upper 16 bits of one generator output,
 *                range = [-2^15, 2^15)
 *              generator state advanced by nVals steps
 *
 * Return:      none
 *
 * Notes:       the state is stored as an int but stepped as an unsigned int
 *              the sign-preserving result relies on >> of a negative int being an
 *                arithmetic shift (implementation-defined in C)
 **************************************************************************************/
static void GenerateNoiseVector(int *coef, int *last, int nVals)
{
	unsigned int *state = (unsigned int *)last;
	int k = 0;

	while (k < nVals) {
		/* reinterpret the 32-bit draw as signed and keep its top half */
		signed int draw = (signed int)Get32BitVal(state);

		coef[k++] = draw >> 16;
	}
}
245 | |
/**************************************************************************************
 * Function:    CopyNoiseVector
 *
 * Description: duplicate the noise coefficients of one scalefactor band from the left
 *                channel buffer into the right channel buffer
 *
 * Inputs:      source (left) coefficients
 *              number of coefficients to copy
 *
 * Outputs:     destination (right) coefficients, coefR[i] = coefL[i] for i < nVals
 *
 * Return:      none
 *
 * Notes:       copies front to back, one element at a time; nVals <= 0 copies nothing
 **************************************************************************************/
static void CopyNoiseVector(int *coefL, int *coefR, int nVals)
{
	int remaining;

	for (remaining = nVals; remaining > 0; remaining--) {
		*coefR++ = *coefL++;
	}
}
266 | |
267 | /************************************************************************************** |
268 | * Function: PNS |
269 | * |
270 | * Description: apply perceptual noise substitution, if enabled (MPEG-4 only) |
271 | * |
272 | * Inputs: valid AACDecInfo struct |
273 | * index of current channel |
274 | * |
275 | * Outputs: shaped noise in scalefactor bands where PNS is active |
276 | * updated minimum guard bit count for this channel |
277 | * |
278 | * Return: 0 if successful, -1 if error |
279 | **************************************************************************************/ |
280 | int PNS(AACDecInfo *aacDecInfo, int ch) |
281 | { |
282 | int gp, sfb, win, width, nSamps, gb, gbMask; |
283 | int *coef; |
284 | const short *sfbTab; |
285 | unsigned char *sfbCodeBook; |
286 | short *scaleFactors; |
287 | int msMaskOffset, checkCorr, genNew; |
288 | unsigned char msMask; |
289 | unsigned char *msMaskPtr; |
290 | PSInfoBase *psi; |
291 | ICSInfo *icsInfo; |
292 | |
293 | /* validate pointers */ |
294 | if (!aacDecInfo || !aacDecInfo->psInfoBase) { |
295 | return -1; |
296 | } |
297 | psi = (PSInfoBase *)(aacDecInfo->psInfoBase); |
298 | icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]); |
299 | |
300 | if (!psi->pnsUsed[ch]) { |
301 | return 0; |
302 | } |
303 | |
304 | if (icsInfo->winSequence == 2) { |
305 | sfbTab = sfBandTabShort + sfBandTabShortOffset[psi->sampRateIdx]; |
306 | nSamps = NSAMPS_SHORT; |
307 | } else { |
308 | sfbTab = sfBandTabLong + sfBandTabLongOffset[psi->sampRateIdx]; |
309 | nSamps = NSAMPS_LONG; |
310 | } |
311 | coef = psi->coef[ch]; |
312 | sfbCodeBook = psi->sfbCodeBook[ch]; |
313 | scaleFactors = psi->scaleFactors[ch]; |
314 | checkCorr = (aacDecInfo->currBlockID == AAC_ID_CPE && psi->commonWin == 1 ? 1 : 0); |
315 | |
316 | gbMask = 0; |
317 | for (gp = 0; gp < icsInfo->numWinGroup; gp++) { |
318 | for (win = 0; win < icsInfo->winGroupLen[gp]; win++) { |
319 | msMaskPtr = psi->msMaskBits + ((gp * icsInfo->maxSFB) >> 3); |
320 | msMaskOffset = ((gp * icsInfo->maxSFB) & 0x07); |
321 | msMask = (*msMaskPtr++) >> msMaskOffset; |
322 | |
323 | for (sfb = 0; sfb < icsInfo->maxSFB; sfb++) { |
324 | width = sfbTab[sfb + 1] - sfbTab[sfb]; |
325 | if (sfbCodeBook[sfb] == 13) { |
326 | if (ch == 0) { |
327 | /* generate new vector, copy into ch 1 if it's possible that the channels will be correlated |
328 | * if ch 1 has PNS enabled for this SFB but it's uncorrelated (i.e. ms_used == 0), |
329 | * the copied values will be overwritten when we process ch 1 |
330 | */ |
331 | GenerateNoiseVector(coef, &psi->pnsLastVal, width); |
332 | if (checkCorr && psi->sfbCodeBook[1][gp * icsInfo->maxSFB + sfb] == 13) { |
333 | CopyNoiseVector(coef, psi->coef[1] + (coef - psi->coef[0]), width); |
334 | } |
335 | } else { |
336 | /* generate new vector if no correlation between channels */ |
337 | genNew = 1; |
338 | if (checkCorr && psi->sfbCodeBook[0][gp * icsInfo->maxSFB + sfb] == 13) { |
339 | if ((psi->msMaskPresent == 1 && (msMask & 0x01)) || psi->msMaskPresent == 2) { |
340 | genNew = 0; |
341 | } |
342 | } |
343 | if (genNew) { |
344 | GenerateNoiseVector(coef, &psi->pnsLastVal, width); |
345 | } |
346 | } |
347 | gbMask |= ScaleNoiseVector(coef, width, psi->scaleFactors[ch][gp * icsInfo->maxSFB + sfb]); |
348 | } |
349 | coef += width; |
350 | |
351 | /* get next mask bit (should be branchless on ARM) */ |
352 | msMask >>= 1; |
353 | if (++msMaskOffset == 8) { |
354 | msMask = *msMaskPtr++; |
355 | msMaskOffset = 0; |
356 | } |
357 | } |
358 | coef += (nSamps - sfbTab[icsInfo->maxSFB]); |
359 | } |
360 | sfbCodeBook += icsInfo->maxSFB; |
361 | scaleFactors += icsInfo->maxSFB; |
362 | } |
363 | |
364 | /* update guard bit count if necessary */ |
365 | gb = CLZ(gbMask) - 1; |
366 | if (psi->gbCurrent[ch] > gb) { |
367 | psi->gbCurrent[ch] = gb; |
368 | } |
369 | |
370 | return 0; |
371 | } |
372 |