blob: 652818512e89330d7ba58ddb33db0affe37a833c
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: sqvh.c,v 1.6 2005/04/27 19:20:50 hubbe Exp $ |
3 | * |
4 | * REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM |
5 | * Portions Copyright (c) 1995-2002 RealNetworks, Inc. |
6 | * All Rights Reserved. |
7 | * |
8 | * The contents of this file, and the files included with this file, |
9 | * are subject to the current version of the Real Format Source Code |
10 | * Porting and Optimization License, available at |
11 | * https://helixcommunity.org/2005/license/realformatsource (unless |
12 | * RealNetworks otherwise expressly agrees in writing that you are |
13 | * subject to a different license). You may also obtain the license |
14 | * terms directly from RealNetworks. You may not use this file except |
15 | * in compliance with the Real Format Source Code Porting and |
16 | * Optimization License. There are no redistribution rights for the |
17 | * source code of this file. Please see the Real Format Source Code |
18 | * Porting and Optimization License for the rights, obligations and |
19 | * limitations governing use of the contents of the file. |
20 | * |
21 | * RealNetworks is the developer of the Original Code and owns the |
22 | * copyrights in the portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed and |
25 | * made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, |
26 | * EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL |
27 | * SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT |
29 | * OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * https://rarvcode-tck.helixcommunity.org |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point RealAudio 8 decoder |
40 | * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com) |
41 | * October 2003 |
42 | * |
43 | * sqvh.c - Huffman decoding, unpacking, and dequantization of MLT coefficients |
44 | **************************************************************************************/ |
45 | |
46 | #include "coder.h" |
47 | #include "assembly.h" |
48 | |
49 | static const int vpr_tab[7] = {10, 10, 10, 5, 5, 4, 4}; /* number of vectors in one region (20 bins) */ |
50 | |
51 | /************************************************************************************** |
52 | * Function: DecodeHuffmanVectors |
53 | * |
54 | * Description: decode vector Huffman symbols from bitstream for one region |
55 | * |
56 | * Inputs: pointer to initialized BitStreamInfo struct |
57 | * cat for this region |
58 | * buffer to receive decoded symbols |
59 | * number of bits remaining in bitstream |
60 | * |
61 | * Outputs: NBINS decoded symbols |
62 | * |
63 | * Return: number of bits left in bitstream, -1 if ran out of bits |
64 | * |
65 | * Notes: the alphabet of decoded symbols has been remapped from the |
66 | * floating-pt reference, to replace the Horner-method polynomial |
67 | * evaluation with shift-and-mask (i.e. change of basis to replace |
68 | * 1/(kmax+1) with 1/(2^n) for unpacking vectorized codes) |
69 | * uses byte-cache Huffman decoder and conditional logic which is |
70 | * ARM-friendly |
71 | * this has been carefully arranged to compile well with ADS, so if you |
72 | * change anything, make sure you study the assembly code carefully! |
73 | * consider fusing with loop over all nRegions (rather than function call |
74 | * per loop) to avoid refilling cache each call |
75 | **************************************************************************************/ |
76 | static int DecodeHuffmanVectors(BitStreamInfo *bsi, int cat, int *k, int bitsLeft) |
77 | { |
78 | int nBits, startBits, vpr, v, vPacked; |
79 | int off, key, cachedBits, padBits; |
80 | unsigned int cache, count, t, total, cw; |
81 | unsigned char *buf, *countCurr; |
82 | const unsigned char *countBase; |
83 | const unsigned short *map; |
84 | |
85 | buf = bsi->buf; |
86 | off = bsi->off; |
87 | key = bsi->key; |
88 | |
89 | countBase = huffTabVectorInfo[cat].count; |
90 | map = huffTabVector + huffTabVectorInfo[cat].offset; |
91 | startBits = bitsLeft; |
92 | vpr = vpr_tab[cat]; |
93 | |
94 | /* initially fill cache with any partial byte */ |
95 | cache = 0; |
96 | cachedBits = (8 - off) & 0x07; |
97 | if (cachedBits) { |
98 | cache = (unsigned int)((*buf++) ^ pkkey[key++]) << (32 - cachedBits); |
99 | } |
100 | key &= 0x03; |
101 | bitsLeft -= cachedBits; |
102 | |
103 | padBits = 0; |
104 | while (vpr > 0) { |
105 | /* refill cache - assumes cachedBits <= 24 */ |
106 | if (bitsLeft >= 8) { |
107 | /* load 1 new byte into left-justified cache */ |
108 | cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits); |
109 | key &= 0x03; |
110 | cachedBits += 8; |
111 | bitsLeft -= 8; |
112 | } else { |
113 | /* last time through, pad cache with zeros and drain cache */ |
114 | if (cachedBits + bitsLeft <= 0) { |
115 | return -1; |
116 | } |
117 | if (bitsLeft > 0) { |
118 | cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits); |
119 | } |
120 | key &= 0x03; |
121 | cachedBits += bitsLeft; |
122 | bitsLeft = 0; |
123 | |
124 | cache &= (signed int)0x80000000 >> (cachedBits - 1); |
125 | padBits = 21; |
126 | cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */ |
127 | } |
128 | |
129 | /* max bits per Huffman symbol = 16, max sign bits = 5, so 21 bits will do */ |
130 | while (vpr > 0 && cachedBits >= 21) { |
131 | /* left-justify to bit 16 */ |
132 | cw = cache >> (32 - 17); |
133 | countCurr = (unsigned char *)countBase; |
134 | count = *countCurr++; |
135 | t = count << 16; |
136 | total = 0; |
137 | |
138 | while (cw >= t) { |
139 | cw -= t; |
140 | cw <<= 1; |
141 | total += count; |
142 | count = *countCurr++; |
143 | t = count << 16; |
144 | } |
145 | |
146 | nBits = countCurr - countBase; |
147 | cachedBits -= nBits; |
148 | cache <<= nBits; |
149 | |
150 | vPacked = (int)map[total + (cw >> 16)]; |
151 | |
152 | /* sign bits */ |
153 | nBits = (vPacked >> 12) & 0x0f; |
154 | cachedBits -= nBits; |
155 | if (cachedBits < padBits) { |
156 | return -1; |
157 | } |
158 | |
159 | /* should create good asm with conditional instruction execution on ARM |
160 | * format of vPacked: |
161 | * bits 12-15 = number of sign bits |
162 | * 4 bits/coef for cat = 0, 1, 2 (bits 0-3, 4-7) |
163 | * 3 bits/coef for cat = 3, 4 (bits 0-2, 3-5, 6-8. 9-11) |
164 | * 2 bits/coef for cat = 5, 6 (bits 0-1, 2-3, 4-5. 6-7, 8-9) |
165 | */ |
166 | if (cat < 3) { |
167 | v = (vPacked >> 0) & 0x0f; |
168 | if (v) { |
169 | v |= (cache & 0x80000000); |
170 | cache <<= 1; |
171 | } *k++ = v; |
172 | v = (vPacked >> 4) & 0x0f; |
173 | if (v) { |
174 | v |= (cache & 0x80000000); |
175 | cache <<= 1; |
176 | } *k++ = v; |
177 | } else if (cat < 5) { |
178 | v = (vPacked >> 0) & 0x07; |
179 | if (v) { |
180 | v |= (cache & 0x80000000); |
181 | cache <<= 1; |
182 | } *k++ = v; |
183 | v = (vPacked >> 3) & 0x07; |
184 | if (v) { |
185 | v |= (cache & 0x80000000); |
186 | cache <<= 1; |
187 | } *k++ = v; |
188 | v = (vPacked >> 6) & 0x07; |
189 | if (v) { |
190 | v |= (cache & 0x80000000); |
191 | cache <<= 1; |
192 | } *k++ = v; |
193 | v = (vPacked >> 9) & 0x07; |
194 | if (v) { |
195 | v |= (cache & 0x80000000); |
196 | cache <<= 1; |
197 | } *k++ = v; |
198 | } else { |
199 | v = (vPacked >> 0) & 0x03; |
200 | if (v) { |
201 | v |= (cache & 0x80000000); |
202 | cache <<= 1; |
203 | } *k++ = v; |
204 | v = (vPacked >> 2) & 0x03; |
205 | if (v) { |
206 | v |= (cache & 0x80000000); |
207 | cache <<= 1; |
208 | } *k++ = v; |
209 | v = (vPacked >> 4) & 0x03; |
210 | if (v) { |
211 | v |= (cache & 0x80000000); |
212 | cache <<= 1; |
213 | } *k++ = v; |
214 | v = (vPacked >> 6) & 0x03; |
215 | if (v) { |
216 | v |= (cache & 0x80000000); |
217 | cache <<= 1; |
218 | } *k++ = v; |
219 | v = (vPacked >> 8) & 0x03; |
220 | if (v) { |
221 | v |= (cache & 0x80000000); |
222 | cache <<= 1; |
223 | } *k++ = v; |
224 | } |
225 | vpr--; |
226 | } |
227 | } |
228 | |
229 | return startBits - bitsLeft - (cachedBits - padBits); |
230 | } |
231 | |
232 | /* Q28 - first column (0's) are dither |
233 | * rows ..... cat = [0, 7] |
234 | * columns .. k = [0, kmax_tab[cat]] |
235 | * qcOffset[cat] is starting index for k = 0 |
236 | * |
237 | * qcTab[0] scaled by 1.0 |
238 | * qcTab[1] scaled by sqrt(2) |
239 | * |
240 | * lower cat = shorter vectors = finer Q = higher expected bits |
241 | * higher cat = longer vectors = coarser Q = lower expected bits |
242 | * |
243 | * kmax_tab[8] = { 13, 9, 6, 4, 3, 2, 1, 0 }; |
244 | */ |
245 | static const int qcTab[2][14 + 10 + 7 + 5 + 4 + 3 + 2 + 1] = { |
246 | { |
247 | /* quant_centroid_tab * 1.0 */ |
248 | 0x00000000, 0x0645a1c8, 0x0c2d0e50, 0x11eb8520, 0x17a1cac0, 0x1d4fdf40, 0x22ed9180, 0x28a7ef80, 0x2e49ba40, 0x33eb8500, 0x39916880, 0x3f126e80, 0x449ba600, 0x4b958100, |
249 | 0x00000000, 0x08b43960, 0x10f5c280, 0x19020c40, 0x21168740, 0x2922d100, 0x3126e980, 0x38fdf3c0, 0x411eb880, 0x49eb8500, |
250 | 0x00000000, 0x0bef9db0, 0x176c8b40, 0x22e147c0, 0x2e1cac00, 0x39581080, 0x450e5600, |
251 | 0x00000000, 0x10189380, 0x20000000, 0x2fe35400, 0x3fc28f40, |
252 | 0x00000000, 0x1522d0e0, 0x2b3f7d00, 0x3fba5e40, |
253 | 0x02d413cc, 0x1a831260, 0x37db22c0, |
254 | 0x04000000, 0x1f6c8b40, |
255 | 0x0b504f33 |
256 | }, |
257 | { |
258 | /* quant_centroid_tab * sqrt(2) */ |
259 | 0x00000000, 0x08deb4dc, 0x11382ec8, 0x1957bba7, 0x216bb2a8, 0x297413e1, 0x31654988, 0x397f0b29, 0x41760b9f, 0x496d0c14, 0x5169d7b1, 0x593280ab, 0x6106bff4, 0x6ae454b2, |
260 | 0x00000000, 0x0c4f2f4d, 0x17fc2cf0, 0x235ddce6, 0x2ecb22d2, 0x3a2cd2c9, 0x4582ecca, 0x50994ee6, 0x5c17f595, 0x6889e5e1, |
261 | 0x00000000, 0x10e14b25, 0x212064ce, 0x3153e8c5, 0x413653ba, 0x5118bf09, 0x61a8f143, |
262 | 0x00000000, 0x16c35fec, 0x2d413ccc, 0x43b94edf, 0x5a2b95ca, |
263 | 0x00000000, 0x1de40c9b, 0x3d2972e9, 0x5a20002f, |
264 | 0x04000000, 0x257e5e73, 0x4efe081d, |
265 | 0x05a82799, 0x2c70b401, |
266 | 0x10000000 |
267 | }, |
268 | }; |
269 | |
270 | static const int qcOffset[8] = { 0, 14, 24, 31, 36, 40, 43, 45 }; |
271 | |
272 | /************************************************************************************** |
273 | * Function: ScalarDequant |
274 | * |
275 | * Description: dequantize transform coefficients (in-place) |
276 | * |
277 | * Inputs: buffer of decoded/un-vectorized symbols with sign in bit 31 |
278 | * cat (quantizer) for this region |
279 | * pointer to dequantizer table (either scaled by 1.0 or sqrt(2.0)) |
280 | * right-shift amount (i.e. 2^-n for normalizing to correct Q format) |
281 | * pointer to current contents of dither lfsr |
282 | * current guard bit mask |
283 | * |
284 | * Outputs: NBINS dequantized coefficients |
285 | * updated dither generator |
286 | * |
287 | * Return: updated guard bit mask |
288 | * |
289 | * Notes: assumes that shift is in range [0, 31] |
290 | **************************************************************************************/ |
291 | static int ScalarDequant(int *buf, int cat, const int *dqTab, int shift, int *lfsrInit, int gbMask) |
292 | { |
293 | int i, k; |
294 | int dq, sign, lfsr; |
295 | |
296 | lfsr = *lfsrInit; |
297 | |
298 | /* multiply dequantized value (from centroid table) or the dither value (if quantized coeff == 0) |
299 | * with the power in this region (from envelope) |
300 | * |
301 | * implements: |
302 | * buf[i] = quant_centroid_tab[cat][buf[i]] * pow(2, DQ_FRACBITS_OUT + scale / 2.0) |
303 | * |
304 | * example (for testing with Q28 qcTab): |
305 | * fixdeqnt = (int)((double)qcTab[0][qcOffset[cat] + k] / (double)(1 << 28) * pow(2, DQ_FRACBITS_OUT + scale / 2.0)); |
306 | * *buf++ = fixdeqnt * (sign ? -1 : 1); |
307 | * |
308 | * output format = Q(FBITS_OUT_DQ) = Q12 (currently) |
309 | */ |
310 | for (i = NBINS; i != 0; i--) { |
311 | k = *buf; |
312 | if (k == 0 || cat == 7) { |
313 | /* update the LFSR, producing a random sign */ |
314 | sign = (lfsr >> 31); |
315 | lfsr = (lfsr << 1) ^(sign & FEEDBACK); |
316 | dq = dqTab[0] >> shift; |
317 | gbMask |= dq; |
318 | *buf++ = (dq ^ sign) - sign; /* inflict the sign */ |
319 | } else { |
320 | /* for stepsize[cat]*buf[i], use centroid[cat][buf[i]] */ |
321 | sign = k >> 31; |
322 | k &= 0x7fffffff; |
323 | dq = dqTab[k] >> shift; |
324 | gbMask |= dq; |
325 | *buf++ = (dq ^ sign) - sign; /* inflict the sign */ |
326 | } |
327 | } |
328 | *lfsrInit = lfsr; |
329 | |
330 | /* fast way to bound min guard bits is just to return shift (smallest shift = min number of GB's) |
331 | * tighter bound is OR'ing all the dequantized values together before applying sign (done here) |
332 | */ |
333 | return gbMask; |
334 | } |
335 | |
336 | /************************************************************************************** |
337 | * Function: DecodeTransform |
338 | * |
339 | * Description: recover MLT coefficients from scalar-quantized, vector Huffman codes |
340 | * |
341 | * Inputs: pointer to initialized Gecko2Info struct |
342 | * buffer to receive dequantized coefficients |
343 | * number of bits remaining in bitstream |
344 | * pointer to current contents of dither lfsr |
345 | * channel index |
346 | * |
347 | * Outputs: nRegions*NBINS dequantized coefficients per channel |
348 | * updated dither generator |
349 | * |
350 | * Return: minimum number of guard bits in coefficients |
351 | * |
352 | * Notes: for joint stereo, does in-place deinterleaving into left and right |
353 | * halves of mlt buffer |
354 | * (left starts at mlt[0], right starts at mlt[MAXNSAMP]) |
355 | **************************************************************************************/ |
356 | int DecodeTransform(Gecko2Info *gi, int *mlt, int availbits, int *lfsrInit, int ch) |
357 | { |
358 | int r, gbMask, gbMin, bitsUsed, shift, fbits, rmsMax; |
359 | int *dest, *catbuf, *rmsIndex; |
360 | const int *dqTab; |
361 | BitStreamInfo *bsi = &(gi->bsi); |
362 | |
363 | catbuf = gi->db.catbuf; |
364 | rmsIndex = gi->db.rmsIndex; |
365 | |
366 | /* huffman decode - does joint stereo deinterleaving if necessary */ |
367 | for (r = 0; r < gi->cRegions; r++) { |
368 | if (r < 2 * gi->cplStart) { |
369 | dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01); |
370 | } else { |
371 | dest = mlt + NBINS * (r - gi->cplStart); |
372 | } |
373 | |
374 | if (catbuf[r] < 7) { |
375 | /* cat == 7 means region was not coded, and dequantizer will just add power-normalized dither */ |
376 | bitsUsed = DecodeHuffmanVectors(bsi, catbuf[r], dest, availbits); |
377 | if (bitsUsed < 0 || bitsUsed > availbits) { |
378 | break; |
379 | } |
380 | AdvanceBitstream(bsi, bitsUsed); |
381 | availbits -= bitsUsed; |
382 | } |
383 | } |
384 | |
385 | /* if ran out of bits, use dither in rest of regions (zero scalars) */ |
386 | for (; r < gi->cRegions; r++) { |
387 | catbuf[r] = 7; |
388 | } |
389 | |
390 | /* for very large coefficients, we override the default format with fewer fraction bits, to avoid saturation |
391 | * range of rmsIndex = [-31, 63] (from encoder) |
392 | */ |
393 | fbits = FBITS_OUT_DQ; |
394 | rmsMax = gi->rmsMax[ch]; |
395 | if ((rmsMax >> 1) > (28 - fbits)) { |
396 | fbits = 28 - (rmsMax >> 1); |
397 | if (fbits < 0) { |
398 | fbits = 0; |
399 | } |
400 | } |
401 | |
402 | /* dequantize - stops at cRegions*NBINS (doesn't zero out above this) */ |
403 | gbMask = 0; |
404 | for (r = 0; r < gi->cRegions; r++) { |
405 | if (r < 2 * gi->cplStart) { |
406 | dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01); |
407 | } else { |
408 | dest = mlt + NBINS * (r - gi->cplStart); |
409 | } |
410 | |
411 | /* qcTab = Q28 */ |
412 | dqTab = qcTab[rmsIndex[r] & 0x01] + qcOffset[catbuf[r]]; |
413 | shift = (28 - fbits - (rmsIndex[r] >> 1)); |
414 | shift = MIN(shift, 31); /* underflow to +/- 0 */ |
415 | shift = MAX(shift, 0); /* saturate exponent, could only happen for gigantic rmsMax (never seen in practice) */ |
416 | |
417 | gbMask = ScalarDequant(dest, catbuf[r], dqTab, shift, lfsrInit, gbMask); |
418 | } |
419 | |
420 | /* save the number of extra fraction bits we needed, if any */ |
421 | gi->xbits[ch][1] = FBITS_OUT_DQ - fbits; |
422 | |
423 | /* calculate minimum number of guard bits in mlt[] */ |
424 | gbMin = CLZ(gbMask) - 1; |
425 | if (gbMin < 0) { |
426 | gbMin = 0; |
427 | } |
428 | |
429 | /* mlt[] values are Q31 * 2^bExp (i.e. Q(31-bExp) = Q(DQ_FRACBITS_OUT)) */ |
430 | return gbMin; |
431 | } |
432 |