summaryrefslogtreecommitdiff
path: root/audio_codec/libcook/ra_sqvh.c (plain)
blob: 652818512e89330d7ba58ddb33db0affe37a833c
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: sqvh.c,v 1.6 2005/04/27 19:20:50 hubbe Exp $
3 *
4 * REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5 * Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6 * All Rights Reserved.
7 *
8 * The contents of this file, and the files included with this file,
9 * are subject to the current version of the Real Format Source Code
10 * Porting and Optimization License, available at
11 * https://helixcommunity.org/2005/license/realformatsource (unless
12 * RealNetworks otherwise expressly agrees in writing that you are
13 * subject to a different license). You may also obtain the license
14 * terms directly from RealNetworks. You may not use this file except
15 * in compliance with the Real Format Source Code Porting and
16 * Optimization License. There are no redistribution rights for the
17 * source code of this file. Please see the Real Format Source Code
18 * Porting and Optimization License for the rights, obligations and
19 * limitations governing use of the contents of the file.
20 *
21 * RealNetworks is the developer of the Original Code and owns the
22 * copyrights in the portions it created.
23 *
24 * This file, and the files included with this file, is distributed and
25 * made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26 * EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27 * SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29 * OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * https://rarvcode-tck.helixcommunity.org
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point RealAudio 8 decoder
40 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41 * October 2003
42 *
43 * sqvh.c - Huffman decoding, unpacking, and dequantization of MLT coefficients
44 **************************************************************************************/
45
46#include "coder.h"
47#include "assembly.h"
48
49static const int vpr_tab[7] = {10, 10, 10, 5, 5, 4, 4}; /* number of vectors in one region (20 bins) */
50
51/**************************************************************************************
52 * Function: DecodeHuffmanVectors
53 *
54 * Description: decode vector Huffman symbols from bitstream for one region
55 *
56 * Inputs: pointer to initialized BitStreamInfo struct
57 * cat for this region
58 * buffer to receive decoded symbols
59 * number of bits remaining in bitstream
60 *
61 * Outputs: NBINS decoded symbols
62 *
63 * Return: number of bits left in bitstream, -1 if ran out of bits
64 *
65 * Notes: the alphabet of decoded symbols has been remapped from the
66 * floating-pt reference, to replace the Horner-method polynomial
67 * evaluation with shift-and-mask (i.e. change of basis to replace
68 * 1/(kmax+1) with 1/(2^n) for unpacking vectorized codes)
69 * uses byte-cache Huffman decoder and conditional logic which is
70 * ARM-friendly
71 * this has been carefully arranged to compile well with ADS, so if you
72 * change anything, make sure you study the assembly code carefully!
73 * consider fusing with loop over all nRegions (rather than function call
74 * per loop) to avoid refilling cache each call
75 **************************************************************************************/
76static int DecodeHuffmanVectors(BitStreamInfo *bsi, int cat, int *k, int bitsLeft)
77{
78 int nBits, startBits, vpr, v, vPacked;
79 int off, key, cachedBits, padBits;
80 unsigned int cache, count, t, total, cw;
81 unsigned char *buf, *countCurr;
82 const unsigned char *countBase;
83 const unsigned short *map;
84
85 buf = bsi->buf;
86 off = bsi->off;
87 key = bsi->key;
88
89 countBase = huffTabVectorInfo[cat].count;
90 map = huffTabVector + huffTabVectorInfo[cat].offset;
91 startBits = bitsLeft;
92 vpr = vpr_tab[cat];
93
94 /* initially fill cache with any partial byte */
95 cache = 0;
96 cachedBits = (8 - off) & 0x07;
97 if (cachedBits) {
98 cache = (unsigned int)((*buf++) ^ pkkey[key++]) << (32 - cachedBits);
99 }
100 key &= 0x03;
101 bitsLeft -= cachedBits;
102
103 padBits = 0;
104 while (vpr > 0) {
105 /* refill cache - assumes cachedBits <= 24 */
106 if (bitsLeft >= 8) {
107 /* load 1 new byte into left-justified cache */
108 cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
109 key &= 0x03;
110 cachedBits += 8;
111 bitsLeft -= 8;
112 } else {
113 /* last time through, pad cache with zeros and drain cache */
114 if (cachedBits + bitsLeft <= 0) {
115 return -1;
116 }
117 if (bitsLeft > 0) {
118 cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
119 }
120 key &= 0x03;
121 cachedBits += bitsLeft;
122 bitsLeft = 0;
123
124 cache &= (signed int)0x80000000 >> (cachedBits - 1);
125 padBits = 21;
126 cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */
127 }
128
129 /* max bits per Huffman symbol = 16, max sign bits = 5, so 21 bits will do */
130 while (vpr > 0 && cachedBits >= 21) {
131 /* left-justify to bit 16 */
132 cw = cache >> (32 - 17);
133 countCurr = (unsigned char *)countBase;
134 count = *countCurr++;
135 t = count << 16;
136 total = 0;
137
138 while (cw >= t) {
139 cw -= t;
140 cw <<= 1;
141 total += count;
142 count = *countCurr++;
143 t = count << 16;
144 }
145
146 nBits = countCurr - countBase;
147 cachedBits -= nBits;
148 cache <<= nBits;
149
150 vPacked = (int)map[total + (cw >> 16)];
151
152 /* sign bits */
153 nBits = (vPacked >> 12) & 0x0f;
154 cachedBits -= nBits;
155 if (cachedBits < padBits) {
156 return -1;
157 }
158
159 /* should create good asm with conditional instruction execution on ARM
160 * format of vPacked:
161 * bits 12-15 = number of sign bits
162 * 4 bits/coef for cat = 0, 1, 2 (bits 0-3, 4-7)
163 * 3 bits/coef for cat = 3, 4 (bits 0-2, 3-5, 6-8. 9-11)
164 * 2 bits/coef for cat = 5, 6 (bits 0-1, 2-3, 4-5. 6-7, 8-9)
165 */
166 if (cat < 3) {
167 v = (vPacked >> 0) & 0x0f;
168 if (v) {
169 v |= (cache & 0x80000000);
170 cache <<= 1;
171 } *k++ = v;
172 v = (vPacked >> 4) & 0x0f;
173 if (v) {
174 v |= (cache & 0x80000000);
175 cache <<= 1;
176 } *k++ = v;
177 } else if (cat < 5) {
178 v = (vPacked >> 0) & 0x07;
179 if (v) {
180 v |= (cache & 0x80000000);
181 cache <<= 1;
182 } *k++ = v;
183 v = (vPacked >> 3) & 0x07;
184 if (v) {
185 v |= (cache & 0x80000000);
186 cache <<= 1;
187 } *k++ = v;
188 v = (vPacked >> 6) & 0x07;
189 if (v) {
190 v |= (cache & 0x80000000);
191 cache <<= 1;
192 } *k++ = v;
193 v = (vPacked >> 9) & 0x07;
194 if (v) {
195 v |= (cache & 0x80000000);
196 cache <<= 1;
197 } *k++ = v;
198 } else {
199 v = (vPacked >> 0) & 0x03;
200 if (v) {
201 v |= (cache & 0x80000000);
202 cache <<= 1;
203 } *k++ = v;
204 v = (vPacked >> 2) & 0x03;
205 if (v) {
206 v |= (cache & 0x80000000);
207 cache <<= 1;
208 } *k++ = v;
209 v = (vPacked >> 4) & 0x03;
210 if (v) {
211 v |= (cache & 0x80000000);
212 cache <<= 1;
213 } *k++ = v;
214 v = (vPacked >> 6) & 0x03;
215 if (v) {
216 v |= (cache & 0x80000000);
217 cache <<= 1;
218 } *k++ = v;
219 v = (vPacked >> 8) & 0x03;
220 if (v) {
221 v |= (cache & 0x80000000);
222 cache <<= 1;
223 } *k++ = v;
224 }
225 vpr--;
226 }
227 }
228
229 return startBits - bitsLeft - (cachedBits - padBits);
230}
231
232/* Q28 - first column (0's) are dither
233 * rows ..... cat = [0, 7]
234 * columns .. k = [0, kmax_tab[cat]]
235 * qcOffset[cat] is starting index for k = 0
236 *
237 * qcTab[0] scaled by 1.0
238 * qcTab[1] scaled by sqrt(2)
239 *
240 * lower cat = shorter vectors = finer Q = higher expected bits
241 * higher cat = longer vectors = coarser Q = lower expected bits
242 *
243 * kmax_tab[8] = { 13, 9, 6, 4, 3, 2, 1, 0 };
244 */
245static const int qcTab[2][14 + 10 + 7 + 5 + 4 + 3 + 2 + 1] = {
246 {
247 /* quant_centroid_tab * 1.0 */
248 0x00000000, 0x0645a1c8, 0x0c2d0e50, 0x11eb8520, 0x17a1cac0, 0x1d4fdf40, 0x22ed9180, 0x28a7ef80, 0x2e49ba40, 0x33eb8500, 0x39916880, 0x3f126e80, 0x449ba600, 0x4b958100,
249 0x00000000, 0x08b43960, 0x10f5c280, 0x19020c40, 0x21168740, 0x2922d100, 0x3126e980, 0x38fdf3c0, 0x411eb880, 0x49eb8500,
250 0x00000000, 0x0bef9db0, 0x176c8b40, 0x22e147c0, 0x2e1cac00, 0x39581080, 0x450e5600,
251 0x00000000, 0x10189380, 0x20000000, 0x2fe35400, 0x3fc28f40,
252 0x00000000, 0x1522d0e0, 0x2b3f7d00, 0x3fba5e40,
253 0x02d413cc, 0x1a831260, 0x37db22c0,
254 0x04000000, 0x1f6c8b40,
255 0x0b504f33
256 },
257 {
258 /* quant_centroid_tab * sqrt(2) */
259 0x00000000, 0x08deb4dc, 0x11382ec8, 0x1957bba7, 0x216bb2a8, 0x297413e1, 0x31654988, 0x397f0b29, 0x41760b9f, 0x496d0c14, 0x5169d7b1, 0x593280ab, 0x6106bff4, 0x6ae454b2,
260 0x00000000, 0x0c4f2f4d, 0x17fc2cf0, 0x235ddce6, 0x2ecb22d2, 0x3a2cd2c9, 0x4582ecca, 0x50994ee6, 0x5c17f595, 0x6889e5e1,
261 0x00000000, 0x10e14b25, 0x212064ce, 0x3153e8c5, 0x413653ba, 0x5118bf09, 0x61a8f143,
262 0x00000000, 0x16c35fec, 0x2d413ccc, 0x43b94edf, 0x5a2b95ca,
263 0x00000000, 0x1de40c9b, 0x3d2972e9, 0x5a20002f,
264 0x04000000, 0x257e5e73, 0x4efe081d,
265 0x05a82799, 0x2c70b401,
266 0x10000000
267 },
268};
269
270static const int qcOffset[8] = { 0, 14, 24, 31, 36, 40, 43, 45 };
271
272/**************************************************************************************
273 * Function: ScalarDequant
274 *
275 * Description: dequantize transform coefficients (in-place)
276 *
277 * Inputs: buffer of decoded/un-vectorized symbols with sign in bit 31
278 * cat (quantizer) for this region
279 * pointer to dequantizer table (either scaled by 1.0 or sqrt(2.0))
280 * right-shift amount (i.e. 2^-n for normalizing to correct Q format)
281 * pointer to current contents of dither lfsr
282 * current guard bit mask
283 *
284 * Outputs: NBINS dequantized coefficients
285 * updated dither generator
286 *
287 * Return: updated guard bit mask
288 *
289 * Notes: assumes that shift is in range [0, 31]
290 **************************************************************************************/
291static int ScalarDequant(int *buf, int cat, const int *dqTab, int shift, int *lfsrInit, int gbMask)
292{
293 int i, k;
294 int dq, sign, lfsr;
295
296 lfsr = *lfsrInit;
297
298 /* multiply dequantized value (from centroid table) or the dither value (if quantized coeff == 0)
299 * with the power in this region (from envelope)
300 *
301 * implements:
302 * buf[i] = quant_centroid_tab[cat][buf[i]] * pow(2, DQ_FRACBITS_OUT + scale / 2.0)
303 *
304 * example (for testing with Q28 qcTab):
305 * fixdeqnt = (int)((double)qcTab[0][qcOffset[cat] + k] / (double)(1 << 28) * pow(2, DQ_FRACBITS_OUT + scale / 2.0));
306 * *buf++ = fixdeqnt * (sign ? -1 : 1);
307 *
308 * output format = Q(FBITS_OUT_DQ) = Q12 (currently)
309 */
310 for (i = NBINS; i != 0; i--) {
311 k = *buf;
312 if (k == 0 || cat == 7) {
313 /* update the LFSR, producing a random sign */
314 sign = (lfsr >> 31);
315 lfsr = (lfsr << 1) ^(sign & FEEDBACK);
316 dq = dqTab[0] >> shift;
317 gbMask |= dq;
318 *buf++ = (dq ^ sign) - sign; /* inflict the sign */
319 } else {
320 /* for stepsize[cat]*buf[i], use centroid[cat][buf[i]] */
321 sign = k >> 31;
322 k &= 0x7fffffff;
323 dq = dqTab[k] >> shift;
324 gbMask |= dq;
325 *buf++ = (dq ^ sign) - sign; /* inflict the sign */
326 }
327 }
328 *lfsrInit = lfsr;
329
330 /* fast way to bound min guard bits is just to return shift (smallest shift = min number of GB's)
331 * tighter bound is OR'ing all the dequantized values together before applying sign (done here)
332 */
333 return gbMask;
334}
335
336/**************************************************************************************
337 * Function: DecodeTransform
338 *
339 * Description: recover MLT coefficients from scalar-quantized, vector Huffman codes
340 *
341 * Inputs: pointer to initialized Gecko2Info struct
342 * buffer to receive dequantized coefficients
343 * number of bits remaining in bitstream
344 * pointer to current contents of dither lfsr
345 * channel index
346 *
347 * Outputs: nRegions*NBINS dequantized coefficients per channel
348 * updated dither generator
349 *
350 * Return: minimum number of guard bits in coefficients
351 *
352 * Notes: for joint stereo, does in-place deinterleaving into left and right
353 * halves of mlt buffer
354 * (left starts at mlt[0], right starts at mlt[MAXNSAMP])
355 **************************************************************************************/
356int DecodeTransform(Gecko2Info *gi, int *mlt, int availbits, int *lfsrInit, int ch)
357{
358 int r, gbMask, gbMin, bitsUsed, shift, fbits, rmsMax;
359 int *dest, *catbuf, *rmsIndex;
360 const int *dqTab;
361 BitStreamInfo *bsi = &(gi->bsi);
362
363 catbuf = gi->db.catbuf;
364 rmsIndex = gi->db.rmsIndex;
365
366 /* huffman decode - does joint stereo deinterleaving if necessary */
367 for (r = 0; r < gi->cRegions; r++) {
368 if (r < 2 * gi->cplStart) {
369 dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
370 } else {
371 dest = mlt + NBINS * (r - gi->cplStart);
372 }
373
374 if (catbuf[r] < 7) {
375 /* cat == 7 means region was not coded, and dequantizer will just add power-normalized dither */
376 bitsUsed = DecodeHuffmanVectors(bsi, catbuf[r], dest, availbits);
377 if (bitsUsed < 0 || bitsUsed > availbits) {
378 break;
379 }
380 AdvanceBitstream(bsi, bitsUsed);
381 availbits -= bitsUsed;
382 }
383 }
384
385 /* if ran out of bits, use dither in rest of regions (zero scalars) */
386 for (; r < gi->cRegions; r++) {
387 catbuf[r] = 7;
388 }
389
390 /* for very large coefficients, we override the default format with fewer fraction bits, to avoid saturation
391 * range of rmsIndex = [-31, 63] (from encoder)
392 */
393 fbits = FBITS_OUT_DQ;
394 rmsMax = gi->rmsMax[ch];
395 if ((rmsMax >> 1) > (28 - fbits)) {
396 fbits = 28 - (rmsMax >> 1);
397 if (fbits < 0) {
398 fbits = 0;
399 }
400 }
401
402 /* dequantize - stops at cRegions*NBINS (doesn't zero out above this) */
403 gbMask = 0;
404 for (r = 0; r < gi->cRegions; r++) {
405 if (r < 2 * gi->cplStart) {
406 dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
407 } else {
408 dest = mlt + NBINS * (r - gi->cplStart);
409 }
410
411 /* qcTab = Q28 */
412 dqTab = qcTab[rmsIndex[r] & 0x01] + qcOffset[catbuf[r]];
413 shift = (28 - fbits - (rmsIndex[r] >> 1));
414 shift = MIN(shift, 31); /* underflow to +/- 0 */
415 shift = MAX(shift, 0); /* saturate exponent, could only happen for gigantic rmsMax (never seen in practice) */
416
417 gbMask = ScalarDequant(dest, catbuf[r], dqTab, shift, lfsrInit, gbMask);
418 }
419
420 /* save the number of extra fraction bits we needed, if any */
421 gi->xbits[ch][1] = FBITS_OUT_DQ - fbits;
422
423 /* calculate minimum number of guard bits in mlt[] */
424 gbMin = CLZ(gbMask) - 1;
425 if (gbMin < 0) {
426 gbMin = 0;
427 }
428
429 /* mlt[] values are Q31 * 2^bExp (i.e. Q(31-bExp) = Q(DQ_FRACBITS_OUT)) */
430 return gbMin;
431}
432