platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: sqvh.c,v 1.6 2005/04/27 19:20:50 hubbe Exp $
3  *
4  * REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5  * Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6  * All Rights Reserved.
7  *
8  * The contents of this file, and the files included with this file,
9  * are subject to the current version of the Real Format Source Code
10  * Porting and Optimization License, available at
11  * https://helixcommunity.org/2005/license/realformatsource (unless
12  * RealNetworks otherwise expressly agrees in writing that you are
13  * subject to a different license).  You may also obtain the license
14  * terms directly from RealNetworks.  You may not use this file except
15  * in compliance with the Real Format Source Code Porting and
16  * Optimization License. There are no redistribution rights for the
17  * source code of this file. Please see the Real Format Source Code
18  * Porting and Optimization License for the rights, obligations and
19  * limitations governing use of the contents of the file.
20  *
21  * RealNetworks is the developer of the Original Code and owns the
22  * copyrights in the portions it created.
23  *
24  * This file, and the files included with this file, is distributed and
25  * made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26  * EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27  * SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29  * OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  * https://rarvcode-tck.helixcommunity.org
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point RealAudio 8 decoder
40  * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41  * October 2003
42  *
43  * sqvh.c - Huffman decoding, unpacking, and dequantization of MLT coefficients
44  **************************************************************************************/
45
46 #include "coder.h"
47 #include "assembly.h"
48
49 static const int vpr_tab[7] = {10, 10, 10, 5, 5, 4, 4}; /* number of vectors in one region (20 bins) */
50
51 /**************************************************************************************
52  * Function:    DecodeHuffmanVectors
53  *
54  * Description: decode vector Huffman symbols from bitstream for one region
55  *
56  * Inputs:      pointer to initialized BitStreamInfo struct
57  *              cat for this region
58  *              buffer to receive decoded symbols
59  *              number of bits remaining in bitstream
60  *
61  * Outputs:     NBINS decoded symbols
62  *
63  * Return:      number of bits left in bitstream, -1 if ran out of bits
64  *
65  * Notes:       the alphabet of decoded symbols has been remapped from the
66  *                floating-pt reference, to replace the Horner-method polynomial
67  *                evaluation with shift-and-mask (i.e. change of basis to replace
68  *                1/(kmax+1) with 1/(2^n) for unpacking vectorized codes)
69  *              uses byte-cache Huffman decoder and conditional logic which is
70  *                ARM-friendly
71  *              this has been carefully arranged to compile well with ADS, so if you
72  *                change anything, make sure you study the assembly code carefully!
73  *              consider fusing with loop over all nRegions (rather than function call
74  *                per loop) to avoid refilling cache each call
75  **************************************************************************************/
76 static int DecodeHuffmanVectors(BitStreamInfo *bsi, int cat, int *k, int bitsLeft)
77 {
78     int nBits, startBits, vpr, v, vPacked;
79     int off, key, cachedBits, padBits;
80     unsigned int cache, count, t, total, cw;
81     unsigned char *buf, *countCurr;
82     const unsigned char *countBase;
83     const unsigned short *map;
84
85     buf = bsi->buf;
86     off = bsi->off;
87     key = bsi->key;
88
89     countBase = huffTabVectorInfo[cat].count;
90     map = huffTabVector + huffTabVectorInfo[cat].offset;
91     startBits = bitsLeft;
92     vpr = vpr_tab[cat];
93
94     /* initially fill cache with any partial byte */
95     cache = 0;
96     cachedBits = (8 - off) & 0x07;
97     if (cachedBits) {
98         cache = (unsigned int)((*buf++) ^ pkkey[key++]) << (32 - cachedBits);
99     }
100     key &= 0x03;
101     bitsLeft -= cachedBits;
102
103     padBits = 0;
104     while (vpr > 0) {
105         /* refill cache - assumes cachedBits <= 24 */
106         if (bitsLeft >= 8) {
107             /* load 1 new byte into left-justified cache */
108             cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
109             key &= 0x03;
110             cachedBits += 8;
111             bitsLeft -= 8;
112         } else {
113             /* last time through, pad cache with zeros and drain cache */
114             if (cachedBits + bitsLeft <= 0) {
115                 return -1;
116             }
117             if (bitsLeft > 0) {
118                 cache |= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
119             }
120             key &= 0x03;
121             cachedBits += bitsLeft;
122             bitsLeft = 0;
123
124             cache &= (signed int)0x80000000 >> (cachedBits - 1);
125             padBits = 21;
126             cachedBits += padBits;  /* okay if this is > 32 (0's automatically shifted in from right) */
127         }
128
129         /* max bits per Huffman symbol = 16, max sign bits = 5, so 21 bits will do */
130         while (vpr > 0 && cachedBits >= 21) {
131             /* left-justify to bit 16 */
132             cw = cache >> (32 - 17);
133             countCurr = (unsigned char *)countBase;
134             count = *countCurr++;
135             t = count << 16;
136             total = 0;
137
138             while (cw >= t) {
139                 cw -=  t;
140                 cw <<= 1;
141                 total += count;
142                 count = *countCurr++;
143                 t = count << 16;
144             }
145
146             nBits = countCurr - countBase;
147             cachedBits -= nBits;
148             cache <<= nBits;
149
150             vPacked = (int)map[total + (cw >> 16)];
151
152             /* sign bits */
153             nBits = (vPacked >> 12) & 0x0f;
154             cachedBits -= nBits;
155             if (cachedBits < padBits) {
156                 return -1;
157             }
158
159             /* should create good asm with conditional instruction execution on ARM
160              * format of vPacked:
161              *   bits 12-15 = number of sign bits
162              *   4 bits/coef for cat = 0, 1, 2 (bits 0-3, 4-7)
163              *   3 bits/coef for cat = 3, 4 (bits 0-2, 3-5, 6-8. 9-11)
164              *   2 bits/coef for cat = 5, 6 (bits 0-1, 2-3, 4-5. 6-7, 8-9)
165              */
166             if (cat < 3) {
167                 v = (vPacked >> 0) & 0x0f;
168                 if (v)  {
169                     v |= (cache & 0x80000000);
170                     cache <<= 1;
171                 }   *k++ = v;
172                 v = (vPacked >> 4) & 0x0f;
173                 if (v)  {
174                     v |= (cache & 0x80000000);
175                     cache <<= 1;
176                 }   *k++ = v;
177             } else if (cat < 5) {
178                 v = (vPacked >> 0) & 0x07;
179                 if (v)  {
180                     v |= (cache & 0x80000000);
181                     cache <<= 1;
182                 }   *k++ = v;
183                 v = (vPacked >> 3) & 0x07;
184                 if (v)  {
185                     v |= (cache & 0x80000000);
186                     cache <<= 1;
187                 }   *k++ = v;
188                 v = (vPacked >> 6) & 0x07;
189                 if (v)  {
190                     v |= (cache & 0x80000000);
191                     cache <<= 1;
192                 }   *k++ = v;
193                 v = (vPacked >> 9) & 0x07;
194                 if (v)  {
195                     v |= (cache & 0x80000000);
196                     cache <<= 1;
197                 }   *k++ = v;
198             } else {
199                 v = (vPacked >> 0) & 0x03;
200                 if (v)  {
201                     v |= (cache & 0x80000000);
202                     cache <<= 1;
203                 }   *k++ = v;
204                 v = (vPacked >> 2) & 0x03;
205                 if (v)  {
206                     v |= (cache & 0x80000000);
207                     cache <<= 1;
208                 }   *k++ = v;
209                 v = (vPacked >> 4) & 0x03;
210                 if (v)  {
211                     v |= (cache & 0x80000000);
212                     cache <<= 1;
213                 }   *k++ = v;
214                 v = (vPacked >> 6) & 0x03;
215                 if (v)  {
216                     v |= (cache & 0x80000000);
217                     cache <<= 1;
218                 }   *k++ = v;
219                 v = (vPacked >> 8) & 0x03;
220                 if (v)  {
221                     v |= (cache & 0x80000000);
222                     cache <<= 1;
223                 }   *k++ = v;
224             }
225             vpr--;
226         }
227     }
228
229     return startBits - bitsLeft - (cachedBits - padBits);
230 }
231
232 /* Q28 - first column (0's) are dither
233  *   rows ..... cat = [0, 7]
234  *   columns .. k =   [0, kmax_tab[cat]]
235  *   qcOffset[cat] is starting index for k = 0
236  *
237  * qcTab[0] scaled by 1.0
238  * qcTab[1] scaled by sqrt(2)
239  *
240  * lower cat = shorter vectors = finer Q = higher expected bits
241  * higher cat = longer vectors = coarser Q = lower expected bits
242  *
243  * kmax_tab[8] = { 13,  9,  6,  4,  3,  2,  1,  0 };
244  */
245 static const int qcTab[2][14 + 10 + 7 + 5 + 4 + 3 + 2 + 1] = {
246     {
247         /* quant_centroid_tab * 1.0 */
248         0x00000000, 0x0645a1c8, 0x0c2d0e50, 0x11eb8520, 0x17a1cac0, 0x1d4fdf40, 0x22ed9180, 0x28a7ef80, 0x2e49ba40, 0x33eb8500, 0x39916880, 0x3f126e80, 0x449ba600, 0x4b958100,
249         0x00000000, 0x08b43960, 0x10f5c280, 0x19020c40, 0x21168740, 0x2922d100, 0x3126e980, 0x38fdf3c0, 0x411eb880, 0x49eb8500,
250         0x00000000, 0x0bef9db0, 0x176c8b40, 0x22e147c0, 0x2e1cac00, 0x39581080, 0x450e5600,
251         0x00000000, 0x10189380, 0x20000000, 0x2fe35400, 0x3fc28f40,
252         0x00000000, 0x1522d0e0, 0x2b3f7d00, 0x3fba5e40,
253         0x02d413cc, 0x1a831260, 0x37db22c0,
254         0x04000000, 0x1f6c8b40,
255         0x0b504f33
256     },
257     {
258         /* quant_centroid_tab * sqrt(2) */
259         0x00000000, 0x08deb4dc, 0x11382ec8, 0x1957bba7, 0x216bb2a8, 0x297413e1, 0x31654988, 0x397f0b29, 0x41760b9f, 0x496d0c14, 0x5169d7b1, 0x593280ab, 0x6106bff4, 0x6ae454b2,
260         0x00000000, 0x0c4f2f4d, 0x17fc2cf0, 0x235ddce6, 0x2ecb22d2, 0x3a2cd2c9, 0x4582ecca, 0x50994ee6, 0x5c17f595, 0x6889e5e1,
261         0x00000000, 0x10e14b25, 0x212064ce, 0x3153e8c5, 0x413653ba, 0x5118bf09, 0x61a8f143,
262         0x00000000, 0x16c35fec, 0x2d413ccc, 0x43b94edf, 0x5a2b95ca,
263         0x00000000, 0x1de40c9b, 0x3d2972e9, 0x5a20002f,
264         0x04000000, 0x257e5e73, 0x4efe081d,
265         0x05a82799, 0x2c70b401,
266         0x10000000
267     },
268 };
269
270 static const int qcOffset[8] = {  0, 14, 24, 31, 36, 40, 43, 45 };
271
272 /**************************************************************************************
273  * Function:    ScalarDequant
274  *
275  * Description: dequantize transform coefficients (in-place)
276  *
277  * Inputs:      buffer of decoded/un-vectorized symbols with sign in bit 31
278  *              cat (quantizer) for this region
279  *              pointer to dequantizer table (either scaled by 1.0 or sqrt(2.0))
280  *              right-shift amount (i.e. 2^-n for normalizing to correct Q format)
281  *              pointer to current contents of dither lfsr
282  *              current guard bit mask
283  *
284  * Outputs:     NBINS dequantized coefficients
285  *              updated dither generator
286  *
287  * Return:      updated guard bit mask
288  *
289  * Notes:       assumes that shift is in range [0, 31]
290  **************************************************************************************/
291 static int ScalarDequant(int *buf, int cat, const int *dqTab, int shift, int *lfsrInit, int gbMask)
292 {
293     int i, k;
294     int dq, sign, lfsr;
295
296     lfsr = *lfsrInit;
297
298     /* multiply dequantized value (from centroid table) or the dither value (if quantized coeff == 0)
299      *   with the power in this region (from envelope)
300      *
301      * implements:
302      *   buf[i] = quant_centroid_tab[cat][buf[i]] * pow(2, DQ_FRACBITS_OUT + scale / 2.0)
303      *
304      * example (for testing with Q28 qcTab):
305      *   fixdeqnt = (int)((double)qcTab[0][qcOffset[cat] + k] / (double)(1 << 28) * pow(2, DQ_FRACBITS_OUT + scale / 2.0));
306      *   *buf++ = fixdeqnt * (sign ? -1 : 1);
307      *
308      * output format = Q(FBITS_OUT_DQ) = Q12 (currently)
309      */
310     for (i = NBINS; i != 0; i--) {
311         k = *buf;
312         if (k == 0 || cat == 7) {
313             /* update the LFSR, producing a random sign */
314             sign = (lfsr >> 31);
315             lfsr = (lfsr << 1) ^(sign & FEEDBACK);
316             dq = dqTab[0] >> shift;
317             gbMask |= dq;
318             *buf++ = (dq ^ sign) - sign;                /* inflict the sign */
319         } else {
320             /* for stepsize[cat]*buf[i], use centroid[cat][buf[i]] */
321             sign = k >> 31;
322             k &= 0x7fffffff;
323             dq = dqTab[k] >> shift;
324             gbMask |= dq;
325             *buf++ = (dq ^ sign) - sign;                /* inflict the sign */
326         }
327     }
328     *lfsrInit = lfsr;
329
330     /* fast way to bound min guard bits is just to return shift (smallest shift = min number of GB's)
331      * tighter bound is OR'ing all the dequantized values together before applying sign (done here)
332      */
333     return gbMask;
334 }
335
336 /**************************************************************************************
337  * Function:    DecodeTransform
338  *
339  * Description: recover MLT coefficients from scalar-quantized, vector Huffman codes
340  *
341  * Inputs:      pointer to initialized Gecko2Info struct
342  *              buffer to receive dequantized coefficients
343  *              number of bits remaining in bitstream
344  *              pointer to current contents of dither lfsr
345  *              channel index
346  *
347  * Outputs:     nRegions*NBINS dequantized coefficients per channel
348  *              updated dither generator
349  *
350  * Return:      minimum number of guard bits in coefficients
351  *
352  * Notes:       for joint stereo, does in-place deinterleaving into left and right
353  *                halves of mlt buffer
354  *                (left starts at mlt[0], right starts at mlt[MAXNSAMP])
355  **************************************************************************************/
356 int DecodeTransform(Gecko2Info *gi, int *mlt, int availbits, int *lfsrInit, int ch)
357 {
358     int r, gbMask, gbMin, bitsUsed, shift, fbits, rmsMax;
359     int *dest, *catbuf, *rmsIndex;
360     const int *dqTab;
361     BitStreamInfo *bsi = &(gi->bsi);
362
363     catbuf = gi->db.catbuf;
364     rmsIndex = gi->db.rmsIndex;
365
366     /* huffman decode - does joint stereo deinterleaving if necessary */
367     for (r = 0; r < gi->cRegions; r++) {
368         if (r < 2 * gi->cplStart) {
369             dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
370         } else {
371             dest = mlt + NBINS * (r - gi->cplStart);
372         }
373
374         if (catbuf[r] < 7) {
375             /* cat == 7 means region was not coded, and dequantizer will just add power-normalized dither */
376             bitsUsed = DecodeHuffmanVectors(bsi, catbuf[r], dest, availbits);
377             if (bitsUsed < 0 || bitsUsed > availbits) {
378                 break;
379             }
380             AdvanceBitstream(bsi, bitsUsed);
381             availbits -= bitsUsed;
382         }
383     }
384
385     /* if ran out of bits, use dither in rest of regions (zero scalars) */
386     for (; r < gi->cRegions; r++) {
387         catbuf[r] = 7;
388     }
389
390     /* for very large coefficients, we override the default format with fewer fraction bits, to avoid saturation
391      * range of rmsIndex = [-31, 63] (from encoder)
392      */
393     fbits = FBITS_OUT_DQ;
394     rmsMax = gi->rmsMax[ch];
395     if ((rmsMax >> 1) > (28 - fbits)) {
396         fbits = 28 - (rmsMax >> 1);
397         if (fbits < 0) {
398             fbits = 0;
399         }
400     }
401
402     /* dequantize - stops at cRegions*NBINS (doesn't zero out above this) */
403     gbMask = 0;
404     for (r = 0; r < gi->cRegions; r++) {
405         if (r < 2 * gi->cplStart) {
406             dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
407         } else {
408             dest = mlt + NBINS * (r - gi->cplStart);
409         }
410
411         /* qcTab = Q28 */
412         dqTab = qcTab[rmsIndex[r] & 0x01] + qcOffset[catbuf[r]];
413         shift = (28 - fbits - (rmsIndex[r] >> 1));
414         shift = MIN(shift, 31); /* underflow to +/- 0 */
415         shift = MAX(shift, 0);  /* saturate exponent, could only happen for gigantic rmsMax (never seen in practice) */
416
417         gbMask = ScalarDequant(dest, catbuf[r], dqTab, shift, lfsrInit, gbMask);
418     }
419
420     /* save the number of extra fraction bits we needed, if any */
421     gi->xbits[ch][1] = FBITS_OUT_DQ - fbits;
422
423     /* calculate minimum number of guard bits in mlt[] */
424     gbMin = CLZ(gbMask) - 1;
425     if (gbMin < 0) {
426         gbMin = 0;
427     }
428
429     /* mlt[] values are Q31 * 2^bExp (i.e. Q(31-bExp) = Q(DQ_FRACBITS_OUT)) */
430     return gbMin;
431 }
432
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: sqvh.c,v 1.6 2005/04/27 19:20:50 hubbe Exp $
3	*
4	* REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5	* Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6	* All Rights Reserved.
7	*
8	* The contents of this file, and the files included with this file,
9	* are subject to the current version of the Real Format Source Code
10	* Porting and Optimization License, available at
11	* https://helixcommunity.org/2005/license/realformatsource (unless
12	* RealNetworks otherwise expressly agrees in writing that you are
13	* subject to a different license). You may also obtain the license
14	* terms directly from RealNetworks. You may not use this file except
15	* in compliance with the Real Format Source Code Porting and
16	* Optimization License. There are no redistribution rights for the
17	* source code of this file. Please see the Real Format Source Code
18	* Porting and Optimization License for the rights, obligations and
19	* limitations governing use of the contents of the file.
20	*
21	* RealNetworks is the developer of the Original Code and owns the
22	* copyrights in the portions it created.
23	*
24	* This file, and the files included with this file, is distributed and
25	* made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26	* EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27	* SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29	* OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* https://rarvcode-tck.helixcommunity.org
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point RealAudio 8 decoder
40	* Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41	* October 2003
42	*
43	* sqvh.c - Huffman decoding, unpacking, and dequantization of MLT coefficients
44	**************************************************************************************/
45
46	#include "coder.h"
47	#include "assembly.h"
48
49	static const int vpr_tab[7] = {10, 10, 10, 5, 5, 4, 4}; /* number of vectors in one region (20 bins) */
50
51	/**************************************************************************************
52	* Function: DecodeHuffmanVectors
53	*
54	* Description: decode vector Huffman symbols from bitstream for one region
55	*
56	* Inputs: pointer to initialized BitStreamInfo struct
57	* cat for this region
58	* buffer to receive decoded symbols
59	* number of bits remaining in bitstream
60	*
61	* Outputs: NBINS decoded symbols
62	*
63	* Return: number of bits left in bitstream, -1 if ran out of bits
64	*
65	* Notes: the alphabet of decoded symbols has been remapped from the
66	* floating-pt reference, to replace the Horner-method polynomial
67	* evaluation with shift-and-mask (i.e. change of basis to replace
68	* 1/(kmax+1) with 1/(2^n) for unpacking vectorized codes)
69	* uses byte-cache Huffman decoder and conditional logic which is
70	* ARM-friendly
71	* this has been carefully arranged to compile well with ADS, so if you
72	* change anything, make sure you study the assembly code carefully!
73	* consider fusing with loop over all nRegions (rather than function call
74	* per loop) to avoid refilling cache each call
75	**************************************************************************************/
76	static int DecodeHuffmanVectors(BitStreamInfo bsi, int cat, int k, int bitsLeft)
77	{
78	int nBits, startBits, vpr, v, vPacked;
79	int off, key, cachedBits, padBits;
80	unsigned int cache, count, t, total, cw;
81	unsigned char buf, countCurr;
82	const unsigned char *countBase;
83	const unsigned short *map;
84
85	buf = bsi->buf;
86	off = bsi->off;
87	key = bsi->key;
88
89	countBase = huffTabVectorInfo[cat].count;
90	map = huffTabVector + huffTabVectorInfo[cat].offset;
91	startBits = bitsLeft;
92	vpr = vpr_tab[cat];
93
94	/* initially fill cache with any partial byte */
95	cache = 0;
96	cachedBits = (8 - off) & 0x07;
97	if (cachedBits) {
98	cache = (unsigned int)((*buf++) ^ pkkey[key++]) << (32 - cachedBits);
99	}
100	key &= 0x03;
101	bitsLeft -= cachedBits;
102
103	padBits = 0;
104	while (vpr > 0) {
105	/* refill cache - assumes cachedBits <= 24 */
106	if (bitsLeft >= 8) {
107	/* load 1 new byte into left-justified cache */
108	cache \|= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
109	key &= 0x03;
110	cachedBits += 8;
111	bitsLeft -= 8;
112	} else {
113	/* last time through, pad cache with zeros and drain cache */
114	if (cachedBits + bitsLeft <= 0) {
115	return -1;
116	}
117	if (bitsLeft > 0) {
118	cache \|= (unsigned int)((*buf++) ^ pkkey[key++]) << (24 - cachedBits);
119	}
120	key &= 0x03;
121	cachedBits += bitsLeft;
122	bitsLeft = 0;
123
124	cache &= (signed int)0x80000000 >> (cachedBits - 1);
125	padBits = 21;
126	cachedBits += padBits; /* okay if this is > 32 (0's automatically shifted in from right) */
127	}
128
129	/* max bits per Huffman symbol = 16, max sign bits = 5, so 21 bits will do */
130	while (vpr > 0 && cachedBits >= 21) {
131	/* left-justify to bit 16 */
132	cw = cache >> (32 - 17);
133	countCurr = (unsigned char *)countBase;
134	count = *countCurr++;
135	t = count << 16;
136	total = 0;
137
138	while (cw >= t) {
139	cw -= t;
140	cw <<= 1;
141	total += count;
142	count = *countCurr++;
143	t = count << 16;
144	}
145
146	nBits = countCurr - countBase;
147	cachedBits -= nBits;
148	cache <<= nBits;
149
150	vPacked = (int)map[total + (cw >> 16)];
151
152	/* sign bits */
153	nBits = (vPacked >> 12) & 0x0f;
154	cachedBits -= nBits;
155	if (cachedBits < padBits) {
156	return -1;
157	}
158
159	/* should create good asm with conditional instruction execution on ARM
160	* format of vPacked:
161	* bits 12-15 = number of sign bits
162	* 4 bits/coef for cat = 0, 1, 2 (bits 0-3, 4-7)
163	* 3 bits/coef for cat = 3, 4 (bits 0-2, 3-5, 6-8. 9-11)
164	* 2 bits/coef for cat = 5, 6 (bits 0-1, 2-3, 4-5. 6-7, 8-9)
165	*/
166	if (cat < 3) {
167	v = (vPacked >> 0) & 0x0f;
168	if (v) {
169	v \|= (cache & 0x80000000);
170	cache <<= 1;
171	} *k++ = v;
172	v = (vPacked >> 4) & 0x0f;
173	if (v) {
174	v \|= (cache & 0x80000000);
175	cache <<= 1;
176	} *k++ = v;
177	} else if (cat < 5) {
178	v = (vPacked >> 0) & 0x07;
179	if (v) {
180	v \|= (cache & 0x80000000);
181	cache <<= 1;
182	} *k++ = v;
183	v = (vPacked >> 3) & 0x07;
184	if (v) {
185	v \|= (cache & 0x80000000);
186	cache <<= 1;
187	} *k++ = v;
188	v = (vPacked >> 6) & 0x07;
189	if (v) {
190	v \|= (cache & 0x80000000);
191	cache <<= 1;
192	} *k++ = v;
193	v = (vPacked >> 9) & 0x07;
194	if (v) {
195	v \|= (cache & 0x80000000);
196	cache <<= 1;
197	} *k++ = v;
198	} else {
199	v = (vPacked >> 0) & 0x03;
200	if (v) {
201	v \|= (cache & 0x80000000);
202	cache <<= 1;
203	} *k++ = v;
204	v = (vPacked >> 2) & 0x03;
205	if (v) {
206	v \|= (cache & 0x80000000);
207	cache <<= 1;
208	} *k++ = v;
209	v = (vPacked >> 4) & 0x03;
210	if (v) {
211	v \|= (cache & 0x80000000);
212	cache <<= 1;
213	} *k++ = v;
214	v = (vPacked >> 6) & 0x03;
215	if (v) {
216	v \|= (cache & 0x80000000);
217	cache <<= 1;
218	} *k++ = v;
219	v = (vPacked >> 8) & 0x03;
220	if (v) {
221	v \|= (cache & 0x80000000);
222	cache <<= 1;
223	} *k++ = v;
224	}
225	vpr--;
226	}
227	}
228
229	return startBits - bitsLeft - (cachedBits - padBits);
230	}
231
232	/* Q28 - first column (0's) are dither
233	* rows ..... cat = [0, 7]
234	* columns .. k = [0, kmax_tab[cat]]
235	* qcOffset[cat] is starting index for k = 0
236	*
237	* qcTab[0] scaled by 1.0
238	* qcTab[1] scaled by sqrt(2)
239	*
240	* lower cat = shorter vectors = finer Q = higher expected bits
241	* higher cat = longer vectors = coarser Q = lower expected bits
242	*
243	* kmax_tab[8] = { 13, 9, 6, 4, 3, 2, 1, 0 };
244	*/
245	static const int qcTab[2][14 + 10 + 7 + 5 + 4 + 3 + 2 + 1] = {
246	{
247	/* quant_centroid_tab * 1.0 */
248	0x00000000, 0x0645a1c8, 0x0c2d0e50, 0x11eb8520, 0x17a1cac0, 0x1d4fdf40, 0x22ed9180, 0x28a7ef80, 0x2e49ba40, 0x33eb8500, 0x39916880, 0x3f126e80, 0x449ba600, 0x4b958100,
249	0x00000000, 0x08b43960, 0x10f5c280, 0x19020c40, 0x21168740, 0x2922d100, 0x3126e980, 0x38fdf3c0, 0x411eb880, 0x49eb8500,
250	0x00000000, 0x0bef9db0, 0x176c8b40, 0x22e147c0, 0x2e1cac00, 0x39581080, 0x450e5600,
251	0x00000000, 0x10189380, 0x20000000, 0x2fe35400, 0x3fc28f40,
252	0x00000000, 0x1522d0e0, 0x2b3f7d00, 0x3fba5e40,
253	0x02d413cc, 0x1a831260, 0x37db22c0,
254	0x04000000, 0x1f6c8b40,
255	0x0b504f33
256	},
257	{
258	/* quant_centroid_tab * sqrt(2) */
259	0x00000000, 0x08deb4dc, 0x11382ec8, 0x1957bba7, 0x216bb2a8, 0x297413e1, 0x31654988, 0x397f0b29, 0x41760b9f, 0x496d0c14, 0x5169d7b1, 0x593280ab, 0x6106bff4, 0x6ae454b2,
260	0x00000000, 0x0c4f2f4d, 0x17fc2cf0, 0x235ddce6, 0x2ecb22d2, 0x3a2cd2c9, 0x4582ecca, 0x50994ee6, 0x5c17f595, 0x6889e5e1,
261	0x00000000, 0x10e14b25, 0x212064ce, 0x3153e8c5, 0x413653ba, 0x5118bf09, 0x61a8f143,
262	0x00000000, 0x16c35fec, 0x2d413ccc, 0x43b94edf, 0x5a2b95ca,
263	0x00000000, 0x1de40c9b, 0x3d2972e9, 0x5a20002f,
264	0x04000000, 0x257e5e73, 0x4efe081d,
265	0x05a82799, 0x2c70b401,
266	0x10000000
267	},
268	};
269
270	static const int qcOffset[8] = { 0, 14, 24, 31, 36, 40, 43, 45 };
271
272	/**************************************************************************************
273	* Function: ScalarDequant
274	*
275	* Description: dequantize transform coefficients (in-place)
276	*
277	* Inputs: buffer of decoded/un-vectorized symbols with sign in bit 31
278	* cat (quantizer) for this region
279	* pointer to dequantizer table (either scaled by 1.0 or sqrt(2.0))
280	* right-shift amount (i.e. 2^-n for normalizing to correct Q format)
281	* pointer to current contents of dither lfsr
282	* current guard bit mask
283	*
284	* Outputs: NBINS dequantized coefficients
285	* updated dither generator
286	*
287	* Return: updated guard bit mask
288	*
289	* Notes: assumes that shift is in range [0, 31]
290	**************************************************************************************/
291	static int ScalarDequant(int buf, int cat, const int dqTab, int shift, int *lfsrInit, int gbMask)
292	{
293	int i, k;
294	int dq, sign, lfsr;
295
296	lfsr = *lfsrInit;
297
298	/* multiply dequantized value (from centroid table) or the dither value (if quantized coeff == 0)
299	* with the power in this region (from envelope)
300	*
301	* implements:
302	* buf[i] = quant_centroid_tab[cat][buf[i]] * pow(2, DQ_FRACBITS_OUT + scale / 2.0)
303	*
304	* example (for testing with Q28 qcTab):
305	* fixdeqnt = (int)((double)qcTab[0][qcOffset[cat] + k] / (double)(1 << 28) * pow(2, DQ_FRACBITS_OUT + scale / 2.0));
306	* buf++ = fixdeqnt (sign ? -1 : 1);
307	*
308	* output format = Q(FBITS_OUT_DQ) = Q12 (currently)
309	*/
310	for (i = NBINS; i != 0; i--) {
311	k = *buf;
312	if (k == 0 \|\| cat == 7) {
313	/* update the LFSR, producing a random sign */
314	sign = (lfsr >> 31);
315	lfsr = (lfsr << 1) ^(sign & FEEDBACK);
316	dq = dqTab[0] >> shift;
317	gbMask \|= dq;
318	buf++ = (dq ^ sign) - sign; / inflict the sign */
319	} else {
320	/* for stepsize[cat]buf[i], use centroid[cat][buf[i]] /
321	sign = k >> 31;
322	k &= 0x7fffffff;
323	dq = dqTab[k] >> shift;
324	gbMask \|= dq;
325	buf++ = (dq ^ sign) - sign; / inflict the sign */
326	}
327	}
328	*lfsrInit = lfsr;
329
330	/* fast way to bound min guard bits is just to return shift (smallest shift = min number of GB's)
331	* tighter bound is OR'ing all the dequantized values together before applying sign (done here)
332	*/
333	return gbMask;
334	}
335
336	/**************************************************************************************
337	* Function: DecodeTransform
338	*
339	* Description: recover MLT coefficients from scalar-quantized, vector Huffman codes
340	*
341	* Inputs: pointer to initialized Gecko2Info struct
342	* buffer to receive dequantized coefficients
343	* number of bits remaining in bitstream
344	* pointer to current contents of dither lfsr
345	* channel index
346	*
347	* Outputs: nRegions*NBINS dequantized coefficients per channel
348	* updated dither generator
349	*
350	* Return: minimum number of guard bits in coefficients
351	*
352	* Notes: for joint stereo, does in-place deinterleaving into left and right
353	* halves of mlt buffer
354	* (left starts at mlt[0], right starts at mlt[MAXNSAMP])
355	**************************************************************************************/
356	int DecodeTransform(Gecko2Info gi, int mlt, int availbits, int *lfsrInit, int ch)
357	{
358	int r, gbMask, gbMin, bitsUsed, shift, fbits, rmsMax;
359	int dest, catbuf, *rmsIndex;
360	const int *dqTab;
361	BitStreamInfo *bsi = &(gi->bsi);
362
363	catbuf = gi->db.catbuf;
364	rmsIndex = gi->db.rmsIndex;
365
366	/* huffman decode - does joint stereo deinterleaving if necessary */
367	for (r = 0; r < gi->cRegions; r++) {
368	if (r < 2 * gi->cplStart) {
369	dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
370	} else {
371	dest = mlt + NBINS * (r - gi->cplStart);
372	}
373
374	if (catbuf[r] < 7) {
375	/* cat == 7 means region was not coded, and dequantizer will just add power-normalized dither */
376	bitsUsed = DecodeHuffmanVectors(bsi, catbuf[r], dest, availbits);
377	if (bitsUsed < 0 \|\| bitsUsed > availbits) {
378	break;
379	}
380	AdvanceBitstream(bsi, bitsUsed);
381	availbits -= bitsUsed;
382	}
383	}
384
385	/* if ran out of bits, use dither in rest of regions (zero scalars) */
386	for (; r < gi->cRegions; r++) {
387	catbuf[r] = 7;
388	}
389
390	/* for very large coefficients, we override the default format with fewer fraction bits, to avoid saturation
391	* range of rmsIndex = [-31, 63] (from encoder)
392	*/
393	fbits = FBITS_OUT_DQ;
394	rmsMax = gi->rmsMax[ch];
395	if ((rmsMax >> 1) > (28 - fbits)) {
396	fbits = 28 - (rmsMax >> 1);
397	if (fbits < 0) {
398	fbits = 0;
399	}
400	}
401
402	/* dequantize - stops at cRegionsNBINS (doesn't zero out above this) /
403	gbMask = 0;
404	for (r = 0; r < gi->cRegions; r++) {
405	if (r < 2 * gi->cplStart) {
406	dest = mlt + NBINS * (r >> 1) + MAXNSAMP * (r & 0x01);
407	} else {
408	dest = mlt + NBINS * (r - gi->cplStart);
409	}
410
411	/* qcTab = Q28 */
412	dqTab = qcTab[rmsIndex[r] & 0x01] + qcOffset[catbuf[r]];
413	shift = (28 - fbits - (rmsIndex[r] >> 1));
414	shift = MIN(shift, 31); /* underflow to +/- 0 */
415	shift = MAX(shift, 0); /* saturate exponent, could only happen for gigantic rmsMax (never seen in practice) */
416
417	gbMask = ScalarDequant(dest, catbuf[r], dqTab, shift, lfsrInit, gbMask);
418	}
419
420	/* save the number of extra fraction bits we needed, if any */
421	gi->xbits[ch][1] = FBITS_OUT_DQ - fbits;
422
423	/* calculate minimum number of guard bits in mlt[] */
424	gbMin = CLZ(gbMask) - 1;
425	if (gbMin < 0) {
426	gbMin = 0;
427	}
428
429	/* mlt[] values are Q31 * 2^bExp (i.e. Q(31-bExp) = Q(DQ_FRACBITS_OUT)) */
430	return gbMin;
431	}
432