platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: mlt.c,v 1.5 2005/04/27 19:20:50 hubbe Exp $
3  *
4  * REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5  * Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6  * All Rights Reserved.
7  *
8  * The contents of this file, and the files included with this file,
9  * are subject to the current version of the Real Format Source Code
10  * Porting and Optimization License, available at
11  * https://helixcommunity.org/2005/license/realformatsource (unless
12  * RealNetworks otherwise expressly agrees in writing that you are
13  * subject to a different license).  You may also obtain the license
14  * terms directly from RealNetworks.  You may not use this file except
15  * in compliance with the Real Format Source Code Porting and
16  * Optimization License. There are no redistribution rights for the
17  * source code of this file. Please see the Real Format Source Code
18  * Porting and Optimization License for the rights, obligations and
19  * limitations governing use of the contents of the file.
20  *
21  * RealNetworks is the developer of the Original Code and owns the
22  * copyrights in the portions it created.
23  *
24  * This file, and the files included with this file, is distributed and
25  * made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26  * EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27  * SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29  * OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  * https://rarvcode-tck.helixcommunity.org
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point RealAudio 8 decoder
40  * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41  * October 2003
42  *
43  * mlt.c - fast MLT/IMLT functions
44  **************************************************************************************/
45
46 #include "coder.h"
47 #include "assembly.h"
48
49 static const int postSkip[NUM_MLT_SIZES] = {7, 3, 1};
50
51 /**************************************************************************************
52  * Function:    PreMultiply
53  *
54  * Description: pre-twiddle stage of MDCT
55  *
56  * Inputs:      table index (for transform size)
57  *              buffer of nmlt samples
58  *
59  * Outputs:     processed samples in same buffer
60  *
61  * Return:      none
62  *
63  * Notes:       minimum 1 GB in, 2 GB out - loses (1+tabidx) int bits
64  *              normalization by 2/sqrt(N) is rolled into tables here
65  *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
66  *              should asm code (compiler not doing free pointer updates, etc.)
67  **************************************************************************************/
68 static void PreMultiply(int tabidx, int *zbuf1)
69 {
70     int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
71     int t, cms2, cps2a, sin2a, cps2b, sin2b;
72     int *zbuf2;
73     const int *csptr;
74
75     nmlt = nmltTab[tabidx];
76     zbuf2 = zbuf1 + nmlt - 1;
77     csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
78
79     /* whole thing should fit in registers - verify that compiler does this */
80     for (i = nmlt >> 2; i != 0; i--) {
81         /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
82         cps2a = *csptr++;
83         sin2a = *csptr++;
84         cps2b = *csptr++;
85         sin2b = *csptr++;
86
87         ar1 = *(zbuf1 + 0);
88         ai2 = *(zbuf1 + 1);
89         ai1 = *(zbuf2 + 0);
90         ar2 = *(zbuf2 - 1);
91
92         /* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
93         t  = MULSHIFT32(sin2a, ar1 + ai1);
94         z2 = MULSHIFT32(cps2a, ai1) - t;
95         cms2 = cps2a - 2 * sin2a;
96         z1 = MULSHIFT32(cms2, ar1) + t;
97         *zbuf1++ = z1;
98         *zbuf1++ = z2;
99
100         t  = MULSHIFT32(sin2b, ar2 + ai2);
101         z2 = MULSHIFT32(cps2b, ai2) - t;
102         cms2 = cps2b - 2 * sin2b;
103         z1 = MULSHIFT32(cms2, ar2) + t;
104         *zbuf2-- = z2;
105         *zbuf2-- = z1;
106
107     }
108
109     /* Note on scaling...
110      * assumes 1 guard bit in, gains (1 + tabidx) fraction bits
111      *   i.e. gain 1, 2, or 3 fraction bits, for nSamps = 256, 512, 1024
112      *   (left-shifting, since table scaled by 2 / sqrt(nSamps))
113      * this offsets the fact that each extra pass in FFT gains one more int bit
114      */
115     return;
116 }
117
118 /**************************************************************************************
119  * Function:    PostMultiply
120  *
121  * Description: post-twiddle stage of MDCT
122  *
123  * Inputs:      table index (for transform size)
124  *              buffer of nmlt samples
125  *
126  * Outputs:     processed samples in same buffer
127  *
128  * Return:      none
129  *
130  * Notes:       minimum 1 GB in, 2 GB out - gains 1 int bit
131  *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
132  *              should asm code (compiler not doing free pointer updates, etc.)
133  **************************************************************************************/
134 static void PostMultiply(int tabidx, int *fft1)
135 {
136     int i, nmlt, ar1, ai1, ar2, ai2, skipFactor;
137     int t, cms2, cps2, sin2;
138     int *fft2;
139     const int *csptr;
140
141     nmlt = nmltTab[tabidx];
142     csptr = cos1sin1tab;
143     skipFactor = postSkip[tabidx];
144     fft2 = fft1 + nmlt - 1;
145
146     /* load coeffs for first pass
147      * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
148      */
149     cps2 = *csptr++;
150     sin2 = *csptr;
151     csptr += skipFactor;
152     cms2 = cps2 - 2 * sin2;
153
154     for (i = nmlt >> 2; i != 0; i--) {
155         ar1 = *(fft1 + 0);
156         ai1 = *(fft1 + 1);
157         ar2 = *(fft2 - 1);
158         ai2 = *(fft2 + 0);
159
160         /* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
161         t = MULSHIFT32(sin2, ar1 + ai1);
162         *fft2-- = t - MULSHIFT32(cps2, ai1);
163         *fft1++ = t + MULSHIFT32(cms2, ar1);
164
165         cps2 = *csptr++;
166         sin2 = *csptr;
167         csptr += skipFactor;
168
169         ai2 = -ai2;
170         t = MULSHIFT32(sin2, ar2 + ai2);
171         *fft2-- = t - MULSHIFT32(cps2, ai2);
172         cms2 = cps2 - 2 * sin2;
173         *fft1++ = t + MULSHIFT32(cms2, ar2);
174
175     }
176
177     /* Note on scaling...
178      * assumes 1 guard bit in, gains 2 int bits
179      * max gain of abs(cos) + abs(sin) = sqrt(2) = 1.414, so current format
180      *   guarantees 1 guard bit in output
181      */
182     return;
183 }
184
185 /**************************************************************************************
186  * Function:    PreMultiplyRescale
187  *
188  * Description: pre-twiddle stage of MDCT, with rescaling for extra guard bits
189  *
190  * Inputs:      table index (for transform size)
191  *              buffer of nmlt samples
192  *              number of guard bits to add to input before processing
193  *
194  * Outputs:     processed samples in same buffer
195  *
196  * Return:      none
197  *
198  * Notes:       see notes on PreMultiply(), above
199  **************************************************************************************/
200 static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
201 {
202     int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
203     int t, cms2, cps2a, sin2a, cps2b, sin2b;
204     int *zbuf2;
205     const int *csptr;
206
207     nmlt = nmltTab[tabidx];
208     zbuf2 = zbuf1 + nmlt - 1;
209     csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
210
211     /* whole thing should fit in registers - verify that compiler does this */
212     for (i = nmlt >> 2; i != 0; i--) {
213         /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
214         cps2a = *csptr++;
215         sin2a = *csptr++;
216         cps2b = *csptr++;
217         sin2b = *csptr++;
218
219         ar1 = *(zbuf1 + 0) >> es;
220         ai1 = *(zbuf2 + 0) >> es;
221         ai2 = *(zbuf1 + 1) >> es;
222
223         /* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
224         t  = MULSHIFT32(sin2a, ar1 + ai1);
225         z2 = MULSHIFT32(cps2a, ai1) - t;
226         cms2 = cps2a - 2 * sin2a;
227         z1 = MULSHIFT32(cms2, ar1) + t;
228         *zbuf1++ = z1;
229         *zbuf1++ = z2;
230
231         ar2 = *(zbuf2 - 1) >> es;   /* do here to free up register used for es */
232
233         t  = MULSHIFT32(sin2b, ar2 + ai2);
234         z2 = MULSHIFT32(cps2b, ai2) - t;
235         cms2 = cps2b - 2 * sin2b;
236         z1 = MULSHIFT32(cms2, ar2) + t;
237         *zbuf2-- = z2;
238         *zbuf2-- = z1;
239
240     }
241
242     /* see comments in PreMultiply() for notes on scaling */
243     return;
244 }
245
246 /**************************************************************************************
247  * Function:    PostMultiplyRescale
248  *
249  * Description: post-twiddle stage of MDCT, with rescaling for extra guard bits
250  *
251  * Inputs:      table index (for transform size)
252  *              buffer of nmlt samples
253  *              number of guard bits to remove from output
254  *
255  * Outputs:     processed samples in same buffer
256  *
257  * Return:      none
258  *
259  * Notes:       clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
260  *              see notes on PostMultiply(), above
261  **************************************************************************************/
262 static void PostMultiplyRescale(int tabidx, int *fft1, int es)
263 {
264     int i, nmlt, ar1, ai1, ar2, ai2, skipFactor, z;
265     int t, cs2, sin2;
266     int *fft2;
267     const int *csptr;
268
269     nmlt = nmltTab[tabidx];
270     csptr = cos1sin1tab;
271     skipFactor = postSkip[tabidx];
272     fft2 = fft1 + nmlt - 1;
273
274     /* load coeffs for first pass
275      * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
276      */
277     cs2 = *csptr++;
278     sin2 = *csptr;
279     csptr += skipFactor;
280
281     for (i = nmlt >> 2; i != 0; i--) {
282         ar1 = *(fft1 + 0);
283         ai1 = *(fft1 + 1);
284         ai2 = *(fft2 + 0);
285
286         /* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
287         t = MULSHIFT32(sin2, ar1 + ai1);
288         z = t - MULSHIFT32(cs2, ai1);
289         CLIP_2N_SHIFT(z, es);
290         *fft2-- = z;
291         cs2 -= 2 * sin2;
292         z = t + MULSHIFT32(cs2, ar1);
293         CLIP_2N_SHIFT(z, es);
294         *fft1++ = z;
295
296         cs2 = *csptr++;
297         sin2 = *csptr;
298         csptr += skipFactor;
299
300         ar2 = *fft2;
301         ai2 = -ai2;
302         t = MULSHIFT32(sin2, ar2 + ai2);
303         z = t - MULSHIFT32(cs2, ai2);
304         CLIP_2N_SHIFT(z, es);
305         *fft2-- = z;
306         cs2 -= 2 * sin2;
307         z = t + MULSHIFT32(cs2, ar2);
308         CLIP_2N_SHIFT(z, es);
309         *fft1++ = z;
310         cs2 += 2 * sin2;
311
312     }
313
314     /* see comments in PostMultiply() for notes on scaling */
315     return;
316 }
317
318 /**************************************************************************************
319  * Function:    IMLTNoWindow
320  *
321  * Description: inverse MLT without window or overlap-add
322  *
323  * Inputs:      table index (for transform size)
324  *              buffer of nmlt samples
325  *              number of guard bits in the input buffer
326  *
327  * Outputs:     processed samples in same buffer
328  *
329  * Return:      none
330  *
331  * Notes:       operates in-place, and generates nmlt output samples from nmlt input
332  *                samples (doesn't do synthesis window which expands to 2*nmlt samples)
333  *              if number of guard bits in input is < GBITS_IN_IMLT, the input is
334  *                scaled (>>) before the IMLT and rescaled (<<, with clipping) after
335  *                the IMLT (rare)
336  *              the output has FBITS_LOST_IMLT fewer fraction bits than the input
337  *              the output will always have at least 1 guard bit
338  **************************************************************************************/
339 void IMLTNoWindow(int tabidx, int *mlt, int gb)
340 {
341     int es;
342
343     /* fast in-place DCT-IV - adds guard bits if necessary */
344     if (gb < GBITS_IN_IMLT) {
345         es = GBITS_IN_IMLT - gb;
346         PreMultiplyRescale(tabidx, mlt, es);
347         R4FFT(tabidx, mlt);
348         PostMultiplyRescale(tabidx, mlt, es);
349     } else {
350         PreMultiply(tabidx, mlt);
351         R4FFT(tabidx, mlt);
352         PostMultiply(tabidx, mlt);
353     }
354
355     return;
356 }
357
358
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: mlt.c,v 1.5 2005/04/27 19:20:50 hubbe Exp $
3	*
4	* REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5	* Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6	* All Rights Reserved.
7	*
8	* The contents of this file, and the files included with this file,
9	* are subject to the current version of the Real Format Source Code
10	* Porting and Optimization License, available at
11	* https://helixcommunity.org/2005/license/realformatsource (unless
12	* RealNetworks otherwise expressly agrees in writing that you are
13	* subject to a different license). You may also obtain the license
14	* terms directly from RealNetworks. You may not use this file except
15	* in compliance with the Real Format Source Code Porting and
16	* Optimization License. There are no redistribution rights for the
17	* source code of this file. Please see the Real Format Source Code
18	* Porting and Optimization License for the rights, obligations and
19	* limitations governing use of the contents of the file.
20	*
21	* RealNetworks is the developer of the Original Code and owns the
22	* copyrights in the portions it created.
23	*
24	* This file, and the files included with this file, is distributed and
25	* made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26	* EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27	* SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29	* OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* https://rarvcode-tck.helixcommunity.org
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point RealAudio 8 decoder
40	* Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41	* October 2003
42	*
43	* mlt.c - fast MLT/IMLT functions
44	**************************************************************************************/
45
46	#include "coder.h"
47	#include "assembly.h"
48
49	static const int postSkip[NUM_MLT_SIZES] = {7, 3, 1};
50
51	/**************************************************************************************
52	* Function: PreMultiply
53	*
54	* Description: pre-twiddle stage of MDCT
55	*
56	* Inputs: table index (for transform size)
57	* buffer of nmlt samples
58	*
59	* Outputs: processed samples in same buffer
60	*
61	* Return: none
62	*
63	* Notes: minimum 1 GB in, 2 GB out - loses (1+tabidx) int bits
64	* normalization by 2/sqrt(N) is rolled into tables here
65	* uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
66	* should asm code (compiler not doing free pointer updates, etc.)
67	**************************************************************************************/
68	static void PreMultiply(int tabidx, int *zbuf1)
69	{
70	int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
71	int t, cms2, cps2a, sin2a, cps2b, sin2b;
72	int *zbuf2;
73	const int *csptr;
74
75	nmlt = nmltTab[tabidx];
76	zbuf2 = zbuf1 + nmlt - 1;
77	csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
78
79	/* whole thing should fit in registers - verify that compiler does this */
80	for (i = nmlt >> 2; i != 0; i--) {
81	/* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
82	cps2a = *csptr++;
83	sin2a = *csptr++;
84	cps2b = *csptr++;
85	sin2b = *csptr++;
86
87	ar1 = *(zbuf1 + 0);
88	ai2 = *(zbuf1 + 1);
89	ai1 = *(zbuf2 + 0);
90	ar2 = *(zbuf2 - 1);
91
92	/* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
93	t = MULSHIFT32(sin2a, ar1 + ai1);
94	z2 = MULSHIFT32(cps2a, ai1) - t;
95	cms2 = cps2a - 2 * sin2a;
96	z1 = MULSHIFT32(cms2, ar1) + t;
97	*zbuf1++ = z1;
98	*zbuf1++ = z2;
99
100	t = MULSHIFT32(sin2b, ar2 + ai2);
101	z2 = MULSHIFT32(cps2b, ai2) - t;
102	cms2 = cps2b - 2 * sin2b;
103	z1 = MULSHIFT32(cms2, ar2) + t;
104	*zbuf2-- = z2;
105	*zbuf2-- = z1;
106
107	}
108
109	/* Note on scaling...
110	* assumes 1 guard bit in, gains (1 + tabidx) fraction bits
111	* i.e. gain 1, 2, or 3 fraction bits, for nSamps = 256, 512, 1024
112	* (left-shifting, since table scaled by 2 / sqrt(nSamps))
113	* this offsets the fact that each extra pass in FFT gains one more int bit
114	*/
115	return;
116	}
117
118	/**************************************************************************************
119	* Function: PostMultiply
120	*
121	* Description: post-twiddle stage of MDCT
122	*
123	* Inputs: table index (for transform size)
124	* buffer of nmlt samples
125	*
126	* Outputs: processed samples in same buffer
127	*
128	* Return: none
129	*
130	* Notes: minimum 1 GB in, 2 GB out - gains 1 int bit
131	* uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
132	* should asm code (compiler not doing free pointer updates, etc.)
133	**************************************************************************************/
134	static void PostMultiply(int tabidx, int *fft1)
135	{
136	int i, nmlt, ar1, ai1, ar2, ai2, skipFactor;
137	int t, cms2, cps2, sin2;
138	int *fft2;
139	const int *csptr;
140
141	nmlt = nmltTab[tabidx];
142	csptr = cos1sin1tab;
143	skipFactor = postSkip[tabidx];
144	fft2 = fft1 + nmlt - 1;
145
146	/* load coeffs for first pass
147	* cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
148	*/
149	cps2 = *csptr++;
150	sin2 = *csptr;
151	csptr += skipFactor;
152	cms2 = cps2 - 2 * sin2;
153
154	for (i = nmlt >> 2; i != 0; i--) {
155	ar1 = *(fft1 + 0);
156	ai1 = *(fft1 + 1);
157	ar2 = *(fft2 - 1);
158	ai2 = *(fft2 + 0);
159
160	/* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5sin /
161	t = MULSHIFT32(sin2, ar1 + ai1);
162	*fft2-- = t - MULSHIFT32(cps2, ai1);
163	*fft1++ = t + MULSHIFT32(cms2, ar1);
164
165	cps2 = *csptr++;
166	sin2 = *csptr;
167	csptr += skipFactor;
168
169	ai2 = -ai2;
170	t = MULSHIFT32(sin2, ar2 + ai2);
171	*fft2-- = t - MULSHIFT32(cps2, ai2);
172	cms2 = cps2 - 2 * sin2;
173	*fft1++ = t + MULSHIFT32(cms2, ar2);
174
175	}
176
177	/* Note on scaling...
178	* assumes 1 guard bit in, gains 2 int bits
179	* max gain of abs(cos) + abs(sin) = sqrt(2) = 1.414, so current format
180	* guarantees 1 guard bit in output
181	*/
182	return;
183	}
184
185	/**************************************************************************************
186	* Function: PreMultiplyRescale
187	*
188	* Description: pre-twiddle stage of MDCT, with rescaling for extra guard bits
189	*
190	* Inputs: table index (for transform size)
191	* buffer of nmlt samples
192	* number of guard bits to add to input before processing
193	*
194	* Outputs: processed samples in same buffer
195	*
196	* Return: none
197	*
198	* Notes: see notes on PreMultiply(), above
199	**************************************************************************************/
200	static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
201	{
202	int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
203	int t, cms2, cps2a, sin2a, cps2b, sin2b;
204	int *zbuf2;
205	const int *csptr;
206
207	nmlt = nmltTab[tabidx];
208	zbuf2 = zbuf1 + nmlt - 1;
209	csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
210
211	/* whole thing should fit in registers - verify that compiler does this */
212	for (i = nmlt >> 2; i != 0; i--) {
213	/* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
214	cps2a = *csptr++;
215	sin2a = *csptr++;
216	cps2b = *csptr++;
217	sin2b = *csptr++;
218
219	ar1 = *(zbuf1 + 0) >> es;
220	ai1 = *(zbuf2 + 0) >> es;
221	ai2 = *(zbuf1 + 1) >> es;
222
223	/* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
224	t = MULSHIFT32(sin2a, ar1 + ai1);
225	z2 = MULSHIFT32(cps2a, ai1) - t;
226	cms2 = cps2a - 2 * sin2a;
227	z1 = MULSHIFT32(cms2, ar1) + t;
228	*zbuf1++ = z1;
229	*zbuf1++ = z2;
230
231	ar2 = (zbuf2 - 1) >> es; / do here to free up register used for es */
232
233	t = MULSHIFT32(sin2b, ar2 + ai2);
234	z2 = MULSHIFT32(cps2b, ai2) - t;
235	cms2 = cps2b - 2 * sin2b;
236	z1 = MULSHIFT32(cms2, ar2) + t;
237	*zbuf2-- = z2;
238	*zbuf2-- = z1;
239
240	}
241
242	/* see comments in PreMultiply() for notes on scaling */
243	return;
244	}
245
246	/**************************************************************************************
247	* Function: PostMultiplyRescale
248	*
249	* Description: post-twiddle stage of MDCT, with rescaling for extra guard bits
250	*
251	* Inputs: table index (for transform size)
252	* buffer of nmlt samples
253	* number of guard bits to remove from output
254	*
255	* Outputs: processed samples in same buffer
256	*
257	* Return: none
258	*
259	* Notes: clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
260	* see notes on PostMultiply(), above
261	**************************************************************************************/
262	static void PostMultiplyRescale(int tabidx, int *fft1, int es)
263	{
264	int i, nmlt, ar1, ai1, ar2, ai2, skipFactor, z;
265	int t, cs2, sin2;
266	int *fft2;
267	const int *csptr;
268
269	nmlt = nmltTab[tabidx];
270	csptr = cos1sin1tab;
271	skipFactor = postSkip[tabidx];
272	fft2 = fft1 + nmlt - 1;
273
274	/* load coeffs for first pass
275	* cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
276	*/
277	cs2 = *csptr++;
278	sin2 = *csptr;
279	csptr += skipFactor;
280
281	for (i = nmlt >> 2; i != 0; i--) {
282	ar1 = *(fft1 + 0);
283	ai1 = *(fft1 + 1);
284	ai2 = *(fft2 + 0);
285
286	/* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5sin /
287	t = MULSHIFT32(sin2, ar1 + ai1);
288	z = t - MULSHIFT32(cs2, ai1);
289	CLIP_2N_SHIFT(z, es);
290	*fft2-- = z;
291	cs2 -= 2 * sin2;
292	z = t + MULSHIFT32(cs2, ar1);
293	CLIP_2N_SHIFT(z, es);
294	*fft1++ = z;
295
296	cs2 = *csptr++;
297	sin2 = *csptr;
298	csptr += skipFactor;
299
300	ar2 = *fft2;
301	ai2 = -ai2;
302	t = MULSHIFT32(sin2, ar2 + ai2);
303	z = t - MULSHIFT32(cs2, ai2);
304	CLIP_2N_SHIFT(z, es);
305	*fft2-- = z;
306	cs2 -= 2 * sin2;
307	z = t + MULSHIFT32(cs2, ar2);
308	CLIP_2N_SHIFT(z, es);
309	*fft1++ = z;
310	cs2 += 2 * sin2;
311
312	}
313
314	/* see comments in PostMultiply() for notes on scaling */
315	return;
316	}
317
318	/**************************************************************************************
319	* Function: IMLTNoWindow
320	*
321	* Description: inverse MLT without window or overlap-add
322	*
323	* Inputs: table index (for transform size)
324	* buffer of nmlt samples
325	* number of guard bits in the input buffer
326	*
327	* Outputs: processed samples in same buffer
328	*
329	* Return: none
330	*
331	* Notes: operates in-place, and generates nmlt output samples from nmlt input
332	* samples (doesn't do synthesis window which expands to 2*nmlt samples)
333	* if number of guard bits in input is < GBITS_IN_IMLT, the input is
334	* scaled (>>) before the IMLT and rescaled (<<, with clipping) after
335	* the IMLT (rare)
336	* the output has FBITS_LOST_IMLT fewer fraction bits than the input
337	* the output will always have at least 1 guard bit
338	**************************************************************************************/
339	void IMLTNoWindow(int tabidx, int *mlt, int gb)
340	{
341	int es;
342
343	/* fast in-place DCT-IV - adds guard bits if necessary */
344	if (gb < GBITS_IN_IMLT) {
345	es = GBITS_IN_IMLT - gb;
346	PreMultiplyRescale(tabidx, mlt, es);
347	R4FFT(tabidx, mlt);
348	PostMultiplyRescale(tabidx, mlt, es);
349	} else {
350	PreMultiply(tabidx, mlt);
351	R4FFT(tabidx, mlt);
352	PostMultiply(tabidx, mlt);
353	}
354
355	return;
356	}
357
358