summaryrefslogtreecommitdiff
path: root/audio_codec/libcook/ra_mlt.c (plain)
blob: 6947ca1c508c5a579daf2c62405b44eecde5c74b
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: mlt.c,v 1.5 2005/04/27 19:20:50 hubbe Exp $
3 *
4 * REALNETWORKS CONFIDENTIAL--NOT FOR DISTRIBUTION IN SOURCE CODE FORM
5 * Portions Copyright (c) 1995-2002 RealNetworks, Inc.
6 * All Rights Reserved.
7 *
8 * The contents of this file, and the files included with this file,
9 * are subject to the current version of the Real Format Source Code
10 * Porting and Optimization License, available at
11 * https://helixcommunity.org/2005/license/realformatsource (unless
12 * RealNetworks otherwise expressly agrees in writing that you are
13 * subject to a different license). You may also obtain the license
14 * terms directly from RealNetworks. You may not use this file except
15 * in compliance with the Real Format Source Code Porting and
16 * Optimization License. There are no redistribution rights for the
17 * source code of this file. Please see the Real Format Source Code
18 * Porting and Optimization License for the rights, obligations and
19 * limitations governing use of the contents of the file.
20 *
21 * RealNetworks is the developer of the Original Code and owns the
22 * copyrights in the portions it created.
23 *
24 * This file, and the files included with this file, is distributed and
25 * made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND,
26 * EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL
27 * SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT
29 * OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * https://rarvcode-tck.helixcommunity.org
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point RealAudio 8 decoder
40 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
41 * October 2003
42 *
43 * mlt.c - fast MLT/IMLT functions
44 **************************************************************************************/
45
46#include "coder.h"
47#include "assembly.h"
48
49static const int postSkip[NUM_MLT_SIZES] = {7, 3, 1};
50
51/**************************************************************************************
52 * Function: PreMultiply
53 *
54 * Description: pre-twiddle stage of MDCT
55 *
56 * Inputs: table index (for transform size)
57 * buffer of nmlt samples
58 *
59 * Outputs: processed samples in same buffer
60 *
61 * Return: none
62 *
63 * Notes: minimum 1 GB in, 2 GB out - loses (1+tabidx) int bits
64 * normalization by 2/sqrt(N) is rolled into tables here
65 * uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
66 * should asm code (compiler not doing free pointer updates, etc.)
67 **************************************************************************************/
68static void PreMultiply(int tabidx, int *zbuf1)
69{
70 int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
71 int t, cms2, cps2a, sin2a, cps2b, sin2b;
72 int *zbuf2;
73 const int *csptr;
74
75 nmlt = nmltTab[tabidx];
76 zbuf2 = zbuf1 + nmlt - 1;
77 csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
78
79 /* whole thing should fit in registers - verify that compiler does this */
80 for (i = nmlt >> 2; i != 0; i--) {
81 /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
82 cps2a = *csptr++;
83 sin2a = *csptr++;
84 cps2b = *csptr++;
85 sin2b = *csptr++;
86
87 ar1 = *(zbuf1 + 0);
88 ai2 = *(zbuf1 + 1);
89 ai1 = *(zbuf2 + 0);
90 ar2 = *(zbuf2 - 1);
91
92 /* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
93 t = MULSHIFT32(sin2a, ar1 + ai1);
94 z2 = MULSHIFT32(cps2a, ai1) - t;
95 cms2 = cps2a - 2 * sin2a;
96 z1 = MULSHIFT32(cms2, ar1) + t;
97 *zbuf1++ = z1;
98 *zbuf1++ = z2;
99
100 t = MULSHIFT32(sin2b, ar2 + ai2);
101 z2 = MULSHIFT32(cps2b, ai2) - t;
102 cms2 = cps2b - 2 * sin2b;
103 z1 = MULSHIFT32(cms2, ar2) + t;
104 *zbuf2-- = z2;
105 *zbuf2-- = z1;
106
107 }
108
109 /* Note on scaling...
110 * assumes 1 guard bit in, gains (1 + tabidx) fraction bits
111 * i.e. gain 1, 2, or 3 fraction bits, for nSamps = 256, 512, 1024
112 * (left-shifting, since table scaled by 2 / sqrt(nSamps))
113 * this offsets the fact that each extra pass in FFT gains one more int bit
114 */
115 return;
116}
117
118/**************************************************************************************
119 * Function: PostMultiply
120 *
121 * Description: post-twiddle stage of MDCT
122 *
123 * Inputs: table index (for transform size)
124 * buffer of nmlt samples
125 *
126 * Outputs: processed samples in same buffer
127 *
128 * Return: none
129 *
130 * Notes: minimum 1 GB in, 2 GB out - gains 1 int bit
131 * uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
132 * should asm code (compiler not doing free pointer updates, etc.)
133 **************************************************************************************/
134static void PostMultiply(int tabidx, int *fft1)
135{
136 int i, nmlt, ar1, ai1, ar2, ai2, skipFactor;
137 int t, cms2, cps2, sin2;
138 int *fft2;
139 const int *csptr;
140
141 nmlt = nmltTab[tabidx];
142 csptr = cos1sin1tab;
143 skipFactor = postSkip[tabidx];
144 fft2 = fft1 + nmlt - 1;
145
146 /* load coeffs for first pass
147 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
148 */
149 cps2 = *csptr++;
150 sin2 = *csptr;
151 csptr += skipFactor;
152 cms2 = cps2 - 2 * sin2;
153
154 for (i = nmlt >> 2; i != 0; i--) {
155 ar1 = *(fft1 + 0);
156 ai1 = *(fft1 + 1);
157 ar2 = *(fft2 - 1);
158 ai2 = *(fft2 + 0);
159
160 /* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
161 t = MULSHIFT32(sin2, ar1 + ai1);
162 *fft2-- = t - MULSHIFT32(cps2, ai1);
163 *fft1++ = t + MULSHIFT32(cms2, ar1);
164
165 cps2 = *csptr++;
166 sin2 = *csptr;
167 csptr += skipFactor;
168
169 ai2 = -ai2;
170 t = MULSHIFT32(sin2, ar2 + ai2);
171 *fft2-- = t - MULSHIFT32(cps2, ai2);
172 cms2 = cps2 - 2 * sin2;
173 *fft1++ = t + MULSHIFT32(cms2, ar2);
174
175 }
176
177 /* Note on scaling...
178 * assumes 1 guard bit in, gains 2 int bits
179 * max gain of abs(cos) + abs(sin) = sqrt(2) = 1.414, so current format
180 * guarantees 1 guard bit in output
181 */
182 return;
183}
184
185/**************************************************************************************
186 * Function: PreMultiplyRescale
187 *
188 * Description: pre-twiddle stage of MDCT, with rescaling for extra guard bits
189 *
190 * Inputs: table index (for transform size)
191 * buffer of nmlt samples
192 * number of guard bits to add to input before processing
193 *
194 * Outputs: processed samples in same buffer
195 *
196 * Return: none
197 *
198 * Notes: see notes on PreMultiply(), above
199 **************************************************************************************/
200static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
201{
202 int i, nmlt, ar1, ai1, ar2, ai2, z1, z2;
203 int t, cms2, cps2a, sin2a, cps2b, sin2b;
204 int *zbuf2;
205 const int *csptr;
206
207 nmlt = nmltTab[tabidx];
208 zbuf2 = zbuf1 + nmlt - 1;
209 csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
210
211 /* whole thing should fit in registers - verify that compiler does this */
212 for (i = nmlt >> 2; i != 0; i--) {
213 /* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
214 cps2a = *csptr++;
215 sin2a = *csptr++;
216 cps2b = *csptr++;
217 sin2b = *csptr++;
218
219 ar1 = *(zbuf1 + 0) >> es;
220 ai1 = *(zbuf2 + 0) >> es;
221 ai2 = *(zbuf1 + 1) >> es;
222
223 /* gain 1 int bit from MULSHIFT32, but drop 2, 3, or 4 int bits from table scaling */
224 t = MULSHIFT32(sin2a, ar1 + ai1);
225 z2 = MULSHIFT32(cps2a, ai1) - t;
226 cms2 = cps2a - 2 * sin2a;
227 z1 = MULSHIFT32(cms2, ar1) + t;
228 *zbuf1++ = z1;
229 *zbuf1++ = z2;
230
231 ar2 = *(zbuf2 - 1) >> es; /* do here to free up register used for es */
232
233 t = MULSHIFT32(sin2b, ar2 + ai2);
234 z2 = MULSHIFT32(cps2b, ai2) - t;
235 cms2 = cps2b - 2 * sin2b;
236 z1 = MULSHIFT32(cms2, ar2) + t;
237 *zbuf2-- = z2;
238 *zbuf2-- = z1;
239
240 }
241
242 /* see comments in PreMultiply() for notes on scaling */
243 return;
244}
245
246/**************************************************************************************
247 * Function: PostMultiplyRescale
248 *
249 * Description: post-twiddle stage of MDCT, with rescaling for extra guard bits
250 *
251 * Inputs: table index (for transform size)
252 * buffer of nmlt samples
253 * number of guard bits to remove from output
254 *
255 * Outputs: processed samples in same buffer
256 *
257 * Return: none
258 *
259 * Notes: clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
260 * see notes on PostMultiply(), above
261 **************************************************************************************/
262static void PostMultiplyRescale(int tabidx, int *fft1, int es)
263{
264 int i, nmlt, ar1, ai1, ar2, ai2, skipFactor, z;
265 int t, cs2, sin2;
266 int *fft2;
267 const int *csptr;
268
269 nmlt = nmltTab[tabidx];
270 csptr = cos1sin1tab;
271 skipFactor = postSkip[tabidx];
272 fft2 = fft1 + nmlt - 1;
273
274 /* load coeffs for first pass
275 * cps2 = (cos+sin)/2, sin2 = sin/2, cms2 = (cos-sin)/2
276 */
277 cs2 = *csptr++;
278 sin2 = *csptr;
279 csptr += skipFactor;
280
281 for (i = nmlt >> 2; i != 0; i--) {
282 ar1 = *(fft1 + 0);
283 ai1 = *(fft1 + 1);
284 ai2 = *(fft2 + 0);
285
286 /* gain 1 int bit from MULSHIFT32, and one since coeffs are stored as 0.5 * (cos+sin), 0.5*sin */
287 t = MULSHIFT32(sin2, ar1 + ai1);
288 z = t - MULSHIFT32(cs2, ai1);
289 CLIP_2N_SHIFT(z, es);
290 *fft2-- = z;
291 cs2 -= 2 * sin2;
292 z = t + MULSHIFT32(cs2, ar1);
293 CLIP_2N_SHIFT(z, es);
294 *fft1++ = z;
295
296 cs2 = *csptr++;
297 sin2 = *csptr;
298 csptr += skipFactor;
299
300 ar2 = *fft2;
301 ai2 = -ai2;
302 t = MULSHIFT32(sin2, ar2 + ai2);
303 z = t - MULSHIFT32(cs2, ai2);
304 CLIP_2N_SHIFT(z, es);
305 *fft2-- = z;
306 cs2 -= 2 * sin2;
307 z = t + MULSHIFT32(cs2, ar2);
308 CLIP_2N_SHIFT(z, es);
309 *fft1++ = z;
310 cs2 += 2 * sin2;
311
312 }
313
314 /* see comments in PostMultiply() for notes on scaling */
315 return;
316}
317
318/**************************************************************************************
319 * Function: IMLTNoWindow
320 *
321 * Description: inverse MLT without window or overlap-add
322 *
323 * Inputs: table index (for transform size)
324 * buffer of nmlt samples
325 * number of guard bits in the input buffer
326 *
327 * Outputs: processed samples in same buffer
328 *
329 * Return: none
330 *
331 * Notes: operates in-place, and generates nmlt output samples from nmlt input
332 * samples (doesn't do synthesis window which expands to 2*nmlt samples)
333 * if number of guard bits in input is < GBITS_IN_IMLT, the input is
334 * scaled (>>) before the IMLT and rescaled (<<, with clipping) after
335 * the IMLT (rare)
336 * the output has FBITS_LOST_IMLT fewer fraction bits than the input
337 * the output will always have at least 1 guard bit
338 **************************************************************************************/
339void IMLTNoWindow(int tabidx, int *mlt, int gb)
340{
341 int es;
342
343 /* fast in-place DCT-IV - adds guard bits if necessary */
344 if (gb < GBITS_IN_IMLT) {
345 es = GBITS_IN_IMLT - gb;
346 PreMultiplyRescale(tabidx, mlt, es);
347 R4FFT(tabidx, mlt);
348 PostMultiplyRescale(tabidx, mlt, es);
349 } else {
350 PreMultiply(tabidx, mlt);
351 R4FFT(tabidx, mlt);
352 PostMultiply(tabidx, mlt);
353 }
354
355 return;
356}
357
358