summaryrefslogtreecommitdiff
path: root/audio_codec/wfd_aac_decoder/sbrimdct.c (plain)
blob: abec00816b4fe96e847c9312370a755bd10a1782
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: sbrimdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
3 *
4 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file,
7 * are subject to the current version of the RealNetworks Public
8 * Source License (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the current version of the RealNetworks Community
11 * Source License (the "RCSL") available at
12 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13 * will apply. You may also obtain the license terms directly from
14 * RealNetworks. You may not use this file except in compliance with
15 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
17 * the rights, obligations and limitations governing use of the
18 * contents of the file.
19 *
20 * This file is part of the Helix DNA Technology. RealNetworks is the
21 * developer of the Original Code and owns the copyrights in the
22 * portions it created.
23 *
24 * This file, and the files included with this file, is distributed
25 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29 * ENJOYMENT OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * http://www.helixcommunity.org/content/tck
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point HE-AAC decoder
40 * Jon Recker (jrecker@real.com)
41 * February 2005
42 *
43 * sbrimdct.c - inverse MDCT without clipping or interleaving, for input to SBR
44 **************************************************************************************/
45
46#include "coder.h"
47#include "assembly.h"
48
49/**************************************************************************************
50 * Function: DecWindowOverlapNoClip
51 *
52 * Description: apply synthesis window, do overlap-add without clipping,
53 * for winSequence LONG-LONG
54 *
55 * Inputs: input buffer (output of type-IV DCT)
56 * overlap buffer (saved from last time)
57 * window type (sin or KBD) for input buffer
58 * window type (sin or KBD) for overlap buffer
59 *
60 * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
61 *
62 * Return: none
63 *
64 * Notes: use this function when the decoded PCM is going to the SBR decoder
65 **************************************************************************************/
66void DecWindowOverlapNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
67{
68 int in, w0, w1, f0, f1;
69 int *buf1, *over1, *out1;
70 const int *wndPrev, *wndCurr;
71
72 buf0 += (1024 >> 1);
73 buf1 = buf0 - 1;
74 out1 = out0 + 1024 - 1;
75 over1 = over0 + 1024 - 1;
76
77 wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
78 if (winTypeCurr == winTypePrev) {
79 /* cut window loads in half since current and overlap sections use same symmetric window */
80 do {
81 w0 = *wndPrev++;
82 w1 = *wndPrev++;
83 in = *buf0++;
84
85 f0 = MULSHIFT32(w0, in);
86 f1 = MULSHIFT32(w1, in);
87
88 in = *over0;
89 *out0++ = in - f0;
90
91 in = *over1;
92 *out1-- = in + f1;
93
94 in = *buf1--;
95 *over1-- = MULSHIFT32(w0, in);
96 *over0++ = MULSHIFT32(w1, in);
97 } while (over0 < over1);
98 } else {
99 /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
100 wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
101 do {
102 w0 = *wndPrev++;
103 w1 = *wndPrev++;
104 in = *buf0++;
105
106 f0 = MULSHIFT32(w0, in);
107 f1 = MULSHIFT32(w1, in);
108
109 in = *over0;
110 *out0++ = in - f0;
111
112 in = *over1;
113 *out1-- = in + f1;
114
115 w0 = *wndCurr++;
116 w1 = *wndCurr++;
117 in = *buf1--;
118
119 *over1-- = MULSHIFT32(w0, in);
120 *over0++ = MULSHIFT32(w1, in);
121 } while (over0 < over1);
122 }
123}
124
125/**************************************************************************************
126 * Function: DecWindowOverlapLongStart
127 *
128 * Description: apply synthesis window, do overlap-add, without clipping
129 * for winSequence LONG-START
130 *
131 * Inputs: input buffer (output of type-IV DCT)
132 * overlap buffer (saved from last time)
133 * window type (sin or KBD) for input buffer
134 * window type (sin or KBD) for overlap buffer
135 *
136 * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
137 *
138 * Return: none
139 *
140 * Notes: use this function when the decoded PCM is going to the SBR decoder
141 **************************************************************************************/
142void DecWindowOverlapLongStartNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
143{
144 int i, in, w0, w1, f0, f1;
145 int *buf1, *over1, *out1;
146 const int *wndPrev, *wndCurr;
147
148 buf0 += (1024 >> 1);
149 buf1 = buf0 - 1;
150 out1 = out0 + 1024 - 1;
151 over1 = over0 + 1024 - 1;
152
153 wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
154 i = 448; /* 2 outputs, 2 overlaps per loop */
155 do {
156 w0 = *wndPrev++;
157 w1 = *wndPrev++;
158 in = *buf0++;
159
160 f0 = MULSHIFT32(w0, in);
161 f1 = MULSHIFT32(w1, in);
162
163 in = *over0;
164 *out0++ = in - f0;
165
166 in = *over1;
167 *out1-- = in + f1;
168
169 in = *buf1--;
170
171 *over1-- = 0; /* Wn = 0 for n = (2047, 2046, ... 1600) */
172 *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */
173 } while (--i);
174
175 wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
176
177 /* do 64 more loops - 2 outputs, 2 overlaps per loop */
178 do {
179 w0 = *wndPrev++;
180 w1 = *wndPrev++;
181 in = *buf0++;
182
183 f0 = MULSHIFT32(w0, in);
184 f1 = MULSHIFT32(w1, in);
185
186 in = *over0;
187 *out0++ = in - f0;
188
189 in = *over1;
190 *out1-- = in + f1;
191
192 w0 = *wndCurr++; /* W[0], W[1], ... --> W[255], W[254], ... */
193 w1 = *wndCurr++; /* W[127], W[126], ... --> W[128], W[129], ... */
194 in = *buf1--;
195
196 *over1-- = MULSHIFT32(w0, in); /* Wn = short window for n = (1599, 1598, ... , 1536) */
197 *over0++ = MULSHIFT32(w1, in); /* Wn = short window for n = (1472, 1473, ... , 1535) */
198 } while (over0 < over1);
199}
200
201/**************************************************************************************
202 * Function: DecWindowOverlapLongStop
203 *
204 * Description: apply synthesis window, do overlap-add, without clipping
205 * for winSequence LONG-STOP
206 *
207 * Inputs: input buffer (output of type-IV DCT)
208 * overlap buffer (saved from last time)
209 * window type (sin or KBD) for input buffer
210 * window type (sin or KBD) for overlap buffer
211 *
212 * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
213 *
214 * Return: none
215 *
216 * Notes: use this function when the decoded PCM is going to the SBR decoder
217 **************************************************************************************/
218void DecWindowOverlapLongStopNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
219{
220 int i, in, w0, w1, f0, f1;
221 int *buf1, *over1, *out1;
222 const int *wndPrev, *wndCurr;
223
224 buf0 += (1024 >> 1);
225 buf1 = buf0 - 1;
226 out1 = out0 + 1024 - 1;
227 over1 = over0 + 1024 - 1;
228
229 wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
230 wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
231
232 i = 448; /* 2 outputs, 2 overlaps per loop */
233 do {
234 /* Wn = 0 for n = (0, 1, ... 447) */
235 /* Wn = 1 for n = (576, 577, ... 1023) */
236 in = *buf0++;
237 f1 = in >> 1; /* scale since skipping multiply by Q31 */
238
239 in = *over0;
240 *out0++ = in;
241
242 in = *over1;
243 *out1-- = in + f1;
244
245 w0 = *wndCurr++;
246 w1 = *wndCurr++;
247 in = *buf1--;
248
249 *over1-- = MULSHIFT32(w0, in);
250 *over0++ = MULSHIFT32(w1, in);
251 } while (--i);
252
253 /* do 64 more loops - 2 outputs, 2 overlaps per loop */
254 do {
255 w0 = *wndPrev++; /* W[0], W[1], ...W[63] */
256 w1 = *wndPrev++; /* W[127], W[126], ... W[64] */
257 in = *buf0++;
258
259 f0 = MULSHIFT32(w0, in);
260 f1 = MULSHIFT32(w1, in);
261
262 in = *over0;
263 *out0++ = in - f0;
264
265 in = *over1;
266 *out1-- = in + f1;
267
268 w0 = *wndCurr++;
269 w1 = *wndCurr++;
270 in = *buf1--;
271
272 *over1-- = MULSHIFT32(w0, in);
273 *over0++ = MULSHIFT32(w1, in);
274 } while (over0 < over1);
275}
276
277/**************************************************************************************
278 * Function: DecWindowOverlapShort
279 *
280 * Description: apply synthesis window, do overlap-add, without clipping
281 * for winSequence EIGHT-SHORT (does all 8 short blocks)
282 *
283 * Inputs: input buffer (output of type-IV DCT)
284 * overlap buffer (saved from last time)
285 * window type (sin or KBD) for input buffer
286 * window type (sin or KBD) for overlap buffer
287 *
288 * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
289 *
290 * Return: none
291 *
292 * Notes: use this function when the decoded PCM is going to the SBR decoder
293 **************************************************************************************/
294void DecWindowOverlapShortNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
295{
296 int i, in, w0, w1, f0, f1;
297 int *buf1, *over1, *out1;
298 const int *wndPrev, *wndCurr;
299
300 wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
301 wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
302
303 /* pcm[0-447] = 0 + overlap[0-447] */
304 i = 448;
305 do {
306 f0 = *over0++;
307 f1 = *over0++;
308 *out0++ = f0;
309 *out0++ = f1;
310 i -= 2;
311 } while (i);
312
313 /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
314 out1 = out0 + (128 - 1);
315 over1 = over0 + 128 - 1;
316 buf0 += 64;
317 buf1 = buf0 - 1;
318 do {
319 w0 = *wndPrev++; /* W[0], W[1], ...W[63] */
320 w1 = *wndPrev++; /* W[127], W[126], ... W[64] */
321 in = *buf0++;
322
323 f0 = MULSHIFT32(w0, in);
324 f1 = MULSHIFT32(w1, in);
325
326 in = *over0;
327 *out0++ = in - f0;
328
329 in = *over1;
330 *out1-- = in + f1;
331
332 w0 = *wndCurr++;
333 w1 = *wndCurr++;
334 in = *buf1--;
335
336 /* save over0/over1 for next short block, in the slots just vacated */
337 *over1-- = MULSHIFT32(w0, in);
338 *over0++ = MULSHIFT32(w1, in);
339 } while (over0 < over1);
340
341 /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
342 * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
343 * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
344 */
345 for (i = 0; i < 3; i++) {
346 out0 += 64;
347 out1 = out0 + 128 - 1;
348 over0 += 64;
349 over1 = over0 + 128 - 1;
350 buf0 += 64;
351 buf1 = buf0 - 1;
352 wndCurr -= 128;
353
354 do {
355 w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
356 w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
357 in = *buf0++;
358
359 f0 = MULSHIFT32(w0, in);
360 f1 = MULSHIFT32(w1, in);
361
362 in = *(over0 - 128); /* from last short block */
363 in += *(over0 + 0); /* from last full frame */
364 *out0++ = in - f0;
365
366 in = *(over1 - 128); /* from last short block */
367 in += *(over1 + 0); /* from last full frame */
368 *out1-- = in + f1;
369
370 /* save over0/over1 for next short block, in the slots just vacated */
371 in = *buf1--;
372 *over1-- = MULSHIFT32(w0, in);
373 *over0++ = MULSHIFT32(w1, in);
374 } while (over0 < over1);
375 }
376
377 /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023]
378 * over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
379 */
380 out0 += 64;
381 over0 -= 832; /* points at overlap[64] */
382 over1 = over0 + 128 - 1; /* points at overlap[191] */
383 buf0 += 64;
384 buf1 = buf0 - 1;
385 wndCurr -= 128;
386 do {
387 w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
388 w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
389 in = *buf0++;
390
391 f0 = MULSHIFT32(w0, in);
392 f1 = MULSHIFT32(w1, in);
393
394 in = *(over0 + 768); /* from last short block */
395 in += *(over0 + 896); /* from last full frame */
396 *out0++ = in - f0;
397
398 in = *(over1 + 768); /* from last short block */
399 *(over1 - 128) = in + f1;
400
401 in = *buf1--;
402 *over1-- = MULSHIFT32(w0, in); /* save in overlap[128-191] */
403 *over0++ = MULSHIFT32(w1, in); /* save in overlap[64-127] */
404 } while (over0 < over1);
405
406 /* over0 now points at overlap[128] */
407
408 /* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
409 * over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
410 * over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
411 * over[448-576] = Wc[128-255] * block7[128-255]
412 */
413 for (i = 0; i < 3; i++) {
414 over0 += 64;
415 over1 = over0 + 128 - 1;
416 buf0 += 64;
417 buf1 = buf0 - 1;
418 wndCurr -= 128;
419 do {
420 w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
421 w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
422 in = *buf0++;
423
424 f0 = MULSHIFT32(w0, in);
425 f1 = MULSHIFT32(w1, in);
426
427 /* from last short block */
428 *(over0 - 128) -= f0;
429 *(over1 - 128) += f1;
430
431 in = *buf1--;
432 *over1-- = MULSHIFT32(w0, in);
433 *over0++ = MULSHIFT32(w1, in);
434 } while (over0 < over1);
435 }
436
437 /* over[576-1024] = 0 */
438 i = 448;
439 over0 += 64;
440 do {
441 *over0++ = 0;
442 *over0++ = 0;
443 *over0++ = 0;
444 *over0++ = 0;
445 i -= 4;
446 } while (i);
447}
448