blob: abec00816b4fe96e847c9312370a755bd10a1782
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: sbrimdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * sbrimdct.c - inverse MDCT without clipping or interleaving, for input to SBR |
44 | **************************************************************************************/ |
45 | |
46 | #include "coder.h" |
47 | #include "assembly.h" |
48 | |
49 | /************************************************************************************** |
50 | * Function: DecWindowOverlapNoClip |
51 | * |
52 | * Description: apply synthesis window, do overlap-add without clipping, |
53 | * for winSequence LONG-LONG |
54 | * |
55 | * Inputs: input buffer (output of type-IV DCT) |
56 | * overlap buffer (saved from last time) |
57 | * window type (sin or KBD) for input buffer |
58 | * window type (sin or KBD) for overlap buffer |
59 | * |
60 | * Outputs: one channel, one frame of 32-bit PCM, non-interleaved |
61 | * |
62 | * Return: none |
63 | * |
64 | * Notes: use this function when the decoded PCM is going to the SBR decoder |
65 | **************************************************************************************/ |
66 | void DecWindowOverlapNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev) |
67 | { |
68 | int in, w0, w1, f0, f1; |
69 | int *buf1, *over1, *out1; |
70 | const int *wndPrev, *wndCurr; |
71 | |
72 | buf0 += (1024 >> 1); |
73 | buf1 = buf0 - 1; |
74 | out1 = out0 + 1024 - 1; |
75 | over1 = over0 + 1024 - 1; |
76 | |
77 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
78 | if (winTypeCurr == winTypePrev) { |
79 | /* cut window loads in half since current and overlap sections use same symmetric window */ |
80 | do { |
81 | w0 = *wndPrev++; |
82 | w1 = *wndPrev++; |
83 | in = *buf0++; |
84 | |
85 | f0 = MULSHIFT32(w0, in); |
86 | f1 = MULSHIFT32(w1, in); |
87 | |
88 | in = *over0; |
89 | *out0++ = in - f0; |
90 | |
91 | in = *over1; |
92 | *out1-- = in + f1; |
93 | |
94 | in = *buf1--; |
95 | *over1-- = MULSHIFT32(w0, in); |
96 | *over0++ = MULSHIFT32(w1, in); |
97 | } while (over0 < over1); |
98 | } else { |
99 | /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */ |
100 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
101 | do { |
102 | w0 = *wndPrev++; |
103 | w1 = *wndPrev++; |
104 | in = *buf0++; |
105 | |
106 | f0 = MULSHIFT32(w0, in); |
107 | f1 = MULSHIFT32(w1, in); |
108 | |
109 | in = *over0; |
110 | *out0++ = in - f0; |
111 | |
112 | in = *over1; |
113 | *out1-- = in + f1; |
114 | |
115 | w0 = *wndCurr++; |
116 | w1 = *wndCurr++; |
117 | in = *buf1--; |
118 | |
119 | *over1-- = MULSHIFT32(w0, in); |
120 | *over0++ = MULSHIFT32(w1, in); |
121 | } while (over0 < over1); |
122 | } |
123 | } |
124 | |
125 | /************************************************************************************** |
126 | * Function: DecWindowOverlapLongStart |
127 | * |
128 | * Description: apply synthesis window, do overlap-add, without clipping |
129 | * for winSequence LONG-START |
130 | * |
131 | * Inputs: input buffer (output of type-IV DCT) |
132 | * overlap buffer (saved from last time) |
133 | * window type (sin or KBD) for input buffer |
134 | * window type (sin or KBD) for overlap buffer |
135 | * |
136 | * Outputs: one channel, one frame of 32-bit PCM, non-interleaved |
137 | * |
138 | * Return: none |
139 | * |
140 | * Notes: use this function when the decoded PCM is going to the SBR decoder |
141 | **************************************************************************************/ |
142 | void DecWindowOverlapLongStartNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev) |
143 | { |
144 | int i, in, w0, w1, f0, f1; |
145 | int *buf1, *over1, *out1; |
146 | const int *wndPrev, *wndCurr; |
147 | |
148 | buf0 += (1024 >> 1); |
149 | buf1 = buf0 - 1; |
150 | out1 = out0 + 1024 - 1; |
151 | over1 = over0 + 1024 - 1; |
152 | |
153 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
154 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
155 | do { |
156 | w0 = *wndPrev++; |
157 | w1 = *wndPrev++; |
158 | in = *buf0++; |
159 | |
160 | f0 = MULSHIFT32(w0, in); |
161 | f1 = MULSHIFT32(w1, in); |
162 | |
163 | in = *over0; |
164 | *out0++ = in - f0; |
165 | |
166 | in = *over1; |
167 | *out1-- = in + f1; |
168 | |
169 | in = *buf1--; |
170 | |
171 | *over1-- = 0; /* Wn = 0 for n = (2047, 2046, ... 1600) */ |
172 | *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */ |
173 | } while (--i); |
174 | |
175 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
176 | |
177 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
178 | do { |
179 | w0 = *wndPrev++; |
180 | w1 = *wndPrev++; |
181 | in = *buf0++; |
182 | |
183 | f0 = MULSHIFT32(w0, in); |
184 | f1 = MULSHIFT32(w1, in); |
185 | |
186 | in = *over0; |
187 | *out0++ = in - f0; |
188 | |
189 | in = *over1; |
190 | *out1-- = in + f1; |
191 | |
192 | w0 = *wndCurr++; /* W[0], W[1], ... --> W[255], W[254], ... */ |
193 | w1 = *wndCurr++; /* W[127], W[126], ... --> W[128], W[129], ... */ |
194 | in = *buf1--; |
195 | |
196 | *over1-- = MULSHIFT32(w0, in); /* Wn = short window for n = (1599, 1598, ... , 1536) */ |
197 | *over0++ = MULSHIFT32(w1, in); /* Wn = short window for n = (1472, 1473, ... , 1535) */ |
198 | } while (over0 < over1); |
199 | } |
200 | |
201 | /************************************************************************************** |
202 | * Function: DecWindowOverlapLongStop |
203 | * |
204 | * Description: apply synthesis window, do overlap-add, without clipping |
205 | * for winSequence LONG-STOP |
206 | * |
207 | * Inputs: input buffer (output of type-IV DCT) |
208 | * overlap buffer (saved from last time) |
209 | * window type (sin or KBD) for input buffer |
210 | * window type (sin or KBD) for overlap buffer |
211 | * |
212 | * Outputs: one channel, one frame of 32-bit PCM, non-interleaved |
213 | * |
214 | * Return: none |
215 | * |
216 | * Notes: use this function when the decoded PCM is going to the SBR decoder |
217 | **************************************************************************************/ |
218 | void DecWindowOverlapLongStopNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev) |
219 | { |
220 | int i, in, w0, w1, f0, f1; |
221 | int *buf1, *over1, *out1; |
222 | const int *wndPrev, *wndCurr; |
223 | |
224 | buf0 += (1024 >> 1); |
225 | buf1 = buf0 - 1; |
226 | out1 = out0 + 1024 - 1; |
227 | over1 = over0 + 1024 - 1; |
228 | |
229 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
230 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]); |
231 | |
232 | i = 448; /* 2 outputs, 2 overlaps per loop */ |
233 | do { |
234 | /* Wn = 0 for n = (0, 1, ... 447) */ |
235 | /* Wn = 1 for n = (576, 577, ... 1023) */ |
236 | in = *buf0++; |
237 | f1 = in >> 1; /* scale since skipping multiply by Q31 */ |
238 | |
239 | in = *over0; |
240 | *out0++ = in; |
241 | |
242 | in = *over1; |
243 | *out1-- = in + f1; |
244 | |
245 | w0 = *wndCurr++; |
246 | w1 = *wndCurr++; |
247 | in = *buf1--; |
248 | |
249 | *over1-- = MULSHIFT32(w0, in); |
250 | *over0++ = MULSHIFT32(w1, in); |
251 | } while (--i); |
252 | |
253 | /* do 64 more loops - 2 outputs, 2 overlaps per loop */ |
254 | do { |
255 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
256 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
257 | in = *buf0++; |
258 | |
259 | f0 = MULSHIFT32(w0, in); |
260 | f1 = MULSHIFT32(w1, in); |
261 | |
262 | in = *over0; |
263 | *out0++ = in - f0; |
264 | |
265 | in = *over1; |
266 | *out1-- = in + f1; |
267 | |
268 | w0 = *wndCurr++; |
269 | w1 = *wndCurr++; |
270 | in = *buf1--; |
271 | |
272 | *over1-- = MULSHIFT32(w0, in); |
273 | *over0++ = MULSHIFT32(w1, in); |
274 | } while (over0 < over1); |
275 | } |
276 | |
277 | /************************************************************************************** |
278 | * Function: DecWindowOverlapShort |
279 | * |
280 | * Description: apply synthesis window, do overlap-add, without clipping |
281 | * for winSequence EIGHT-SHORT (does all 8 short blocks) |
282 | * |
283 | * Inputs: input buffer (output of type-IV DCT) |
284 | * overlap buffer (saved from last time) |
285 | * window type (sin or KBD) for input buffer |
286 | * window type (sin or KBD) for overlap buffer |
287 | * |
288 | * Outputs: one channel, one frame of 32-bit PCM, non-interleaved |
289 | * |
290 | * Return: none |
291 | * |
292 | * Notes: use this function when the decoded PCM is going to the SBR decoder |
293 | **************************************************************************************/ |
294 | void DecWindowOverlapShortNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev) |
295 | { |
296 | int i, in, w0, w1, f0, f1; |
297 | int *buf1, *over1, *out1; |
298 | const int *wndPrev, *wndCurr; |
299 | |
300 | wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
301 | wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]); |
302 | |
303 | /* pcm[0-447] = 0 + overlap[0-447] */ |
304 | i = 448; |
305 | do { |
306 | f0 = *over0++; |
307 | f1 = *over0++; |
308 | *out0++ = f0; |
309 | *out0++ = f1; |
310 | i -= 2; |
311 | } while (i); |
312 | |
313 | /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */ |
314 | out1 = out0 + (128 - 1); |
315 | over1 = over0 + 128 - 1; |
316 | buf0 += 64; |
317 | buf1 = buf0 - 1; |
318 | do { |
319 | w0 = *wndPrev++; /* W[0], W[1], ...W[63] */ |
320 | w1 = *wndPrev++; /* W[127], W[126], ... W[64] */ |
321 | in = *buf0++; |
322 | |
323 | f0 = MULSHIFT32(w0, in); |
324 | f1 = MULSHIFT32(w1, in); |
325 | |
326 | in = *over0; |
327 | *out0++ = in - f0; |
328 | |
329 | in = *over1; |
330 | *out1-- = in + f1; |
331 | |
332 | w0 = *wndCurr++; |
333 | w1 = *wndCurr++; |
334 | in = *buf1--; |
335 | |
336 | /* save over0/over1 for next short block, in the slots just vacated */ |
337 | *over1-- = MULSHIFT32(w0, in); |
338 | *over0++ = MULSHIFT32(w1, in); |
339 | } while (over0 < over1); |
340 | |
341 | /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703] |
342 | * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831] |
343 | * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959] |
344 | */ |
345 | for (i = 0; i < 3; i++) { |
346 | out0 += 64; |
347 | out1 = out0 + 128 - 1; |
348 | over0 += 64; |
349 | over1 = over0 + 128 - 1; |
350 | buf0 += 64; |
351 | buf1 = buf0 - 1; |
352 | wndCurr -= 128; |
353 | |
354 | do { |
355 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
356 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
357 | in = *buf0++; |
358 | |
359 | f0 = MULSHIFT32(w0, in); |
360 | f1 = MULSHIFT32(w1, in); |
361 | |
362 | in = *(over0 - 128); /* from last short block */ |
363 | in += *(over0 + 0); /* from last full frame */ |
364 | *out0++ = in - f0; |
365 | |
366 | in = *(over1 - 128); /* from last short block */ |
367 | in += *(over1 + 0); /* from last full frame */ |
368 | *out1-- = in + f1; |
369 | |
370 | /* save over0/over1 for next short block, in the slots just vacated */ |
371 | in = *buf1--; |
372 | *over1-- = MULSHIFT32(w0, in); |
373 | *over0++ = MULSHIFT32(w1, in); |
374 | } while (over0 < over1); |
375 | } |
376 | |
377 | /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023] |
378 | * over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127] |
379 | */ |
380 | out0 += 64; |
381 | over0 -= 832; /* points at overlap[64] */ |
382 | over1 = over0 + 128 - 1; /* points at overlap[191] */ |
383 | buf0 += 64; |
384 | buf1 = buf0 - 1; |
385 | wndCurr -= 128; |
386 | do { |
387 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
388 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
389 | in = *buf0++; |
390 | |
391 | f0 = MULSHIFT32(w0, in); |
392 | f1 = MULSHIFT32(w1, in); |
393 | |
394 | in = *(over0 + 768); /* from last short block */ |
395 | in += *(over0 + 896); /* from last full frame */ |
396 | *out0++ = in - f0; |
397 | |
398 | in = *(over1 + 768); /* from last short block */ |
399 | *(over1 - 128) = in + f1; |
400 | |
401 | in = *buf1--; |
402 | *over1-- = MULSHIFT32(w0, in); /* save in overlap[128-191] */ |
403 | *over0++ = MULSHIFT32(w1, in); /* save in overlap[64-127] */ |
404 | } while (over0 < over1); |
405 | |
406 | /* over0 now points at overlap[128] */ |
407 | |
408 | /* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127] |
409 | * over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127] |
410 | * over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127] |
411 | * over[448-576] = Wc[128-255] * block7[128-255] |
412 | */ |
413 | for (i = 0; i < 3; i++) { |
414 | over0 += 64; |
415 | over1 = over0 + 128 - 1; |
416 | buf0 += 64; |
417 | buf1 = buf0 - 1; |
418 | wndCurr -= 128; |
419 | do { |
420 | w0 = *wndCurr++; /* W[0], W[1], ...W[63] */ |
421 | w1 = *wndCurr++; /* W[127], W[126], ... W[64] */ |
422 | in = *buf0++; |
423 | |
424 | f0 = MULSHIFT32(w0, in); |
425 | f1 = MULSHIFT32(w1, in); |
426 | |
427 | /* from last short block */ |
428 | *(over0 - 128) -= f0; |
429 | *(over1 - 128) += f1; |
430 | |
431 | in = *buf1--; |
432 | *over1-- = MULSHIFT32(w0, in); |
433 | *over0++ = MULSHIFT32(w1, in); |
434 | } while (over0 < over1); |
435 | } |
436 | |
437 | /* over[576-1024] = 0 */ |
438 | i = 448; |
439 | over0 += 64; |
440 | do { |
441 | *over0++ = 0; |
442 | *over0++ = 0; |
443 | *over0++ = 0; |
444 | *over0++ = 0; |
445 | i -= 4; |
446 | } while (i); |
447 | } |
448 |