summaryrefslogtreecommitdiff
path: root/audio_codec/libfaad/helixaac/assembly.h (plain)
blob: a0f8095d4e8325f231868bc251b971e2bb557728
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $
3 *
4 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file,
7 * are subject to the current version of the RealNetworks Public
8 * Source License (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the current version of the RealNetworks Community
11 * Source License (the "RCSL") available at
12 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13 * will apply. You may also obtain the license terms directly from
14 * RealNetworks. You may not use this file except in compliance with
15 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
17 * the rights, obligations and limitations governing use of the
18 * contents of the file.
19 *
20 * This file is part of the Helix DNA Technology. RealNetworks is the
21 * developer of the Original Code and owns the copyrights in the
22 * portions it created.
23 *
24 * This file, and the files included with this file, is distributed
25 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29 * ENJOYMENT OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * http://www.helixcommunity.org/content/tck
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point HE-AAC decoder
40 * Jon Recker (jrecker@real.com)
41 * February 2005
42 *
43 * assembly.h - inline assembly language functions and prototypes
44 *
45 * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y),
46 * returns top 32-bits of 64-bit result
47 * CLIPTOSHORT(x) convert 32-bit integer to 16-bit short,
48 * clipping to [-32768, 32767]
49 * FASTABS(x) branchless absolute value of signed integer x
50 * CLZ(x) count leading zeros on signed integer x
51 * MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y)
52 **************************************************************************************/
53
54#ifndef _ASSEMBLY_H
55#define _ASSEMBLY_H
56
57/* toolchain: MSFT Visual C++
58 * target architecture: x86
59 */
60#if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
61
62#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
63
/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * There is deliberately no C return statement: the value left in eax by the
 * asm block is what the caller receives, which is why warning 4035 is
 * disabled for this branch.
 *
 * One-operand imul multiplies eax by its operand and leaves the 64-bit
 * product in edx:eax; the final mov copies the high half into eax.
 */
64static __inline int MULSHIFT32(int x, int y)
65{
66 __asm {
67 mov eax, x
68 imul y
69 mov eax, edx
70 }
71}
72
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
85
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * Like the rest of this file, it relies on >> of a negative int being an
 * arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
96
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit (0..31),
 *          or 32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behavior in C.
 * Assumes a 32-bit int, as the shift counts below do.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: if the upper part of the window is empty, count it and
     * slide the lower part up */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1 so the last bit is resolved by a subtract */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
128
/* Word64: signed 64-bit integer used for products and accumulators.
 * Builds that define __CW32__ use long long; all others in this branch use
 * the __int64 extension. */
129#ifdef __CW32__
130typedef long long Word64;
131#else
132typedef __int64 Word64;
133#endif
134
/* U64: overlays a Word64 with its two 32-bit halves so each half can be
 * accessed directly. The member order (low word first) matches the
 * little-endian layout of this target; hi32 is signed because it carries
 * the sign of the 64-bit value. */
135typedef union _U64 {
136 Word64 w64;
137 struct {
138 /* x86 = little endian */
139 unsigned int lo32;
140 signed int hi32;
141 } r;
142} U64;
143
/*
 * MADD64 - 64-bit multiply-accumulate: returns sum64 + (x * y), with the
 * product of the two 32-bit operands formed at 64-bit width.
 *
 * Two implementations are selected at compile time:
 *  - Symbian 6.1/7.0 emulator builds (not CodeWarrior): inline asm that
 *    multiplies with imul and adds the edx:eax product into the two 32-bit
 *    halves of sum64 with add/adc, avoiding the compiler's 64-bit multiply
 *    helper.
 *  - everything else: plain C.
 */
144/* returns 64-bit value in [edx:eax] */
145static __inline Word64 MADD64(Word64 sum64, int x, int y)
146{
147#if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
148 /* Workaround for the Symbian emulator because of non existing longlong.lib and
149 * hence __allmul not defined. */
150 __asm {
151 mov eax, x
152 imul y
153 add dword ptr sum64, eax
154 adc dword ptr sum64 + 4, edx
155 }
156#else
157 sum64 += (Word64)x * (Word64)y;
158#endif
159
160 return sum64;
161}
162
163/* toolchain: MSFT Embedded Visual C++
164 * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
165 */
166#elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
167
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
180
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * It relies on >> of a negative int being an arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
191
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit (0..31),
 *          or 32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behavior in C.
 * Assumes a 32-bit int. The original author's note put the signed version
 * at about 17 ARM instructions; the unsigned form is expected to compile to
 * the same sequence, but that has not been re-measured.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: if the upper part of the window is empty, count it and
     * slide the lower part up */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1 so the last bit is resolved by a subtract */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
223
/*
 * Declarations for the two routines this platform does not define inline.
 * They are wrapped in extern "C" so C++ callers link against unmangled
 * symbols.
 *
 * Word64 is the signed 64-bit type used by MADD64. U64 overlays a Word64
 * with its two 32-bit halves, low word first.
 *
 * The two #defines precede the prototypes on purpose: every later use of
 * MULSHIFT32 / MADD64, including the prototypes themselves, is rewritten to
 * the raac_-prefixed names.
 */
224/* implemented in asmfunc.s */
225#ifdef __cplusplus
226extern "C" {
227#endif
228
229 typedef __int64 Word64;
230
231 typedef union _U64 {
232 Word64 w64;
233 struct {
234 /* ARM WinCE = little endian */
235 unsigned int lo32;
236 signed int hi32;
237 } r;
238 } U64;
239
240 /* manual name mangling for just this platform (must match labels in .s file) */
241#define MULSHIFT32 raac_MULSHIFT32
242#define MADD64 raac_MADD64
243
/* MULSHIFT32: top 32 bits of the signed 64-bit product x * y.
 * MADD64:     sum64 + (x * y), accumulated at 64-bit width. */
244 int MULSHIFT32(int x, int y);
245 Word64 MADD64(Word64 sum64, int x, int y);
246
247#ifdef __cplusplus
248}
249#endif
250
251/* toolchain: ARM ADS or RealView
252 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
253 */
254#elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
255
/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * smull writes the low word to zlow (discarded) and the high word to y.
 * The operand order below makes x the Rm operand and y both RdHi and Rs,
 * which satisfies the register rules listed inside the function.
 */
256static __inline int MULSHIFT32(int x, int y)
257{
258 /* rules for smull RdLo, RdHi, Rm, Rs:
259 * RdHi != Rm
260 * RdLo != Rm
261 * RdHi != RdLo
262 */
263 int zlow;
264 __asm {
265 smull zlow, y, x, y
266 }
267
268 return y;
269}
270
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
283
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * It relies on >> of a negative int being an arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
294
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit (0..31),
 *          or 32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behavior in C.
 * Assumes a 32-bit int. The original author's note put the signed version
 * at about 17 ARM instructions.
 *
 * ARM code would look like this, but do NOT use inline asm in ADS for this,
 * because you can't safely use the status register flags intermixed with
 * C code:
 *
 *     mov     numZeros, #1
 *     tst     x, 0xffff0000
 *     addeq   numZeros, numZeros, #16
 *     moveq   x, x, lsl #16
 *     tst     x, 0xff000000
 *     addeq   numZeros, numZeros, #8
 *     moveq   x, x, lsl #8
 *     tst     x, 0xf0000000
 *     addeq   numZeros, numZeros, #4
 *     moveq   x, x, lsl #4
 *     tst     x, 0xc0000000
 *     addeq   numZeros, numZeros, #2
 *     moveq   x, x, lsl #2
 *     sub     numZeros, numZeros, x, lsr #31
 *
 * Reference (bit-at-a-time) form of the same computation for non-zero x:
 *
 *     numZeros = 0;
 *     while (!(x & 0x80000000)) {
 *         numZeros++;
 *         x <<= 1;
 *     }
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: if the upper part of the window is empty, count it and
     * slide the lower part up */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1 so the last bit is resolved by a subtract */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
354
/* Word64: signed 64-bit integer used for products and accumulators. */
355typedef __int64 Word64;
356
/* U64: overlays a Word64 with its two 32-bit halves so MADD64 can hand each
 * half to the smlal instruction separately. The member order (low word
 * first) matches a little-endian layout; hi32 is signed because it carries
 * the sign of the 64-bit value. */
357typedef union _U64 {
358 Word64 w64;
359 struct {
360 /* ARM ADS = little endian */
361 unsigned int lo32;
362 signed int hi32;
363 } r;
364} U64;
365
/*
 * MADD64 - 64-bit multiply-accumulate: returns sum64 + (x * y).
 *
 * sum64 is copied into a U64 so its low and high words can be named as the
 * two accumulator operands of smlal, which adds the signed 64-bit product
 * of x and y into that register pair. The updated pair is then read back
 * as a single Word64.
 */
366static __inline Word64 MADD64(Word64 sum64, int x, int y)
367{
368 U64 u;
369 u.w64 = sum64;
370
371 __asm {
372 smlal u.r.lo32, u.r.hi32, x, y
373 }
374
375 return u.w64;
376}
377
378/* toolchain: ARM gcc
379 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
380 */
381#elif defined(__GNUC__) && defined(__arm__)
382
/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * Operand map for the smull template:
 *   %0  zlow  low word of the product; discarded. Marked early-clobber
 *             ("=&r") so it is not given the same register as an input.
 *   %1  y     high word of the product; this is the return value.
 *   %2  x     first multiplicand.
 *   %3  y     second multiplicand; the "1" constraint places it in the
 *             same register as operand %1.
 * The asm is also declared volatile and lists "cc" as clobbered.
 */
383static __inline__ int MULSHIFT32(int x, int y)
384{
385 int zlow;
386 __asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
387 return y;
388}
389
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
402
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * It relies on >> of a negative int being an arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
413
/*
 * CLZ - count leading zero bits of an int.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit,
 *          or the full bit width of int when x is 0
 *
 * Zero is handled up front because __builtin_clz is undefined for a zero
 * argument. Compilers that predate the builtin fall back to a bit-at-a-time
 * loop over an unsigned copy; shifting the signed value left into the sign
 * bit, as the previous loop did, is undefined behavior in C.
 */
static __inline int CLZ(int x)
{
    if (!x) {
        return (int)(sizeof(int) * 8);
    }

#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
    /* lets the compiler emit a native count-leading-zeros where available */
    return __builtin_clz((unsigned int)x);
#else
    {
        unsigned int bits = (unsigned int)x;
        int numZeros = 0;

        /* walk down from the top bit until a set bit is found */
        while (!(bits & 0x80000000u)) {
            numZeros++;
            bits <<= 1;
        }

        return numZeros;
    }
#endif
}
430
/* Word64: signed 64-bit integer used for products and accumulators. */
431typedef long long Word64;
432
/* U64: overlays a Word64 with its two 32-bit halves so MADD64 can hand each
 * half to the smlal instruction separately. hi32 is signed because it
 * carries the sign of the 64-bit value.
 * NOTE(review): the low-word-first order is only correct for little-endian
 * ARM; nothing in this branch checks endianness - confirm no big-endian gcc
 * ARM target is built from this header. */
433typedef union _U64 {
434 Word64 w64;
435 struct {
436 /* ARM gcc: layout assumes little endian */
437 unsigned int lo32;
438 signed int hi32;
439 } r;
440} U64;
441
/*
 * MADD64 - 64-bit multiply-accumulate: returns sum64 + (x * y).
 *
 * sum64 is copied into a U64 so its low and high words can be bound to the
 * two accumulator operands of smlal, which adds the signed 64-bit product
 * of x and y into that register pair.
 *
 * Both halves use "+&r": read-write operands, marked early-clobber so they
 * are not given the same register as x or y. The asm is also declared
 * volatile and lists "cc" as clobbered.
 */
442static __inline Word64 MADD64(Word64 sum64, int x, int y)
443{
444 U64 u;
445 u.w64 = sum64;
446
447 __asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
448
449 return u.w64;
450}
451
452/* toolchain: x86 gcc
453 * target architecture: x86
454 */
455#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
456
/* Word64: signed 64-bit integer used for products and accumulators. */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* widen both operands first so the multiply is done at 64-bit width */
    const Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
467
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
480
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * It relies on >> of a negative int being an arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
491
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit (0..31),
 *          or 32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behavior in C.
 * Assumes a 32-bit int, as the shift counts below do.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: if the upper part of the window is empty, count it and
     * slide the lower part up */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1 so the last bit is resolved by a subtract */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
523
/* U64: overlays a Word64 with its two 32-bit halves so each half can be
 * accessed directly. The member order (low word first) matches the
 * little-endian layout of this target; hi32 is signed because it carries
 * the sign of the 64-bit value. */
524typedef union _U64 {
525 Word64 w64;
526 struct {
527 /* x86 = little endian */
528 unsigned int lo32;
529 signed int hi32;
530 } r;
531} U64;
532
533static __inline Word64 MADD64(Word64 sum64, int x, int y)
534{
535 sum64 += (Word64)x * (Word64)y;
536
537 return sum64;
538}
539
540#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
541
/* Word64: signed 64-bit integer used for products and accumulators. */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* widen both operands first so the multiply is done at 64-bit width */
    const Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
552
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating
 * to [-32768, 32767].
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift) */
    const int signMask = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* out of range: 0x7fff on the positive side, ~0x7fff (-32768) on the negative side */
    return (short)(signMask ^ ((1 << 15) - 1));
}
565
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       value whose magnitude is wanted
 * returns: |x|. The magnitude of the most negative int is not representable;
 *          on the two's-complement targets this file supports that input
 *          comes back unchanged.
 *
 * The negate step is done in unsigned arithmetic so that the most negative
 * input wraps instead of overflowing a signed int (undefined behavior in C).
 * It relies on >> of a negative int being an arithmetic shift.
 */
static __inline int FASTABS(int x)
{
    /* 0 for non-negative x, all ones for negative x */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* (x ^ 0) - 0 == x;  (x ^ ~0) - ~0 == ~x + 1 == -x */
    return (int)(((unsigned int)x ^ mask) - mask);
}
576
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       value to examine; only its bit pattern matters
 * returns: number of zero bits above the most significant set bit (0..31),
 *          or 32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behavior in C.
 * Assumes a 32-bit int, as the shift counts below do.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: if the upper part of the window is empty, count it and
     * slide the lower part up */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1 so the last bit is resolved by a subtract */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
608
/* U64: overlays a Word64 with its two 32-bit halves so each half can be
 * accessed directly. Unlike the other branches of this header the high
 * word comes first, matching a big-endian layout; hi32 is signed because it
 * carries the sign of the 64-bit value.
 * NOTE(review): this branch is also selected for non-gcc Solaris builds
 * that are not x86, so the same big-endian assumption applies there -
 * confirm against the targets actually built. */
609typedef union _U64 {
610 Word64 w64;
611 struct {
612 /* PowerPC = big endian */
613 signed int hi32;
614 unsigned int lo32;
615 } r;
616} U64;
617
618static __inline Word64 MADD64(Word64 sum64, int x, int y)
619{
620 sum64 += (Word64)x * (Word64)y;
621
622 return sum64;
623}
624
625#else
626
627#error Unsupported platform in assembly.h
628
629#endif /* platforms */
630
631#endif /* _ASSEMBLY_H */
632