summaryrefslogtreecommitdiff
path: root/audio_codec/libfaad/helixaac/assembly_mw.h (plain)
blob: 335f4b0567e2c367e7fd35638f2b02c18b7bafe6
1/* ***** BEGIN LICENSE BLOCK *****
2 * Source last modified: $Id: assembly.h,v 1.7 2005/11/10 00:04:40 margotm Exp $
3 *
4 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file,
7 * are subject to the current version of the RealNetworks Public
8 * Source License (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the current version of the RealNetworks Community
11 * Source License (the "RCSL") available at
12 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13 * will apply. You may also obtain the license terms directly from
14 * RealNetworks. You may not use this file except in compliance with
15 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
17 * the rights, obligations and limitations governing use of the
18 * contents of the file.
19 *
20 * This file is part of the Helix DNA Technology. RealNetworks is the
21 * developer of the Original Code and owns the copyrights in the
22 * portions it created.
23 *
24 * This file, and the files included with this file, is distributed
25 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29 * ENJOYMENT OR NON-INFRINGEMENT.
30 *
31 * Technology Compatibility Kit Test Suite(s) Location:
32 * http://www.helixcommunity.org/content/tck
33 *
34 * Contributor(s):
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38/**************************************************************************************
39 * Fixed-point HE-AAC decoder
40 * Jon Recker (jrecker@real.com)
41 * February 2005
42 *
43 * assembly.h - inline assembly language functions and prototypes
44 *
45 * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y),
46 * returns top 32-bits of 64-bit result
47 * CLIPTOSHORT(x) convert 32-bit integer to 16-bit short,
48 * clipping to [-32768, 32767]
49 * FASTABS(x) branchless absolute value of signed integer x
50 * CLZ(x) count leading zeros on signed integer x
51 * MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y)
52 **************************************************************************************/
53
54#ifndef _ASSEMBLY_H
55#define _ASSEMBLY_H
56
57//#define _Inline inline
58#define _ARC32
59/* toolchain: MSFT Visual C++
60 * target architecture: x86
61 */
62#if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
63
64#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
65
66static __inline int MULSHIFT32(int x, int y)
67{
68 __asm {
69 mov eax, x
70 imul y
71 mov eax, edx
72 }
73}
74
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Written with plain comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
87
/*
 * FASTABS - absolute value of a signed integer without a data-dependent
 * branch in the arithmetic.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
98
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The search runs on an unsigned copy of x: left-shifting a signed int into
 * or past the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* binary search: halve the window in which the top set bit can lie */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;
}
130
131#ifdef __CW32__
132typedef long long Word64;
133#else
134typedef __int64 Word64;
135#endif
136
137typedef union _U64 {
138 Word64 w64;
139 struct {
140 /* x86 = little endian */
141 unsigned int lo32;
142 signed int hi32;
143 } r;
144} U64;
145
146/* returns 64-bit value in [edx:eax] */
147static __inline Word64 madd64(Word64 sum64, int x, int y)
148{
149#if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
150 /* Workaround for the Symbian emulator because of non existing longlong.lib and
151 * hence __allmul not defined. */
152 __asm {
153 mov eax, x
154 imul y
155 add dword ptr sum64, eax
156 adc dword ptr sum64 + 4, edx
157 }
158#else
159 sum64 += (Word64)x * (Word64)y;
160
161 /* equivalent to return (sum + ((__int64)x * y)); */
162#endif
163}
164
/* Statement-style accumulator helpers; arguments are parenthesised so that
 * any lvalue or expression can be passed safely. */

/* Clear a 64-bit accumulator. */
#define SET_ZERO(x) (x) = 0
/* sum64 += x * y, via madd64(). */
#define MADD64(sum64, x, y) (sum64) = madd64((sum64), (x), (y))
/* x64 += y64.  The expansion already ends in ';' (kept for existing callers). */
#define ADD64(x64, y64) (x64) += (y64);
168
169/* toolchain: MSFT Embedded Visual C++
170 * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
171 */
172#elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
173
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Plain comparisons are used so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
186
/*
 * FASTABS - absolute value of a signed integer.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
197
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The search runs on an unsigned copy of x: left-shifting a signed int into
 * or past the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;
}
229
230/* implemented in asmfunc.s */
231#ifdef __cplusplus
232extern "C" {
233#endif
234
235 typedef __int64 Word64;
236
237 typedef union _U64 {
238 Word64 w64;
239 struct {
240 /* ARM WinCE = little endian */
241 unsigned int lo32;
242 signed int hi32;
243 } r;
244 } U64;
245
246 /* manual name mangling for just this platform (must match labels in .s file) */
247#define MULSHIFT32 raac_MULSHIFT32
248#define MADD64 raac_MADD64
249
250 int MULSHIFT32(int x, int y);
251 Word64 MADD64(Word64 sum64, int x, int y);
252
253#ifdef __cplusplus
254}
255#endif
256
257/* toolchain: ARM ADS or RealView
258 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
259 */
260#elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
261
262static __inline int MULSHIFT32(int x, int y)
263{
264 /* rules for smull RdLo, RdHi, Rm, Rs:
265 * RdHi != Rm
266 * RdLo != Rm
267 * RdHi != RdLo
268 */
269 int zlow;
270 __asm {
271 smull zlow, y, x, y
272 }
273
274 return y;
275}
276
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Plain comparisons are used so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
289
/*
 * FASTABS - absolute value of a signed integer.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
300
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The search runs on an unsigned copy of x: left-shifting a signed int into
 * or past the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;

    /* ARM code would look like this, but do NOT use inline asm in ADS for this,
        because you can't safely use the status register flags intermixed with C code

        __asm {
            mov     numZeros, #1
            tst     x, 0xffff0000
            addeq   numZeros, numZeros, #16
            moveq   x, x, lsl #16
            tst     x, 0xff000000
            addeq   numZeros, numZeros, #8
            moveq   x, x, lsl #8
            tst     x, 0xf0000000
            addeq   numZeros, numZeros, #4
            moveq   x, x, lsl #4
            tst     x, 0xc0000000
            addeq   numZeros, numZeros, #2
            moveq   x, x, lsl #2
            sub     numZeros, numZeros, x, lsr #31
        }
    */
    /* reference:
        numZeros = 0;
        while (!(x & 0x80000000)) {
            numZeros++;
            x <<= 1;
        }
    */
}
360
361typedef __int64 Word64;
362
363typedef union _U64 {
364 Word64 w64;
365 struct {
366 /* ARM ADS = little endian */
367 unsigned int lo32;
368 signed int hi32;
369 } r;
370} U64;
371
372static __inline Word64 MADD64(Word64 sum64, int x, int y)
373{
374 U64 u;
375 u.w64 = sum64;
376
377 __asm {
378 smlal u.r.lo32, u.r.hi32, x, y
379 }
380
381 return u.w64;
382}
383
384/* toolchain: ARM gcc
385 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
386 */
387#elif defined(__GNUC__) && defined(__arm__)
388
/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product (ARM SMULL).
 *
 * Operand %0 (low word, discarded) is early-clobber so it cannot share a
 * register with an input; operand %1 (high word) reuses y's register through
 * the matching "1" constraint.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    int zlow;
    __asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
    return y;
}
395
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Plain comparisons are used so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
408
/*
 * FASTABS - absolute value of a signed integer.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
419
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * Uses the same binary search as the other toolchain variants in this
 * header, on an unsigned copy of x: the former bit-by-bit loop left-shifted
 * a signed int into the sign bit, which is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* binary search: halve the window in which the top set bit can lie */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;
}
436
/* 64-bit signed accumulator type for this toolchain. */
typedef long long Word64;

/* Overlay giving access to the two 32-bit halves of a Word64. */
typedef union _U64 {
    Word64 w64;
    struct {
        /* little-endian layout: low word comes first in memory */
        unsigned int lo32;
        signed int hi32;
    } r;
} U64;
447
448static __inline Word64 MADD64(Word64 sum64, int x, int y)
449{
450 U64 u;
451 u.w64 = sum64;
452
453 __asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
454
455 return u.w64;
456}
457
458/* toolchain: x86 gcc
459 * target architecture: x86
460 */
461#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
462
/* 64-bit signed accumulator type for this toolchain. */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * Both operands are widened first so the multiplication is carried out in
 * 64 bits; the shift then keeps the high word.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    return (int)(((Word64)x * (Word64)y) >> 32);
}
473
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Plain comparisons are used so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
486
/*
 * FASTABS - absolute value of a signed integer.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
497
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The search runs on an unsigned copy of x: left-shifting a signed int into
 * or past the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* binary search: halve the window in which the top set bit can lie */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;
}
529
530typedef union _U64 {
531 Word64 w64;
532 struct {
533 /* x86 = little endian */
534 unsigned int lo32;
535 signed int hi32;
536 } r;
537} U64;
538
539static __inline Word64 MADD64(Word64 sum64, int x, int y)
540{
541 sum64 += (Word64)x * (Word64)y;
542
543 return sum64;
544}
545
546#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
547
/* 64-bit signed accumulator type for this toolchain. */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
 * 32 bits of the 64-bit product.
 *
 * Both operands are widened first so the multiplication is carried out in
 * 64 bits; the shift then keeps the high word.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    return (int)(((Word64)x * (Word64)y) >> 32);
}
558
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short, saturating to
 * [-32768, 32767].
 *
 * Plain comparisons are used so the result does not depend on how the
 * compiler right-shifts negative values or on int being exactly 32 bits.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
571
/*
 * FASTABS - absolute value of a signed integer.
 *
 * The xor/subtract trick is carried out on unsigned values so that the most
 * negative int does not trigger signed-overflow undefined behaviour; on
 * two's-complement targets that input is returned unchanged.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = (x < 0) ? ~0u : 0u;

    return (int)(((unsigned int)x ^ mask) - mask);
}
582
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The search runs on an unsigned copy of x: left-shifting a signed int into
 * or past the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int v = (unsigned int)x;
    int numZeros;

    if (v == 0) {
        return 32;
    }

    /* binary search: halve the window in which the top set bit can lie */
    numZeros = 1;
    if (!(v >> 16)) {
        numZeros += 16;
        v <<= 16;
    }
    if (!(v >> 24)) {
        numZeros += 8;
        v <<= 8;
    }
    if (!(v >> 28)) {
        numZeros += 4;
        v <<= 4;
    }
    if (!(v >> 30)) {
        numZeros += 2;
        v <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is now set */
    numZeros -= (int)(v >> 31);

    return numZeros;
}
614
615typedef union _U64 {
616 Word64 w64;
617 struct {
618 /* PowerPC = big endian */
619 signed int hi32;
620 unsigned int lo32;
621 } r;
622} U64;
623
624static __inline Word64 MADD64(Word64 sum64, int x, int y)
625{
626 sum64 += (Word64)x * (Word64)y;
627
628 return sum64;
629}
630
631#elif defined(_ARC32)
632
633_Asm _Inline int MULSHIFT32(int x, int y)
634{
635 % reg x, y
636 mullw 0, x, y
637 machlw % r0, x, y
638 % error
639}
640
641_Asm _Inline short CLIPTOSHORT(int x)
642{
643 % reg x
644 min % r0, x, 0x7fff
645 max % r0, % r0, -0x8000
646 % error
647}
648
649_Asm _Inline int FASTABS(int x)
650{
651 % reg x
652 abs % r0, x
653 % error
654}
655
656_Asm _Inline int CLZ(int x)
657{
658 /* assume x>0, if x<0 should return 0 */
659 % reg x;
660 norm % r0, x
661 add % r0, % r0, 1
662 % error
663}
664#endif
/*
 * 64-bit accumulator kept as two explicit 32-bit halves (no native 64-bit
 * integer type is used here).
 *
 * NOTE(review): these typedefs follow the #endif that closes the toolchain
 * #if/#elif chain, so they are seen by every toolchain branch, not only the
 * _ARC32 one -- confirm that is intended.
 */
typedef struct {
    unsigned int lo32;
    signed int hi32;
} Word64;

/* Overlay of a Word64 with an identically laid out pair of halves. */
typedef union _U64 {
    Word64 w64;
    struct {
        unsigned int lo32;
        signed int hi32;
    } r;
} U64;
677
678_Asm _Inline unsigned add64_lo(unsigned int xlo, unsigned int ylo)
679{
680 % reg xlo, ylo;
681 add.f % r0, xlo, ylo
682 % error
683}
684
685_Asm _Inline int add64_hi(unsigned int xhi, unsigned int yhi)
686{
687 % reg xhi, yhi;
688 adc % r0, xhi, yhi
689 % error
690}
691
692_Asm _Inline unsigned madd64_lo(unsigned lo, int a, int b)
693{
694 % reg lo, a, b;
695 mpy % r0, a, b
696 add.f % r0, lo, % r0
697 % error
698}
699
700
701_Asm _Inline int madd64_hi(int hi, int a, int b)
702{
703 % reg hi, a, b;
704 mpyh % r0, a, b
705 adc % r0, hi, % r0
706 % error
707}
708
709
710
711_Asm _Inline void madd64(int a, int b)
712{
713 % reg a, b;
714 mulhlw 0, a, b
715 maclw 0, a, b
716 % error
717}
718
719_Asm _Inline int madd64hi(int hi)
720{
721 % reg hi
722 //mov %r0, %acc2
723 adc % r0, hi, % acc1
724 % error
725}
726
727_Asm _Inline int madd64lo(int lo)
728{
729 % reg lo
730 //mov %r0, %acc1
731 add.f % r0, lo, % acc2
732 % error
733}
734
735
736
/*
 * Helpers for the split (lo32/hi32) 64-bit accumulator.  The accumulator
 * arguments are parenthesised so expressions such as *p or arr[i] can be
 * passed.  MADD64 and ADD64 expand to several ';'-terminated statements
 * whose order matters (low word before high word), so inside an
 * if/else or loop body they must be wrapped in braces.
 */

/* Clear both halves of the accumulator x. */
#define SET_ZERO(x) (x).lo32 = (x).hi32 = 0

/* w64 += a * b: multiply, then fold the low and the high word into w64. */
#define MADD64(w64, a, b) madd64(a, b); (w64).lo32 = madd64lo((w64).lo32); (w64).hi32 = madd64hi((w64).hi32);

/* x64 += y64: low words first, then high words. */
#define ADD64(x64, y64) (x64).lo32 = add64_lo((x64).lo32,(y64).lo32); (x64).hi32 = add64_hi((x64).hi32,(y64).hi32);
742
743#endif /* _ASSEMBLY_H */
744