1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: assembly.h,v 1.7 2005/11/10 00:04:40 margotm Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * assembly.h - inline assembly language functions and prototypes |
44 | * |
45 | * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y), |
46 | * returns top 32-bits of 64-bit result |
47 | * CLIPTOSHORT(x) convert 32-bit integer to 16-bit short, |
48 | * clipping to [-32768, 32767] |
49 | * FASTABS(x) branchless absolute value of signed integer x |
50 | * CLZ(x) count leading zeros on signed integer x |
51 | * MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y) |
52 | **************************************************************************************/ |
53 | |
54 | #ifndef _ASSEMBLY_H |
55 | #define _ASSEMBLY_H |
56 | |
//#define _Inline inline
/* NOTE(review): _ARC32 is force-defined unconditionally (presumably by the
 * ARC port, whose toolchain predefines no identifying macro).  This makes the
 * ARC32 #elif branch below the fallback for ANY toolchain not matched by an
 * earlier #elif — confirm this is intended before building a new platform. */
#define _ARC32
59 | /* toolchain: MSFT Visual C++ |
60 | * target architecture: x86 |
61 | */ |
62 | #if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR)) |
63 | |
64 | #pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */ |
65 | |
/* MULSHIFT32: return the high 32 bits of the signed 64-bit product x*y.
 * MSVC x86 inline asm: imul leaves the 64-bit product in edx:eax; the high
 * half is moved to eax, which is the x86 return register (warning 4035 is
 * disabled above because there is deliberately no C return statement). */
static __inline int MULSHIFT32(int x, int y)
{
	__asm {
		mov eax, x
		imul y
		mov eax, edx
	}
}
74 | |
/* Saturate a 32-bit value to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	if (x > 32767) {
		x = 32767;
	} else if (x < -32768) {
		x = -32768;
	}

	return (short)x;
}
87 | |
/* Branch-free absolute value of x (two's complement; like the hardware
 * idiom, FASTABS(INT_MIN) wraps back to INT_MIN). */
static __inline int FASTABS(int x)
{
	const int mask = x >> (sizeof(int) * 8 - 1);	/* all ones iff x < 0 */
	return (x ^ mask) - mask;
}
98 | |
/* Count leading zero bits of x; returns 32 when x == 0.
 * Binary search over the bit pattern: at each step, if the examined top
 * bits are clear, credit that many zeros and shift them out. */
static __inline int CLZ(int x)
{
	unsigned int v = (unsigned int)x;
	int zeros = 1;

	if (v == 0)
		return 32;

	if ((v & 0xffff0000u) == 0) { zeros += 16; v <<= 16; }
	if ((v & 0xff000000u) == 0) { zeros += 8;  v <<= 8;  }
	if ((v & 0xf0000000u) == 0) { zeros += 4;  v <<= 4;  }
	if ((v & 0xc0000000u) == 0) { zeros += 2;  v <<= 2;  }

	/* top bit now set unless exactly one more zero was over-counted */
	return zeros - (int)(v >> 31);
}
130 | |
/* 64-bit accumulator type: CodeWarrior (__CW32__) supports long long,
 * MSVC uses its native __int64. */
#ifdef __CW32__
typedef long long Word64;
#else
typedef __int64 Word64;
#endif

/* Union view of a 64-bit value as two 32-bit halves. */
typedef union _U64 {
	Word64 w64;
	struct {
		/* x86 = little endian: low word first */
		unsigned int lo32;
		signed int hi32;
	} r;
} U64;
145 | |
146 | /* returns 64-bit value in [edx:eax] */ |
147 | static __inline Word64 madd64(Word64 sum64, int x, int y) |
148 | { |
149 | #if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__) |
150 | /* Workaround for the Symbian emulator because of non existing longlong.lib and |
151 | * hence __allmul not defined. */ |
152 | __asm { |
153 | mov eax, x |
154 | imul y |
155 | add dword ptr sum64, eax |
156 | adc dword ptr sum64 + 4, edx |
157 | } |
158 | #else |
159 | sum64 += (Word64)x * (Word64)y; |
160 | |
161 | /* equivalent to return (sum + ((__int64)x * y)); */ |
162 | #endif |
163 | } |
164 | |
/* SET_ZERO(x):          clear a 64-bit accumulator */
#define SET_ZERO(x) x=0
/* MADD64(sum64, x, y):  sum64 += (Word64)x * y (see madd64 above) */
#define MADD64(sum64, x, y) sum64=madd64(sum64, x, y)
/* ADD64(x64, y64):      64-bit add.  NOTE(review): the trailing ';' makes
 * this macro unsafe in an unbraced if/else — kept as-is for existing callers */
#define ADD64(x64, y64) x64 += y64;
168 | |
169 | /* toolchain: MSFT Embedded Visual C++ |
170 | * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier) |
171 | */ |
172 | #elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM) |
173 | |
/* Clip a 32-bit value into the signed 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int ext = x >> 31;		/* 0 for x >= 0, -1 for x < 0 */

	/* if sign-extending bit 15 doesn't reproduce the top bits, x overflows */
	if ((x >> 15) != ext)
		x = ext ^ 0x7fff;	/* -> 32767 or -32768 */

	return (short)x;
}
186 | |
/* Branchless |x| via the add-then-xor identity (two's complement). */
static __inline int FASTABS(int x)
{
	const int m = x >> (sizeof(int) * 8 - 1);
	return (x + m) ^ m;
}
197 | |
/* Leading-zero count of x (CLZ(0) == 32); O(1) binary search, roughly
 * 17 ARM instructions when compiled. */
static __inline int CLZ(int x)
{
	unsigned int bits = (unsigned int)x;
	int n = 1;

	if (!bits)
		return 32;

	if (!(bits >> 16)) { n += 16; bits <<= 16; }
	if (!(bits >> 24)) { n += 8;  bits <<= 8;  }
	if (!(bits >> 28)) { n += 4;  bits <<= 4;  }
	if (!(bits >> 30)) { n += 2;  bits <<= 2;  }

	return n - (int)(bits >> 31);
}
229 | |
/* MULSHIFT32 and MADD64 are implemented in asmfunc.s for this platform */
#ifdef __cplusplus
extern "C" {
#endif

/* 64-bit accumulator (eVC provides the __int64 built-in) */
typedef __int64 Word64;

/* Union view of a 64-bit value as two 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* ARM WinCE = little endian: low word first */
		unsigned int lo32;
		signed int hi32;
	} r;
} U64;

/* manual name mangling for just this platform (must match labels in .s file) */
#define MULSHIFT32 raac_MULSHIFT32
#define MADD64 raac_MADD64

/* MULSHIFT32: high 32 bits of the signed 64-bit product x*y */
int MULSHIFT32(int x, int y);
/* MADD64: returns sum64 + (Word64)x * y */
Word64 MADD64(Word64 sum64, int x, int y);

#ifdef __cplusplus
}
#endif
256 | |
257 | /* toolchain: ARM ADS or RealView |
258 | * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier) |
259 | */ |
260 | #elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__)) |
261 | |
/* MULSHIFT32: high 32 bits of the signed 64-bit product x*y.
 * smull writes {zlow, y} = x*y, then only the high word (y) is returned.
 * Register rules for smull RdLo, RdHi, Rm, Rs:
 *   RdHi != Rm,  RdLo != Rm,  RdHi != RdLo
 * zlow is a distinct local so ADS/RealView can satisfy these. */
static __inline int MULSHIFT32(int x, int y)
{
	int zlow;
	__asm {
		smull zlow, y, x, y
	}

	return y;
}
276 | |
/* Saturate a 32-bit value to [-32768, 32767] before narrowing to short. */
static __inline short CLIPTOSHORT(int x)
{
	if (x > 32767) {
		x = 32767;
	} else if (x < -32768) {
		x = -32768;
	}

	return (short)x;
}
289 | |
/* Branch-free absolute value (FASTABS(INT_MIN) wraps, as in all ports). */
static __inline int FASTABS(int x)
{
	const int m = x >> (sizeof(int) * 8 - 1);
	return (x + m) ^ m;
}
300 | |
/* Leading-zero count of x; 32 when x == 0.
 * Pure C binary search (about 17 ARM instructions).  Do NOT rewrite this
 * as ADS inline asm: the status-register flags cannot safely be carried
 * across intermixed C code. */
static __inline int CLZ(int x)
{
	unsigned int v = (unsigned int)x;
	int zeros = 1;

	if (v == 0)
		return 32;

	if ((v & 0xffff0000u) == 0) { zeros += 16; v <<= 16; }
	if ((v & 0xff000000u) == 0) { zeros += 8;  v <<= 8;  }
	if ((v & 0xf0000000u) == 0) { zeros += 4;  v <<= 4;  }
	if ((v & 0xc0000000u) == 0) { zeros += 2;  v <<= 2;  }

	return zeros - (int)(v >> 31);
}
360 | |
/* ADS/RealView provides the __int64 built-in for the 64-bit accumulator */
typedef __int64 Word64;

/* Union view of the accumulator as two 32-bit halves, so MADD64 can feed
 * them straight into smlal */
typedef union _U64 {
	Word64 w64;
	struct {
		/* ARM ADS = little endian: low word first */
		unsigned int lo32;
		signed int hi32;
	} r;
} U64;
371 | |
/* MADD64: returns sum64 + (Word64)x * y, using the ARM smlal (signed
 * multiply-accumulate long) instruction on the two 32-bit halves. */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
	U64 u;
	u.w64 = sum64;

	__asm {
		smlal u.r.lo32, u.r.hi32, x, y
	}

	return u.w64;
}
383 | |
384 | /* toolchain: ARM gcc |
385 | * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier) |
386 | */ |
387 | #elif defined(__GNUC__) && defined(__arm__) |
388 | |
/* MULSHIFT32: high 32 bits of the signed 64-bit product x*y.
 * smull writes the product to {zlow, y}; only the high word (y) is kept.
 * zlow is earlyclobbered ("=&r") so it cannot alias x or y.
 * NOTE(review): output %1 (y, the RdHi operand) is NOT earlyclobbered;
 * cores on which smull requires RdHi != Rm rely on the compiler not
 * allocating y and x to the same register — confirm for targeted cores. */
static __inline__ int MULSHIFT32(int x, int y)
{
	int zlow;
	__asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
	return y;
}
395 | |
/* Clip a 32-bit value into the signed 16-bit range before narrowing. */
static __inline short CLIPTOSHORT(int x)
{
	int ext = x >> 31;		/* 0 or -1: the sign extension of x */

	/* out of range iff the top 17 bits are not a pure sign extension */
	if ((x >> 15) != ext)
		x = ext ^ 0x7fff;	/* saturate to 32767 / -32768 */

	return (short)x;
}
408 | |
/* Branchless absolute value of a signed integer. */
static __inline int FASTABS(int x)
{
	const int mask = x >> (sizeof(int) * 8 - 1);	/* -1 iff x < 0 */
	return (x ^ mask) - mask;
}
419 | |
/* Count leading zeros of x; returns 32 (sizeof(int)*8) when x == 0.
 *
 * Replaced the bit-at-a-time while loop (up to 31 iterations, and a signed
 * left shift of a negative value — undefined behavior) with the same O(1)
 * binary search used by every other port in this file.  Results are
 * identical for all inputs. */
static __inline int CLZ(int x)
{
	unsigned int u = (unsigned int)x;
	int numZeros;

	if (!u) {
		return (sizeof(int) * 8);
	}

	numZeros = 1;
	if (!(u >> 16)) { numZeros += 16; u <<= 16; }
	if (!(u >> 24)) { numZeros += 8;  u <<= 8;  }
	if (!(u >> 28)) { numZeros += 4;  u <<= 4;  }
	if (!(u >> 30)) { numZeros += 2;  u <<= 2;  }
	numZeros -= (int)(u >> 31);

	return numZeros;
}
436 | |
/* gcc: 64-bit accumulator is plain long long */
typedef long long Word64;

/* Union view of the accumulator as two 32-bit halves for smlal */
typedef union _U64 {
	Word64 w64;
	struct {
		/* little endian: low word first (comment inherited from the ADS port) */
		unsigned int lo32;
		signed int hi32;
	} r;
} U64;
447 | |
/* MADD64: returns sum64 + (Word64)x * y via the ARM smlal instruction.
 * The "+&r" constraints make both accumulator halves read-write and
 * earlyclobbered, so they cannot be allocated to the same registers as
 * x or y (smlal requires RdLo/RdHi distinct from Rm). */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
	U64 u;
	u.w64 = sum64;

	__asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");

	return u.w64;
}
457 | |
458 | /* toolchain: x86 gcc |
459 | * target architecture: x86 |
460 | */ |
461 | #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86)) |
462 | |
/* 64-bit accumulator for the portable C implementations below */
typedef long long Word64;

/* High 32 bits of the full signed 64-bit product x*y. */
static __inline__ int MULSHIFT32(int x, int y)
{
	Word64 prod = (Word64)x * (Word64)y;
	return (int)(prod >> 32);
}
473 | |
/* Saturate a 32-bit value to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	if (x > 32767) {
		x = 32767;
	} else if (x < -32768) {
		x = -32768;
	}

	return (short)x;
}
486 | |
/* Branch-free |x| using the add-then-xor identity (two's complement). */
static __inline int FASTABS(int x)
{
	const int m = x >> (sizeof(int) * 8 - 1);
	return (x + m) ^ m;
}
497 | |
/* Leading-zero count of x; returns 32 for x == 0.
 * Binary search with bit masks: each test credits the zeros found and
 * shifts them out of the working value. */
static __inline int CLZ(int x)
{
	unsigned int v = (unsigned int)x;
	int zeros = 1;

	if (v == 0)
		return 32;

	if ((v & 0xffff0000u) == 0) { zeros += 16; v <<= 16; }
	if ((v & 0xff000000u) == 0) { zeros += 8;  v <<= 8;  }
	if ((v & 0xf0000000u) == 0) { zeros += 4;  v <<= 4;  }
	if ((v & 0xc0000000u) == 0) { zeros += 2;  v <<= 2;  }

	return zeros - (int)(v >> 31);
}
529 | |
/* Union view of a 64-bit value as two 32-bit halves (x86 is little
 * endian, so the low word comes first) */
typedef union _U64 {
	Word64 w64;
	struct {
		/* x86 = little endian */
		unsigned int lo32;
		signed int hi32;
	} r;
} U64;
538 | |
539 | static __inline Word64 MADD64(Word64 sum64, int x, int y) |
540 | { |
541 | sum64 += (Word64)x * (Word64)y; |
542 | |
543 | return sum64; |
544 | } |
545 | |
546 | #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86)) |
547 | |
/* 64-bit accumulator for the portable C implementations below */
typedef long long Word64;

/* High 32 bits of the full signed 64-bit product x*y. */
static __inline__ int MULSHIFT32(int x, int y)
{
	Word64 prod = (Word64)x * (Word64)y;
	return (int)(prod >> 32);
}
558 | |
/* Clip a 32-bit value into the signed 16-bit range before narrowing. */
static __inline short CLIPTOSHORT(int x)
{
	int ext = x >> 31;		/* sign extension: 0 or -1 */

	if ((x >> 15) != ext)
		x = ext ^ 0x7fff;	/* saturate to 32767 / -32768 */

	return (short)x;
}
571 | |
/* Branchless absolute value of a signed integer. */
static __inline int FASTABS(int x)
{
	const int mask = x >> (sizeof(int) * 8 - 1);	/* all ones iff x < 0 */
	return (x ^ mask) - mask;
}
582 | |
/* Leading-zero count of x (32 when x == 0), via O(1) binary search. */
static __inline int CLZ(int x)
{
	unsigned int bits = (unsigned int)x;
	int n = 1;

	if (!bits)
		return 32;

	if (!(bits >> 16)) { n += 16; bits <<= 16; }
	if (!(bits >> 24)) { n += 8;  bits <<= 8;  }
	if (!(bits >> 28)) { n += 4;  bits <<= 4;  }
	if (!(bits >> 30)) { n += 2;  bits <<= 2;  }

	return n - (int)(bits >> 31);
}
614 | |
/* Union view of a 64-bit value as two 32-bit halves (PowerPC is big
 * endian, so the high word comes first) */
typedef union _U64 {
	Word64 w64;
	struct {
		/* PowerPC = big endian */
		signed int hi32;
		unsigned int lo32;
	} r;
} U64;
623 | |
624 | static __inline Word64 MADD64(Word64 sum64, int x, int y) |
625 | { |
626 | sum64 += (Word64)x * (Word64)y; |
627 | |
628 | return sum64; |
629 | } |
630 | |
631 | #elif defined(_ARC32) |
632 | |
/* MULSHIFT32: high 32 bits of the signed 64-bit product x*y.
 * MetaWare High C _Asm pseudo-function for ARC: mullw/machlw form the
 * 64-bit product; the result is returned in r0.
 * NOTE(review): cannot be verified without the ARC toolchain. */
_Asm _Inline int MULSHIFT32(int x, int y)
{
	% reg x, y
	mullw 0, x, y
	machlw % r0, x, y
	% error
}
640 | |
/* CLIPTOSHORT: saturate x to [-32768, 32767] using the ARC min/max
 * instructions (result in r0). */
_Asm _Inline short CLIPTOSHORT(int x)
{
	% reg x
	min % r0, x, 0x7fff
	max % r0, % r0, -0x8000
	% error
}
648 | |
/* FASTABS: absolute value via the ARC abs instruction (result in r0). */
_Asm _Inline int FASTABS(int x)
{
	% reg x
	abs % r0, x
	% error
}
655 | |
/* CLZ: leading-zero count via the ARC norm (normalize) instruction;
 * norm yields (CLZ - 1), so 1 is added back. */
_Asm _Inline int CLZ(int x)
{
	/* assume x>0, if x<0 should return 0 */
	% reg x;
	norm % r0, x
	add % r0, % r0, 1
	% error
}
664 | #endif |
665 | typedef struct { |
666 | unsigned int lo32; |
667 | signed int hi32; |
668 | } Word64; |
669 | |
670 | typedef union _U64 { |
671 | Word64 w64; |
672 | struct { |
673 | unsigned int lo32; |
674 | signed int hi32; |
675 | } r; |
676 | } U64; |
677 | |
678 | _Asm _Inline unsigned add64_lo(unsigned int xlo, unsigned int ylo) |
679 | { |
680 | % reg xlo, ylo; |
681 | add.f % r0, xlo, ylo |
682 | % error |
683 | } |
684 | |
685 | _Asm _Inline int add64_hi(unsigned int xhi, unsigned int yhi) |
686 | { |
687 | % reg xhi, yhi; |
688 | adc % r0, xhi, yhi |
689 | % error |
690 | } |
691 | |
692 | _Asm _Inline unsigned madd64_lo(unsigned lo, int a, int b) |
693 | { |
694 | % reg lo, a, b; |
695 | mpy % r0, a, b |
696 | add.f % r0, lo, % r0 |
697 | % error |
698 | } |
699 | |
700 | |
701 | _Asm _Inline int madd64_hi(int hi, int a, int b) |
702 | { |
703 | % reg hi, a, b; |
704 | mpyh % r0, a, b |
705 | adc % r0, hi, % r0 |
706 | % error |
707 | } |
708 | |
709 | |
710 | |
711 | _Asm _Inline void madd64(int a, int b) |
712 | { |
713 | % reg a, b; |
714 | mulhlw 0, a, b |
715 | maclw 0, a, b |
716 | % error |
717 | } |
718 | |
719 | _Asm _Inline int madd64hi(int hi) |
720 | { |
721 | % reg hi |
722 | //mov %r0, %acc2 |
723 | adc % r0, hi, % acc1 |
724 | % error |
725 | } |
726 | |
727 | _Asm _Inline int madd64lo(int lo) |
728 | { |
729 | % reg lo |
730 | //mov %r0, %acc1 |
731 | add.f % r0, lo, % acc2 |
732 | % error |
733 | } |
734 | |
735 | |
736 | |
737 | #define SET_ZERO(x) x.lo32 = x.hi32 = 0 |
738 | |
739 | #define MADD64(w64, a, b) madd64(a, b); w64.lo32 = madd64lo(w64.lo32); w64.hi32 = madd64hi(w64.hi32); |
740 | |
741 | #define ADD64(x64, y64) x64.lo32 = add64_lo(x64.lo32,y64.lo32); x64.hi32 = add64_hi(x64.hi32,y64.hi32); |
742 | |
743 | #endif /* _ASSEMBLY_H */ |
744 |