platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $
3  *
4  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5  *
6  * The contents of this file, and the files included with this file,
7  * are subject to the current version of the RealNetworks Public
8  * Source License (the "RPSL") available at
9  * http://www.helixcommunity.org/content/rpsl unless you have licensed
10  * the file under the current version of the RealNetworks Community
11  * Source License (the "RCSL") available at
12  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13  * will apply. You may also obtain the license terms directly from
14  * RealNetworks.  You may not use this file except in compliance with
15  * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
17  * the rights, obligations and limitations governing use of the
18  * contents of the file.
19  *
20  * This file is part of the Helix DNA Technology. RealNetworks is the
21  * developer of the Original Code and owns the copyrights in the
22  * portions it created.
23  *
24  * This file, and the files included with this file, is distributed
25  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29  * ENJOYMENT OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  *    http://www.helixcommunity.org/content/tck
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point HE-AAC decoder
40  * Jon Recker (jrecker@real.com)
41  * February 2005
42  *
43  * assembly.h - inline assembly language functions and prototypes
44  *
45  * MULSHIFT32(x, y)             signed multiply of two 32-bit integers (x and y),
46  *                            returns top 32-bits of 64-bit result
47  * CLIPTOSHORT(x)                       convert 32-bit integer to 16-bit short,
48  *                            clipping to [-32768, 32767]
49  * FASTABS(x)               branchless absolute value of signed integer x
50  * CLZ(x)                   count leading zeros on signed integer x
51  * MADD64(sum64, x, y)          64-bit multiply accumulate: sum64 += (x*y)
52  **************************************************************************************/
53
54 #ifndef _ASSEMBLY_H
55 #define _ASSEMBLY_H
56
57 /* toolchain:           MSFT Visual C++
58  * target architecture: x86
59  */
60 #if 0//(defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
61
62 #pragma warning( disable : 4035 )       /* complains about inline asm not returning a value */
63
/*
 * MULSHIFT32 (MSVC x86 inline-asm variant).
 * Loads x into eax and issues a one-operand imul by y; the high half of the
 * product (edx) is then copied into eax. There is no C return statement:
 * the value left in eax is the result, which is why warning 4035 is
 * disabled just above this function.
 */
64 static __inline int MULSHIFT32(int x, int y)
65 {
66     __asm {
67         mov             eax, x
68         imul        y
69         mov         eax, edx
70     }
71 }
72
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
85
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
96
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
128
129 #ifdef __CW32__
130 typedef long long Word64;
131 #else
132 typedef __int64 Word64;
133 #endif
134
135 typedef union _U64 {
136     Word64 w64;
137     struct {
138         /* x86 = little endian */
139         unsigned int lo32;
140         signed int   hi32;
141     } r;
142 } U64;
143
144 /* returns 64-bit value in [edx:eax] */
/*
 * MADD64 - 64-bit multiply-accumulate: returns sum64 + x * y.
 *
 * On the Symbian 6.1/7.0 emulator builds selected by the #if below, the
 * product is formed with a one-operand imul and added into the two 32-bit
 * halves of sum64 with add/adc (carry propagates into the upper dword).
 * Every other build uses plain 64-bit C arithmetic.
 */
145 static __inline Word64 MADD64(Word64 sum64, int x, int y)
146 {
147 #if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
148     /* Workaround for the Symbian emulator because of non existing longlong.lib and
149      * hence __allmul not defined. */
150     __asm {
151         mov     eax, x
152         imul    y
153         add     dword ptr sum64, eax
154         adc     dword ptr sum64 + 4, edx
155     }
156 #else
157     sum64 += (Word64)x * (Word64)y;
158 #endif
159
160     return sum64;
161 }
162
163 /* toolchain:           MSFT Embedded Visual C++
164  * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
165  */
166 #elif 1//defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
167
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
180
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
191
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 */
static int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
223
224 /* implemented in asmfunc.s */
225 #ifdef __cplusplus
226 extern "C" {
227 #endif
228
229     typedef long long Word64;
230
231     typedef union _U64 {
232         Word64 w64;
233         struct {
234             /* ARM WinCE = little endian */
235             unsigned int lo32;
236             signed int   hi32;
237         } r;
238     } U64;
239
240     /* manual name mangling for just this platform (must match labels in .s file) */
241 //#define MULSHIFT32      raac_MULSHIFT32
242 //#define MADD64          raac_MADD64
/*
 * MULSHIFT32 - signed 32x32 multiply, returning the top 32 bits of the
 * 64-bit product (portable C version).
 *
 * x, y: signed 32-bit factors
 * Returns (x * y) >> 32.
 *
 * The earlier body cast the product straight to int, which yields the LOW
 * word (and truncated the product first wherever long is 32 bits wide).
 * The shift below selects the high word.
 */
static int MULSHIFT32(int x, int y)
{
    /* widen before multiplying so the full 64-bit product is formed */
    const long long product = (long long)x * y;

    return (int)(product >> 32);
}
249
250 static  Word64 MADD64(Word64 sum64, int x, int y)
251 {
252 	sum64 += (long long)x * y;
253 	return sum64;
254 }
255
256 #ifdef __cplusplus
257 }
258 #endif
259
260 /* toolchain:           ARM ADS or RealView
261  * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
262  */
263 #elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
264
/*
 * MULSHIFT32 (ARM ADS / RealView inline-asm variant).
 * Issues a single smull with operands in RdLo, RdHi, Rm, Rs order: the low
 * word of the product goes to the scratch variable zlow, the high word
 * overwrites y, and y is returned. The operand choice follows the register
 * rules listed in the comment inside the function.
 */
265 static __inline int MULSHIFT32(int x, int y)
266 {
267     /* rules for smull RdLo, RdHi, Rm, Rs:
268      *   RdHi != Rm
269      *   RdLo != Rm
270      *   RdHi != RdLo
271      */
272     int zlow;
273     __asm {
274         smull zlow, y, x, y
275     }
276
277     return y;
278 }
279
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
292
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
303
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 *
 * Do NOT rewrite this as ADS inline asm using tst plus conditional
 * add/mov: the status register flags cannot be used safely when inline asm
 * is intermixed with C code.
 *
 * Reference (bit-at-a-time) formulation of the same result:
 *     numZeros = 0;
 *     while (!(x & 0x80000000)) { numZeros++; x <<= 1; }
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
363
364 typedef __int64 Word64;
365
366 typedef union _U64 {
367     Word64 w64;
368     struct {
369         /* ARM ADS = little endian */
370         unsigned int lo32;
371         signed int   hi32;
372     } r;
373 } U64;
374
/*
 * MADD64 (ARM ADS / RealView inline-asm variant): sum64 += x * y.
 * The accumulator is copied into a U64 union so its two 32-bit halves can
 * be named as the operands of a single smlal instruction; the union is then
 * read back as one 64-bit value.
 */
375 static __inline Word64 MADD64(Word64 sum64, int x, int y)
376 {
377     U64 u;
378     u.w64 = sum64;
379
380     __asm {
381         smlal u.r.lo32, u.r.hi32, x, y
382     }
383
384     return u.w64;
385 }
386
387 /* toolchain:           ARM gcc
388  * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
389  */
390 #elif defined(__GNUC__) && defined(__arm__)
391
/*
 * MULSHIFT32 (ARM gcc inline-asm variant).
 * One smull instruction: operand %0 (zlow) receives the low word and is
 * discarded, operand %1 (y) receives the high word and is returned.
 * Constraint notes: "=&r" marks zlow early-clobber so it is not allocated
 * on top of an input; the "1" input constraint places the incoming y in the
 * same register as output %1; "cc" is listed as clobbered.
 */
392 static __inline__ int MULSHIFT32(int x, int y)
393 {
394     int zlow;
395     __asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
396     return y;
397 }
398
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
411
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
422
/*
 * CLZ - count leading zero bits of an int-sized value.
 *
 * x: value to inspect (treated as a raw bit pattern)
 * Returns the number of zero bits above the most significant set bit, or
 * sizeof(int) * 8 when x == 0.
 *
 * This branch is only compiled by gcc, so the compiler builtin replaces the
 * former bit-at-a-time loop. That loop left-shifted a signed int into the
 * sign bit, which is undefined behaviour, and took up to 31 iterations.
 */
static __inline int CLZ(int x)
{
    /* __builtin_clz is undefined for a zero argument, so handle it here */
    if (!x) {
        return (int)(sizeof(int) * 8);
    }

    return __builtin_clz((unsigned int)x);
}
439
440 typedef long long Word64;
441
442 typedef union _U64 {
443     Word64 w64;
444     struct {
445         /* ARM ADS = little endian */
446         unsigned int lo32;
447         signed int   hi32;
448     } r;
449 } U64;
450
/*
 * MADD64 (ARM gcc inline-asm variant): sum64 += x * y.
 * The accumulator is copied into a U64 union so its two 32-bit halves can
 * be passed as the read-write ("+&r") operands of a single smlal
 * instruction; the union is then read back as one 64-bit value.
 */
451 static __inline Word64 MADD64(Word64 sum64, int x, int y)
452 {
453     U64 u;
454     u.w64 = sum64;
455
456     __asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
457
458     return u.w64;
459 }
460
461 /* toolchain:           x86 gcc
462  * target architecture: x86
463  */
464 #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
465
/* 64-bit signed integer used for intermediate products and accumulators */
typedef long long Word64;

/*
 * MULSHIFT32 - signed 32x32 multiply, returning the top 32 bits of the
 * 64-bit product.
 *
 * x, y: signed 32-bit factors
 * Returns (x * y) >> 32.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* both factors are widened so the multiply is done in 64 bits */
    const Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
476
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
489
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
500
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
532
533 typedef union _U64 {
534     Word64 w64;
535     struct {
536         /* x86 = little endian */
537         unsigned int lo32;
538         signed int   hi32;
539     } r;
540 } U64;
541
542 static __inline Word64 MADD64(Word64 sum64, int x, int y)
543 {
544     sum64 += (Word64)x * (Word64)y;
545
546     return sum64;
547 }
548
549 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
550
/* 64-bit signed integer used for intermediate products and accumulators */
typedef long long Word64;

/*
 * MULSHIFT32 - signed 32x32 multiply, returning the top 32 bits of the
 * 64-bit product.
 *
 * x, y: signed 32-bit factors
 * Returns (x * y) >> 32.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* both factors are widened so the multiply is done in 64 bits */
    const Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
561
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
574
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
585
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
617
618 typedef union _U64 {
619     Word64 w64;
620     struct {
621         /* PowerPC = big endian */
622         signed int   hi32;
623         unsigned int lo32;
624     } r;
625 } U64;
626
627 static __inline Word64 MADD64(Word64 sum64, int x, int y)
628 {
629     sum64 += (Word64)x * (Word64)y;
630
631     return sum64;
632 }
633
634 #else
635
636 #error Unsupported platform in assembly.h
637
638 #endif  /* platforms */
639
640 #endif /* _ASSEMBLY_H */
641
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $
3	*
4	* Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5	*
6	* The contents of this file, and the files included with this file,
7	* are subject to the current version of the RealNetworks Public
8	* Source License (the "RPSL") available at
9	* http://www.helixcommunity.org/content/rpsl unless you have licensed
10	* the file under the current version of the RealNetworks Community
11	* Source License (the "RCSL") available at
12	* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13	* will apply. You may also obtain the license terms directly from
14	* RealNetworks. You may not use this file except in compliance with
15	* the RPSL or, if you have a valid RCSL with RealNetworks applicable
16	* to this file, the RCSL. Please see the applicable RPSL or RCSL for
17	* the rights, obligations and limitations governing use of the
18	* contents of the file.
19	*
20	* This file is part of the Helix DNA Technology. RealNetworks is the
21	* developer of the Original Code and owns the copyrights in the
22	* portions it created.
23	*
24	* This file, and the files included with this file, is distributed
25	* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26	* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27	* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28	* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29	* ENJOYMENT OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* http://www.helixcommunity.org/content/tck
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point HE-AAC decoder
40	* Jon Recker (jrecker@real.com)
41	* February 2005
42	*
43	* assembly.h - inline assembly language functions and prototypes
44	*
45	* MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y),
46	* returns top 32-bits of 64-bit result
47	* CLIPTOSHORT(x) convert 32-bit integer to 16-bit short,
48	* clipping to [-32768, 32767]
49	* FASTABS(x) branchless absolute value of signed integer x
50	* CLZ(x) count leading zeros on signed integer x
51	* MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y)
52	**************************************************************************************/
53
54	#ifndef _ASSEMBLY_H
55	#define _ASSEMBLY_H
56
57	/* toolchain: MSFT Visual C++
58	* target architecture: x86
59	*/
60	#if 0//(defined (_WIN32) && !defined (_WIN32_WCE)) \|\| (defined (__WINS__) && defined (_SYMBIAN)) \|\| (defined (WINCE_EMULATOR)) \|\| (defined (_OPENWAVE_SIMULATOR))
61
62	#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
63
64	static __inline int MULSHIFT32(int x, int y)
65	{
66	__asm {
67	mov eax, x
68	imul y
69	mov eax, edx
70	}
71	}
72
73	static __inline short CLIPTOSHORT(int x)
74	{
75	int sign;
76
77	/* clip to [-32768, 32767] */
78	sign = x >> 31;
79	if (sign != (x >> 15)) {
80	x = sign ^((1 << 15) - 1);
81	}
82
83	return (short)x;
84	}
85
86	static __inline int FASTABS(int x)
87	{
88	int sign;
89
90	sign = x >> (sizeof(int) * 8 - 1);
91	x ^= sign;
92	x -= sign;
93
94	return x;
95	}
96
97	static __inline int CLZ(int x)
98	{
99	int numZeros;
100
101	if (!x) {
102	return 32;
103	}
104
105	/* count leading zeros with binary search */
106	numZeros = 1;
107	if (!((unsigned int)x >> 16)) {
108	numZeros += 16;
109	x <<= 16;
110	}
111	if (!((unsigned int)x >> 24)) {
112	numZeros += 8;
113	x <<= 8;
114	}
115	if (!((unsigned int)x >> 28)) {
116	numZeros += 4;
117	x <<= 4;
118	}
119	if (!((unsigned int)x >> 30)) {
120	numZeros += 2;
121	x <<= 2;
122	}
123
124	numZeros -= ((unsigned int)x >> 31);
125
126	return numZeros;
127	}
128
129	#ifdef __CW32__
130	typedef long long Word64;
131	#else
132	typedef __int64 Word64;
133	#endif
134
135	typedef union _U64 {
136	Word64 w64;
137	struct {
138	/* x86 = little endian */
139	unsigned int lo32;
140	signed int hi32;
141	} r;
142	} U64;
143
144	/* returns 64-bit value in [edx:eax] */
145	static __inline Word64 MADD64(Word64 sum64, int x, int y)
146	{
147	#if (defined (_SYMBIAN_61_) \|\| defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
148	/* Workaround for the Symbian emulator because of non existing longlong.lib and
149	* hence __allmul not defined. */
150	__asm {
151	mov eax, x
152	imul y
153	add dword ptr sum64, eax
154	adc dword ptr sum64 + 4, edx
155	}
156	#else
157	sum64 += (Word64)x * (Word64)y;
158	#endif
159
160	return sum64;
161	}
162
163	/* toolchain: MSFT Embedded Visual C++
164	* target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
165	*/
166	#elif 1//defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
167
/*
 * CLIPTOSHORT - saturate a 32-bit integer to the 16-bit range.
 *
 * x: value to convert
 * Returns x unchanged when it lies in [-32768, 32767]; otherwise 32767 for
 * positive overflow and -32768 for negative overflow.
 */
static short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 (all ones) for negative x */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 31..15 all equal the sign bit */
    if ((x >> 15) == signMask) {
        return (short)x;
    }

    /* 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ ((1 << 15) - 1));
}
180
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x: input value
 * Returns |x|. Built from a sign mask rather than a compare-and-branch.
 */
static int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise */
    const int mask = x >> (sizeof(int) * 8 - 1);

    /* (x ^ -1) - (-1) == -x, while (x ^ 0) - 0 == x */
    return (x ^ mask) - mask;
}
191
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x: value to inspect (treated as a raw 32-bit pattern)
 * Returns the number of zero bits above the most significant set bit,
 * i.e. 0 when bit 31 is set and 32 when x == 0.
 *
 * The search runs on an unsigned copy: left-shifting a signed int so that a
 * one reaches the sign bit is undefined behaviour in C.
 */
static int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: halve the candidate window at every step */
    if ((bits >> 16) == 0) {
        numZeros += 16;
        bits <<= 16;
    }
    if ((bits >> 24) == 0) {
        numZeros += 8;
        bits <<= 8;
    }
    if ((bits >> 28) == 0) {
        numZeros += 4;
        bits <<= 4;
    }
    if ((bits >> 30) == 0) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1; take it back if the top bit is already set */
    return numZeros - (int)(bits >> 31);
}
223
224	/* implemented in asmfunc.s */
225	#ifdef __cplusplus
226	extern "C" {
227	#endif
228
229	typedef long long Word64;
230
231	typedef union _U64 {
232	Word64 w64;
233	struct {
234	/* ARM WinCE = little endian */
235	unsigned int lo32;
236	signed int hi32;
237	} r;
238	} U64;
239
240	/* manual name mangling for just this platform (must match labels in .s file) */
241	//#define MULSHIFT32 raac_MULSHIFT32
242	//#define MADD64 raac_MADD64
/*
 * MULSHIFT32 - signed 32x32 multiply, returning the top 32 bits of the
 * 64-bit product (portable C version).
 *
 * x, y: signed 32-bit factors
 * Returns (x * y) >> 32.
 *
 * The earlier body cast the product straight to int, which yields the LOW
 * word (and truncated the product first wherever long is 32 bits wide).
 * The shift below selects the high word.
 */
static int MULSHIFT32(int x, int y)
{
    /* widen before multiplying so the full 64-bit product is formed */
    const long long product = (long long)x * y;

    return (int)(product >> 32);
}
249
250	static Word64 MADD64(Word64 sum64, int x, int y)
251	{
252	sum64 += (long long)x * y;
253	return sum64;
254	}
255
256	#ifdef __cplusplus
257	}
258	#endif
259
260	/* toolchain: ARM ADS or RealView
261	* target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
262	*/
263	#elif (defined (__arm) && defined (__ARMCC_VERSION)) \|\| (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
264
265	static __inline int MULSHIFT32(int x, int y)
266	{
267	/* rules for smull RdLo, RdHi, Rm, Rs:
268	* RdHi != Rm
269	* RdLo != Rm
270	* RdHi != RdLo
271	*/
272	int zlow;
273	__asm {
274	smull zlow, y, x, y
275	}
276
277	return y;
278	}
279
280	static __inline short CLIPTOSHORT(int x)
281	{
282	int sign;
283
284	/* clip to [-32768, 32767] */
285	sign = x >> 31;
286	if (sign != (x >> 15)) {
287	x = sign ^((1 << 15) - 1);
288	}
289
290	return (short)x;
291	}
292
293	static __inline int FASTABS(int x)
294	{
295	int sign;
296
297	sign = x >> (sizeof(int) * 8 - 1);
298	x ^= sign;
299	x -= sign;
300
301	return x;
302	}
303
304	static __inline int CLZ(int x)
305	{
306	int numZeros;
307
308	if (!x) {
309	return 32;
310	}
311
312	/* count leading zeros with binary search (function should be 17 ARM instructions total) */
313	numZeros = 1;
314	if (!((unsigned int)x >> 16)) {
315	numZeros += 16;
316	x <<= 16;
317	}
318	if (!((unsigned int)x >> 24)) {
319	numZeros += 8;
320	x <<= 8;
321	}
322	if (!((unsigned int)x >> 28)) {
323	numZeros += 4;
324	x <<= 4;
325	}
326	if (!((unsigned int)x >> 30)) {
327	numZeros += 2;
328	x <<= 2;
329	}
330
331	numZeros -= ((unsigned int)x >> 31);
332
333	return numZeros;
334
335	/* ARM code would look like this, but do NOT use inline asm in ADS for this,
336	because you can't safely use the status register flags intermixed with C code
337
338	__asm {
339	mov numZeros, #1
340	tst x, 0xffff0000
341	addeq numZeros, numZeros, #16
342	moveq x, x, lsl #16
343	tst x, 0xff000000
344	addeq numZeros, numZeros, #8
345	moveq x, x, lsl #8
346	tst x, 0xf0000000
347	addeq numZeros, numZeros, #4
348	moveq x, x, lsl #4
349	tst x, 0xc0000000
350	addeq numZeros, numZeros, #2
351	moveq x, x, lsl #2
352	sub numZeros, numZeros, x, lsr #31
353	}
354	*/
355	/* reference:
356	numZeros = 0;
357	while (!(x & 0x80000000)) {
358	numZeros++;
359	x <<= 1;
360	}
361	*/
362	}
363
364	typedef __int64 Word64;
365
366	typedef union _U64 {
367	Word64 w64;
368	struct {
369	/* ARM ADS = little endian */
370	unsigned int lo32;
371	signed int hi32;
372	} r;
373	} U64;
374
375	static __inline Word64 MADD64(Word64 sum64, int x, int y)
376	{
377	U64 u;
378	u.w64 = sum64;
379
380	__asm {
381	smlal u.r.lo32, u.r.hi32, x, y
382	}
383
384	return u.w64;
385	}
386
387	/* toolchain: ARM gcc
388	* target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
389	*/
390	#elif defined(__GNUC__) && defined(__arm__)
391
392	static __inline__ int MULSHIFT32(int x, int y)
393	{
394	int zlow;
395	__asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
396	return y;
397	}
398
399	static __inline short CLIPTOSHORT(int x)
400	{
401	int sign;
402
403	/* clip to [-32768, 32767] */
404	sign = x >> 31;
405	if (sign != (x >> 15)) {
406	x = sign ^((1 << 15) - 1);
407	}
408
409	return (short)x;
410	}
411
412	static __inline int FASTABS(int x)
413	{
414	int sign;
415
416	sign = x >> (sizeof(int) * 8 - 1);
417	x ^= sign;
418	x -= sign;
419
420	return x;
421	}
422
423	static __inline int CLZ(int x)
424	{
425	int numZeros;
426
427	if (!x) {
428	return (sizeof(int) * 8);
429	}
430
431	numZeros = 0;
432	while (!(x & 0x80000000)) {
433	numZeros++;
434	x <<= 1;
435	}
436
437	return numZeros;
438	}
439
440	typedef long long Word64;
441
442	typedef union _U64 {
443	Word64 w64;
444	struct {
445	/* ARM ADS = little endian */
446	unsigned int lo32;
447	signed int hi32;
448	} r;
449	} U64;
450
451	static __inline Word64 MADD64(Word64 sum64, int x, int y)
452	{
453	U64 u;
454	u.w64 = sum64;
455
456	__asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
457
458	return u.w64;
459	}
460
461	/* toolchain: x86 gcc
462	* target architecture: x86
463	*/
464	#elif defined(__GNUC__) && (defined(__i386__) \|\| defined(__amd64__)) \|\| (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
465
466	typedef long long Word64;
467
468	static __inline__ int MULSHIFT32(int x, int y)
469	{
470	int z;
471
472	z = (Word64)x * (Word64)y >> 32;
473
474	return z;
475	}
476
477	static __inline short CLIPTOSHORT(int x)
478	{
479	int sign;
480
481	/* clip to [-32768, 32767] */
482	sign = x >> 31;
483	if (sign != (x >> 15)) {
484	x = sign ^((1 << 15) - 1);
485	}
486
487	return (short)x;
488	}
489
490	static __inline int FASTABS(int x)
491	{
492	int sign;
493
494	sign = x >> (sizeof(int) * 8 - 1);
495	x ^= sign;
496	x -= sign;
497
498	return x;
499	}
500
501	static __inline int CLZ(int x)
502	{
503	int numZeros;
504
505	if (!x) {
506	return 32;
507	}
508
509	/* count leading zeros with binary search (function should be 17 ARM instructions total) */
510	numZeros = 1;
511	if (!((unsigned int)x >> 16)) {
512	numZeros += 16;
513	x <<= 16;
514	}
515	if (!((unsigned int)x >> 24)) {
516	numZeros += 8;
517	x <<= 8;
518	}
519	if (!((unsigned int)x >> 28)) {
520	numZeros += 4;
521	x <<= 4;
522	}
523	if (!((unsigned int)x >> 30)) {
524	numZeros += 2;
525	x <<= 2;
526	}
527
528	numZeros -= ((unsigned int)x >> 31);
529
530	return numZeros;
531	}
532
533	typedef union _U64 {
534	Word64 w64;
535	struct {
536	/* x86 = little endian */
537	unsigned int lo32;
538	signed int hi32;
539	} r;
540	} U64;
541
542	static __inline Word64 MADD64(Word64 sum64, int x, int y)
543	{
544	sum64 += (Word64)x * (Word64)y;
545
546	return sum64;
547	}
548
549	#elif defined(__GNUC__) && (defined(__powerpc__) \|\| defined(__POWERPC__)) \|\| (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
550
551	typedef long long Word64;
552
553	static __inline__ int MULSHIFT32(int x, int y)
554	{
555	int z;
556
557	z = (Word64)x * (Word64)y >> 32;
558
559	return z;
560	}
561
562	static __inline short CLIPTOSHORT(int x)
563	{
564	int sign;
565
566	/* clip to [-32768, 32767] */
567	sign = x >> 31;
568	if (sign != (x >> 15)) {
569	x = sign ^((1 << 15) - 1);
570	}
571
572	return (short)x;
573	}
574
575	static __inline int FASTABS(int x)
576	{
577	int sign;
578
579	sign = x >> (sizeof(int) * 8 - 1);
580	x ^= sign;
581	x -= sign;
582
583	return x;
584	}
585
586	static __inline int CLZ(int x)
587	{
588	int numZeros;
589
590	if (!x) {
591	return 32;
592	}
593
594	/* count leading zeros with binary search (function should be 17 ARM instructions total) */
595	numZeros = 1;
596	if (!((unsigned int)x >> 16)) {
597	numZeros += 16;
598	x <<= 16;
599	}
600	if (!((unsigned int)x >> 24)) {
601	numZeros += 8;
602	x <<= 8;
603	}
604	if (!((unsigned int)x >> 28)) {
605	numZeros += 4;
606	x <<= 4;
607	}
608	if (!((unsigned int)x >> 30)) {
609	numZeros += 2;
610	x <<= 2;
611	}
612
613	numZeros -= ((unsigned int)x >> 31);
614
615	return numZeros;
616	}
617
618	typedef union _U64 {
619	Word64 w64;
620	struct {
621	/* PowerPC = big endian */
622	signed int hi32;
623	unsigned int lo32;
624	} r;
625	} U64;
626
627	static __inline Word64 MADD64(Word64 sum64, int x, int y)
628	{
629	sum64 += (Word64)x * (Word64)y;
630
631	return sum64;
632	}
633
634	#else
635
636	#error Unsupported platform in assembly.h
637
638	#endif /* platforms */
639
640	#endif /* _ASSEMBLY_H */
641