platform/hardware/amlogic/LibAudio.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* ***** BEGIN LICENSE BLOCK *****
2  * Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $
3  *
4  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5  *
6  * The contents of this file, and the files included with this file,
7  * are subject to the current version of the RealNetworks Public
8  * Source License (the "RPSL") available at
9  * http://www.helixcommunity.org/content/rpsl unless you have licensed
10  * the file under the current version of the RealNetworks Community
11  * Source License (the "RCSL") available at
12  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13  * will apply. You may also obtain the license terms directly from
14  * RealNetworks.  You may not use this file except in compliance with
15  * the RPSL or, if you have a valid RCSL with RealNetworks applicable
16  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
17  * the rights, obligations and limitations governing use of the
18  * contents of the file.
19  *
20  * This file is part of the Helix DNA Technology. RealNetworks is the
21  * developer of the Original Code and owns the copyrights in the
22  * portions it created.
23  *
24  * This file, and the files included with this file, is distributed
25  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29  * ENJOYMENT OR NON-INFRINGEMENT.
30  *
31  * Technology Compatibility Kit Test Suite(s) Location:
32  *    http://www.helixcommunity.org/content/tck
33  *
34  * Contributor(s):
35  *
36  * ***** END LICENSE BLOCK ***** */
37
38 /**************************************************************************************
39  * Fixed-point HE-AAC decoder
40  * Jon Recker (jrecker@real.com)
41  * February 2005
42  *
43  * assembly.h - inline assembly language functions and prototypes
44  *
45  * MULSHIFT32(x, y)             signed multiply of two 32-bit integers (x and y),
46  *                            returns top 32-bits of 64-bit result
47  * CLIPTOSHORT(x)                       convert 32-bit integer to 16-bit short,
48  *                            clipping to [-32768, 32767]
49  * FASTABS(x)               branchless absolute value of signed integer x
50  * CLZ(x)                   count leading zeros on signed integer x
51  * MADD64(sum64, x, y)          64-bit multiply accumulate: sum64 += (x*y)
52  **************************************************************************************/
53
54 #ifndef _ASSEMBLY_H
55 #define _ASSEMBLY_H
56
57 /* toolchain:           MSFT Visual C++
58  * target architecture: x86
59  */
60 #if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
61
62 #pragma warning( disable : 4035 )       /* complains about inline asm not returning a value */
63
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 * The one-operand imul multiplies eax by y; edx is then copied into eax so
 * that the high word is what is left as the function result. There is no C
 * return statement, which is why warning 4035 is disabled above.
 */
64 static __inline int MULSHIFT32(int x, int y)
65 {
66     __asm {
67         mov             eax, x
68         imul        y
69         mov         eax, edx
70     }
71 }
72
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
85
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
96
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
128
129 #ifdef __CW32__
130 typedef long long Word64;
131 #else
132 typedef __int64 Word64;
133 #endif
134
135 typedef union _U64 {
136     Word64 w64;
137     struct {
138         /* x86 = little endian */
139         unsigned int lo32;
140         signed int   hi32;
141     } r;
142 } U64;
143
144 /* returns 64-bit value in [edx:eax] */
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * On the Symbian 6.1/7.0 emulator build the product is formed with imul and
 * added to the two 32-bit halves of sum64 with add/adc (carry propagates into
 * the upper half); every other build uses plain 64-bit C arithmetic.
 */
145 static __inline Word64 MADD64(Word64 sum64, int x, int y)
146 {
147 #if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
148     /* Workaround for the Symbian emulator because of non existing longlong.lib and
149      * hence __allmul not defined. */
150     __asm {
151         mov     eax, x
152         imul    y
153         add     dword ptr sum64, eax
154         adc     dword ptr sum64 + 4, edx
155     }
156 #else
157     sum64 += (Word64)x * (Word64)y;
158 #endif
159
160     return sum64;
161 }
162
163 /* toolchain:           MSFT Embedded Visual C++
164  * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
165  */
166 #elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
167
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
180
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
191
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
223
224 /* implemented in asmfunc.s */
225 #ifdef __cplusplus
226 extern "C" {
227 #endif
228
229     typedef __int64 Word64;
230
    /* overlay giving access to the two 32-bit halves of a Word64 */
231     typedef union _U64 {
232         Word64 w64;
233         struct {
234             /* ARM WinCE = little endian */
235             unsigned int lo32;
236             signed int   hi32;
237         } r;
238     } U64;
239
240     /* manual name mangling for just this platform (must match labels in .s file) */
241 #define MULSHIFT32      raac_MULSHIFT32
242 #define MADD64          raac_MADD64
243
    /* because of the two #defines above, these prototypes declare
     * raac_MULSHIFT32 and raac_MADD64; no C bodies are provided in this section */
244     int MULSHIFT32(int x, int y);
245     Word64 MADD64(Word64 sum64, int x, int y);
246
247 #ifdef __cplusplus
248 }
249 #endif
250
251 /* toolchain:           ARM ADS or RealView
252  * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
253  */
254 #elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
255
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 */
256 static __inline int MULSHIFT32(int x, int y)
257 {
258     /* rules for smull RdLo, RdHi, Rm, Rs:
259      *   RdHi != Rm
260      *   RdLo != Rm
261      *   RdHi != RdLo
262      */
    /* zlow takes the low word and is never read; y is reused as RdHi */
263     int zlow;
264     __asm {
265         smull zlow, y, x, y
266     }
267
268     return y;
269 }
270
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
283
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
294
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;

    /* ARM code would look like this, but do NOT use inline asm in ADS for this,
       because you can't safely use the status register flags intermixed with C code

            __asm {
                mov     numZeros, #1
                tst     x, 0xffff0000
                addeq   numZeros, numZeros, #16
                moveq   x, x, lsl #16
                tst     x, 0xff000000
                addeq   numZeros, numZeros, #8
                moveq   x, x, lsl #8
                tst     x, 0xf0000000
                addeq   numZeros, numZeros, #4
                moveq   x, x, lsl #4
                tst     x, 0xc0000000
                addeq   numZeros, numZeros, #2
                moveq   x, x, lsl #2
                sub     numZeros, numZeros, x, lsr #31
            }
    */
    /* reference:
            numZeros = 0;
            while (!(x & 0x80000000)) {
                    numZeros++;
                    x <<= 1;
            }
    */
}
354
355 typedef __int64 Word64;
356
357 typedef union _U64 {
358     Word64 w64;
359     struct {
360         /* ARM ADS = little endian */
361         unsigned int lo32;
362         signed int   hi32;
363     } r;
364 } U64;
365
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * sum64 is copied into a U64 so that smlal can be given its two 32-bit
 * halves (lo32, hi32) as the accumulator pair.
 */
366 static __inline Word64 MADD64(Word64 sum64, int x, int y)
367 {
368     U64 u;
369     u.w64 = sum64;
370
371     __asm {
372         smlal u.r.lo32, u.r.hi32, x, y
373     }
374
375     return u.w64;
376 }
377
378 /* toolchain:           ARM gcc
379  * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
380  */
381 #elif defined(__GNUC__) && defined(__arm__)
382
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 * Operands: %0 = zlow (low word, earlyclobber, never read), %1 = y as the
 * high-word output, %2 = x, %3 = y as input, tied to %1 by the "1" constraint.
 * NOTE(review): %1 is not marked earlyclobber, so nothing here stops x and y
 * from sharing a register when both hold the same value; the smull rules
 * quoted in the ADS/RealView section list RdHi != Rm -- confirm whether that
 * matters for the oldest ARM cores this is built for.
 */
383 static __inline__ int MULSHIFT32(int x, int y)
384 {
385     int zlow;
386     __asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
387     return y;
388 }
389
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
402
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
413
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * Uses the same five-step binary search as the other toolchain sections
 * instead of a bit-at-a-time loop, and shifts an unsigned copy because
 * left-shifting a signed int into its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
430
431 typedef long long Word64;
432
433 typedef union _U64 {
434     Word64 w64;
435     struct {
436         /* ARM gcc: this layout assumes little endian */
437         unsigned int lo32;
438         signed int   hi32;
439     } r;
440 } U64;
441
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * sum64 is copied into a U64 so that smlal can be given its two 32-bit
 * halves; both halves use "+&r", i.e. they are read and written by the asm
 * and marked earlyclobber.
 */
442 static __inline Word64 MADD64(Word64 sum64, int x, int y)
443 {
444     U64 u;
445     u.w64 = sum64;
446
447     __asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
448
449     return u.w64;
450 }
451
452 /* toolchain:           x86 gcc
453  * target architecture: x86
454  */
455 #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
456
typedef long long Word64;

/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product.
 *
 * x, y: signed 32-bit factors, widened before multiplying so the product
 * cannot overflow. The narrowing back to int is made explicit.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
467
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
480
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
491
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
523
524 typedef union _U64 {
525     Word64 w64;
526     struct {
527         /* x86 = little endian */
528         unsigned int lo32;
529         signed int   hi32;
530     } r;
531 } U64;
532
533 static __inline Word64 MADD64(Word64 sum64, int x, int y)
534 {
535     sum64 += (Word64)x * (Word64)y;
536
537     return sum64;
538 }
539
540 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
541
typedef long long Word64;

/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product.
 *
 * x, y: signed 32-bit factors, widened before multiplying so the product
 * cannot overflow. The narrowing back to int is made explicit.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
552
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
565
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
576
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
608
609 typedef union _U64 {
610     Word64 w64;
611     struct {
612         /* PowerPC = big endian */
613         signed int   hi32;
614         unsigned int lo32;
615     } r;
616 } U64;
617
618 static __inline Word64 MADD64(Word64 sum64, int x, int y)
619 {
620     sum64 += (Word64)x * (Word64)y;
621
622     return sum64;
623 }
624
625 #else
626
627 #error Unsupported platform in assembly.h
628
629 #endif  /* platforms */
630
631 #endif /* _ASSEMBLY_H */
632
1	/* *** BEGIN LICENSE BLOCK ***
2	* Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $
3	*
4	* Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
5	*
6	* The contents of this file, and the files included with this file,
7	* are subject to the current version of the RealNetworks Public
8	* Source License (the "RPSL") available at
9	* http://www.helixcommunity.org/content/rpsl unless you have licensed
10	* the file under the current version of the RealNetworks Community
11	* Source License (the "RCSL") available at
12	* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
13	* will apply. You may also obtain the license terms directly from
14	* RealNetworks. You may not use this file except in compliance with
15	* the RPSL or, if you have a valid RCSL with RealNetworks applicable
16	* to this file, the RCSL. Please see the applicable RPSL or RCSL for
17	* the rights, obligations and limitations governing use of the
18	* contents of the file.
19	*
20	* This file is part of the Helix DNA Technology. RealNetworks is the
21	* developer of the Original Code and owns the copyrights in the
22	* portions it created.
23	*
24	* This file, and the files included with this file, is distributed
25	* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
26	* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
27	* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
28	* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
29	* ENJOYMENT OR NON-INFRINGEMENT.
30	*
31	* Technology Compatibility Kit Test Suite(s) Location:
32	* http://www.helixcommunity.org/content/tck
33	*
34	* Contributor(s):
35	*
36	* *** END LICENSE BLOCK *** */
37
38	/**************************************************************************************
39	* Fixed-point HE-AAC decoder
40	* Jon Recker (jrecker@real.com)
41	* February 2005
42	*
43	* assembly.h - inline assembly language functions and prototypes
44	*
45	* MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y),
46	* returns top 32-bits of 64-bit result
47	* CLIPTOSHORT(x) convert 32-bit integer to 16-bit short,
48	* clipping to [-32768, 32767]
49	* FASTABS(x) branchless absolute value of signed integer x
50	* CLZ(x) count leading zeros on signed integer x
51	* MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y)
52	**************************************************************************************/
53
54	#ifndef _ASSEMBLY_H
55	#define _ASSEMBLY_H
56
57	/* toolchain: MSFT Visual C++
58	* target architecture: x86
59	*/
60	#if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
61
62	#pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
63
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 * The one-operand imul multiplies eax by y; edx is then copied into eax so
 * that the high word is what is left as the function result. There is no C
 * return statement, which is why warning 4035 is disabled above.
 */
64	static __inline int MULSHIFT32(int x, int y)
65	{
66	__asm {
67	mov eax, x
68	imul y
69	mov eax, edx
70	}
71	}
72
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
85
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
96
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
128
129	#ifdef __CW32__
130	typedef long long Word64;
131	#else
132	typedef __int64 Word64;
133	#endif
134
135	typedef union _U64 {
136	Word64 w64;
137	struct {
138	/* x86 = little endian */
139	unsigned int lo32;
140	signed int hi32;
141	} r;
142	} U64;
143
144	/* returns 64-bit value in [edx:eax] */
145	static __inline Word64 MADD64(Word64 sum64, int x, int y)
146	{
147	#if (defined (_SYMBIAN_61_) \|\| defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
148	/* Workaround for the Symbian emulator because of non existing longlong.lib and
149	* hence __allmul not defined. */
150	__asm {
151	mov eax, x
152	imul y
153	add dword ptr sum64, eax
154	adc dword ptr sum64 + 4, edx
155	}
156	#else
157	sum64 += (Word64)x * (Word64)y;
158	#endif
159
160	return sum64;
161	}
162
163	/* toolchain: MSFT Embedded Visual C++
164	* target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
165	*/
166	#elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
167
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
180
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
191
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
223
224	/* implemented in asmfunc.s */
225	#ifdef __cplusplus
226	extern "C" {
227	#endif
228
229	typedef __int64 Word64;
230
/* overlay giving access to the two 32-bit halves of a Word64 */
231	typedef union _U64 {
232	Word64 w64;
233	struct {
234	/* ARM WinCE = little endian */
235	unsigned int lo32;
236	signed int hi32;
237	} r;
238	} U64;
239
240	/* manual name mangling for just this platform (must match labels in .s file) */
241	#define MULSHIFT32 raac_MULSHIFT32
242	#define MADD64 raac_MADD64
243
/* because of the two #defines above, these prototypes declare
 * raac_MULSHIFT32 and raac_MADD64; no C bodies are provided in this section */
244	int MULSHIFT32(int x, int y);
245	Word64 MADD64(Word64 sum64, int x, int y);
246
247	#ifdef __cplusplus
248	}
249	#endif
250
251	/* toolchain: ARM ADS or RealView
252	* target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
253	*/
254	#elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__))
255
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 */
256	static __inline int MULSHIFT32(int x, int y)
257	{
258	/* rules for smull RdLo, RdHi, Rm, Rs:
259	* RdHi != Rm
260	* RdLo != Rm
261	* RdHi != RdLo
262	*/
/* zlow takes the low word and is never read; y is reused as RdHi */
263	int zlow;
264	__asm {
265	smull zlow, y, x, y
266	}
267
268	return y;
269	}
270
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
283
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
294
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;

    /* ARM code would look like this, but do NOT use inline asm in ADS for this,
       because you can't safely use the status register flags intermixed with C code

            __asm {
                mov     numZeros, #1
                tst     x, 0xffff0000
                addeq   numZeros, numZeros, #16
                moveq   x, x, lsl #16
                tst     x, 0xff000000
                addeq   numZeros, numZeros, #8
                moveq   x, x, lsl #8
                tst     x, 0xf0000000
                addeq   numZeros, numZeros, #4
                moveq   x, x, lsl #4
                tst     x, 0xc0000000
                addeq   numZeros, numZeros, #2
                moveq   x, x, lsl #2
                sub     numZeros, numZeros, x, lsr #31
            }
    */
    /* reference:
            numZeros = 0;
            while (!(x & 0x80000000)) {
                    numZeros++;
                    x <<= 1;
            }
    */
}
354
355	typedef __int64 Word64;
356
357	typedef union _U64 {
358	Word64 w64;
359	struct {
360	/* ARM ADS = little endian */
361	unsigned int lo32;
362	signed int hi32;
363	} r;
364	} U64;
365
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * sum64 is copied into a U64 so that smlal can be given its two 32-bit
 * halves (lo32, hi32) as the accumulator pair.
 */
366	static __inline Word64 MADD64(Word64 sum64, int x, int y)
367	{
368	U64 u;
369	u.w64 = sum64;
370
371	__asm {
372	smlal u.r.lo32, u.r.hi32, x, y
373	}
374
375	return u.w64;
376	}
377
378	/* toolchain: ARM gcc
379	* target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
380	*/
381	#elif defined(__GNUC__) && defined(__arm__)
382
/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product
 * (see the MULSHIFT32 entry in the file header).
 * Operands: %0 = zlow (low word, earlyclobber, never read), %1 = y as the
 * high-word output, %2 = x, %3 = y as input, tied to %1 by the "1" constraint.
 * NOTE(review): %1 is not marked earlyclobber, so nothing here stops x and y
 * from sharing a register when both hold the same value; the smull rules
 * quoted in the ADS/RealView section list RdHi != Rm -- confirm whether that
 * matters for the oldest ARM cores this is built for.
 */
383	static __inline__ int MULSHIFT32(int x, int y)
384	{
385	int zlow;
386	__asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
387	return y;
388	}
389
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
402
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
413
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * Uses the same five-step binary search as the other toolchain sections
 * instead of a bit-at-a-time loop, and shifts an unsigned copy because
 * left-shifting a signed int into its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
430
431	typedef long long Word64;
432
433	typedef union _U64 {
434	Word64 w64;
435	struct {
436	/* ARM gcc: this layout assumes little endian */
437	unsigned int lo32;
438	signed int hi32;
439	} r;
440	} U64;
441
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * sum64 is copied into a U64 so that smlal can be given its two 32-bit
 * halves; both halves use "+&r", i.e. they are read and written by the asm
 * and marked earlyclobber.
 */
442	static __inline Word64 MADD64(Word64 sum64, int x, int y)
443	{
444	U64 u;
445	u.w64 = sum64;
446
447	__asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");
448
449	return u.w64;
450	}
451
452	/* toolchain: x86 gcc
453	* target architecture: x86
454	*/
455	#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
456
typedef long long Word64;

/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product.
 *
 * x, y: signed 32-bit factors, widened before multiplying so the product
 * cannot overflow. The narrowing back to int is made explicit.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
467
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
480
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
491
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
523
524	typedef union _U64 {
525	Word64 w64;
526	struct {
527	/* x86 = little endian */
528	unsigned int lo32;
529	signed int hi32;
530	} r;
531	} U64;
532
533	static __inline Word64 MADD64(Word64 sum64, int x, int y)
534	{
535	sum64 += (Word64)x * (Word64)y;
536
537	return sum64;
538	}
539
540	#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
541
typedef long long Word64;

/* Signed 32x32 multiply returning the top 32 bits of the 64-bit product.
 *
 * x, y: signed 32-bit factors, widened before multiplying so the product
 * cannot overflow. The narrowing back to int is made explicit.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
552
/* Saturate a 32-bit value to the 16-bit range.
 *
 * Returns x unchanged when it lies in [-32768, 32767], otherwise the nearer
 * bound. Written with comparisons so the result does not depend on how the
 * compiler right-shifts negative values or on int being 32 bits wide.
 */
static __inline short CLIPTOSHORT(int x)
{
    if (x > 32767) {
        return (short)32767;
    }
    if (x < -32768) {
        return (short)-32768;
    }

    return (short)x;
}
565
/* Branchless absolute value of x.
 *
 * The arithmetic is carried out on unsigned values so that the most negative
 * int wraps instead of overflowing a signed int; that value has no positive
 * counterpart and comes back unchanged on two's-complement targets.
 */
static __inline int FASTABS(int x)
{
    unsigned int bits = (unsigned int)x;
    /* all ones when x is negative, zero otherwise */
    unsigned int mask = 0u - (bits >> (sizeof(int) * 8 - 1));

    /* (bits ^ mask) - mask negates only when mask is all ones */
    return (int)((bits ^ mask) - mask);
}
576
/* Count the leading zero bits of x, viewed as a 32-bit pattern.
 *
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 * The shifts run on an unsigned copy because left-shifting a signed int into
 * or past its sign bit is undefined behaviour in C.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (!bits) {
        return 32;
    }

    /* binary search: each step halves the window that can hold the top set bit */
    numZeros = 1;
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* the count started at 1; take that back when the top bit is now set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
608
609	typedef union _U64 {
610	Word64 w64;
611	struct {
612	/* PowerPC = big endian */
613	signed int hi32;
614	unsigned int lo32;
615	} r;
616	} U64;
617
618	static __inline Word64 MADD64(Word64 sum64, int x, int y)
619	{
620	sum64 += (Word64)x * (Word64)y;
621
622	return sum64;
623	}
624
625	#else
626
627	#error Unsupported platform in assembly.h
628
629	#endif /* platforms */
630
631	#endif /* _ASSEMBLY_H */
632