blob: 43be7fc4fcfc7b12692208decc64820bf6ca3a1b
1 | /* ***** BEGIN LICENSE BLOCK ***** |
2 | * Source last modified: $Id: assembly.h,v 1.9 2007/02/28 07:10:21 gahluwalia Exp $ |
3 | * |
4 | * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. |
5 | * |
6 | * The contents of this file, and the files included with this file, |
7 | * are subject to the current version of the RealNetworks Public |
8 | * Source License (the "RPSL") available at |
9 | * http://www.helixcommunity.org/content/rpsl unless you have licensed |
10 | * the file under the current version of the RealNetworks Community |
11 | * Source License (the "RCSL") available at |
12 | * http://www.helixcommunity.org/content/rcsl, in which case the RCSL |
13 | * will apply. You may also obtain the license terms directly from |
14 | * RealNetworks. You may not use this file except in compliance with |
15 | * the RPSL or, if you have a valid RCSL with RealNetworks applicable |
16 | * to this file, the RCSL. Please see the applicable RPSL or RCSL for |
17 | * the rights, obligations and limitations governing use of the |
18 | * contents of the file. |
19 | * |
20 | * This file is part of the Helix DNA Technology. RealNetworks is the |
21 | * developer of the Original Code and owns the copyrights in the |
22 | * portions it created. |
23 | * |
24 | * This file, and the files included with this file, is distributed |
25 | * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY |
26 | * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS |
27 | * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES |
28 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET |
29 | * ENJOYMENT OR NON-INFRINGEMENT. |
30 | * |
31 | * Technology Compatibility Kit Test Suite(s) Location: |
32 | * http://www.helixcommunity.org/content/tck |
33 | * |
34 | * Contributor(s): |
35 | * |
36 | * ***** END LICENSE BLOCK ***** */ |
37 | |
38 | /************************************************************************************** |
39 | * Fixed-point HE-AAC decoder |
40 | * Jon Recker (jrecker@real.com) |
41 | * February 2005 |
42 | * |
43 | * assembly.h - inline assembly language functions and prototypes |
44 | * |
45 | * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y), |
46 | * returns top 32-bits of 64-bit result |
47 | * CLIPTOSHORT(x) convert 32-bit integer to 16-bit short, |
48 | * clipping to [-32768, 32767] |
49 | * FASTABS(x) branchless absolute value of signed integer x |
50 | * CLZ(x) count leading zeros on signed integer x |
51 | * MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y) |
52 | **************************************************************************************/ |
53 | |
54 | #ifndef _ASSEMBLY_H |
55 | #define _ASSEMBLY_H |
56 | |
57 | /* toolchain: MSFT Visual C++ |
58 | * target architecture: x86 |
59 | */ |
60 | #if 0//(defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR)) |
61 | |
62 | #pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */ |
63 | |
/* Signed multiply of two 32-bit integers; yields the top 32 bits of the 64-bit product.
 * There is deliberately no C return statement: the result is left in eax by the
 * inline asm, which is why warning 4035 is disabled for this section.
 */
static __inline int MULSHIFT32(int x, int y)
{
    __asm {
        mov     eax, x
        imul    y           /* one-operand signed multiply: 64-bit product in edx:eax */
        mov     eax, edx    /* high half becomes the return value */
    }
}
72 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
85 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
96 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 *
 * All shifting is done on an unsigned copy: left-shifting a signed int so
 * that a 1 reaches the sign bit is undefined behavior.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: whenever the upper part is empty, count it and slide the rest up */
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1 on the assumption that the top bit is clear; undo that if it is set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
128 | |
/* Word64: the signed 64-bit integer type used for multiply-accumulate sums.
 * The spelling of the underlying type depends on the compiler.
 */
#ifdef __CW32__
typedef long long Word64;
#else
typedef __int64 Word64;
#endif

/* U64: overlays a Word64 with its two 32-bit halves so each half can be accessed separately. */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* x86 = little endian */
        unsigned int lo32;      /* low 32 bits */
        signed int hi32;        /* high 32 bits (signed) */
    } r;
} U64;
143 | |
/* 64-bit multiply-accumulate: returns sum64 + (x * y).
 * returns 64-bit value in [edx:eax]
 */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
#if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
    /* Workaround for the Symbian emulator because of non existing longlong.lib and
     * hence __allmul not defined. */
    __asm {
        mov     eax, x
        imul    y                           /* 64-bit product of x and y in edx:eax */
        add     dword ptr sum64, eax        /* add low half into the low dword of sum64 */
        adc     dword ptr sum64 + 4, edx    /* add high half plus carry into the high dword */
    }
#else
    /* both operands are widened first so the multiply is carried out in 64 bits */
    sum64 += (Word64)x * (Word64)y;
#endif

    return sum64;
}
162 | |
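/* toolchain: MSFT Embedded Visual C++
 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
 * NOTE: the condition on the following #elif is hard-wired to 1 (the original platform test is
 *       commented out), so this branch is the one compiled regardless of toolchain.
 */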
166 | #elif 1//defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM) |
167 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
180 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
191 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 *
 * All shifting is done on an unsigned copy: left-shifting a signed int so
 * that a 1 reaches the sign bit is undefined behavior.
 */
static int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: whenever the upper part is empty, count it and slide the rest up */
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1 on the assumption that the top bit is clear; undo that if it is set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
223 | |
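/* MULSHIFT32 and MADD64: this comment used to read "implemented in asmfunc.s";
 * in this version both are defined in plain C below and the name-mangling
 * #defines that pointed at the assembly labels are commented out.
 */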
225 | #ifdef __cplusplus |
226 | extern "C" { |
227 | #endif |
228 | |
/* Word64: the signed 64-bit integer type used for multiply-accumulate sums. */
typedef long long Word64;

/* U64: overlays a Word64 with its two 32-bit halves so each half can be accessed separately. */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* ARM WinCE = little endian */
        unsigned int lo32;      /* low 32 bits */
        signed int hi32;        /* high 32 bits (signed) */
    } r;
} U64;
239 | |
240 | /* manual name mangling for just this platform (must match labels in .s file) */ |
241 | //#define MULSHIFT32 raac_MULSHIFT32 |
242 | //#define MADD64 raac_MADD64 |
/* Signed multiply of two 32-bit integers; returns the top 32 bits of the
 * 64-bit product, i.e. (x * y) >> 32.
 *
 * The previous body stored the product in a `long` and cast it to int, which
 * yields the LOW 32 bits of the product rather than the high half.
 */
static int MULSHIFT32(int x, int y)
{
    /* widen one operand first so the multiplication itself is 64-bit */
    const long long product = (long long)x * y;

    /* right shift of a negative value is assumed to be arithmetic (sign-propagating) */
    return (int)(product >> 32);
}
249 | |
250 | static Word64 MADD64(Word64 sum64, int x, int y) |
251 | { |
252 | sum64 += (long long)x * y; |
253 | return sum64; |
254 | } |
255 | |
256 | #ifdef __cplusplus |
257 | } |
258 | #endif |
259 | |
260 | /* toolchain: ARM ADS or RealView |
261 | * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier) |
262 | */ |
263 | #elif (defined (__arm) && defined (__ARMCC_VERSION)) || (defined(HELIX_CONFIG_SYMBIAN_GENERATE_MMP) && !defined(__GCCE__)) |
264 | |
/* Signed multiply of two 32-bit integers using SMULL; returns the top 32 bits
 * of the 64-bit product.
 */
static __inline int MULSHIFT32(int x, int y)
{
    /* rules for smull RdLo, RdHi, Rm, Rs:
     *   RdHi != Rm
     *   RdLo != Rm
     *   RdHi != RdLo
     */
    int zlow;   /* receives the low half of the product (RdLo); never read */
    __asm {
        smull zlow, y, x, y     /* RdLo = zlow, RdHi = y, Rm = x, Rs = y */
    }

    /* y now holds the high half */
    return y;
}
279 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
292 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
303 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 *
 * All shifting is done on an unsigned copy: left-shifting a signed int so
 * that a 1 reaches the sign bit is undefined behavior.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1 on the assumption that the top bit is clear; undo that if it is set */
    numZeros -= (int)(bits >> 31);

    return numZeros;

    /* ARM code would look like this, but do NOT use inline asm in ADS for this,
       because you can't safely use the status register flags intermixed with C code

        __asm {
            mov     numZeros, #1
            tst     x, 0xffff0000
            addeq   numZeros, numZeros, #16
            moveq   x, x, lsl #16
            tst     x, 0xff000000
            addeq   numZeros, numZeros, #8
            moveq   x, x, lsl #8
            tst     x, 0xf0000000
            addeq   numZeros, numZeros, #4
            moveq   x, x, lsl #4
            tst     x, 0xc0000000
            addeq   numZeros, numZeros, #2
            moveq   x, x, lsl #2
            sub     numZeros, numZeros, x, lsr #31
        }
    */
    /* reference:
        numZeros = 0;
        while (!(x & 0x80000000)) {
            numZeros++;
            x <<= 1;
        }
    */
}
363 | |
/* Word64: the signed 64-bit integer type used for multiply-accumulate sums. */
typedef __int64 Word64;

/* U64: overlays a Word64 with its two 32-bit halves so each half can be
 * handed to the multiply-accumulate instruction as a separate operand.
 */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* ARM ADS = little endian */
        unsigned int lo32;      /* low 32 bits */
        signed int hi32;        /* high 32 bits (signed) */
    } r;
} U64;
374 | |
/* 64-bit multiply-accumulate using SMLAL: returns sum64 + (x * y). */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
    U64 u;
    u.w64 = sum64;  /* load the accumulator so its halves can be addressed individually */

    __asm {
        smlal u.r.lo32, u.r.hi32, x, y  /* operands: low half, high half, then the two multiplicands */
    }

    return u.w64;
}
386 | |
387 | /* toolchain: ARM gcc |
388 | * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier) |
389 | */ |
390 | #elif defined(__GNUC__) && defined(__arm__) |
391 | |
/* Signed multiply of two 32-bit integers using SMULL; returns the top 32 bits
 * of the 64-bit product.
 *
 * Operand mapping: %0 = zlow (early-clobber output, discarded),
 *                  %1 = y as output, %2 = x,
 *                  %3 = y as input, tied to operand 1 by the "1" constraint.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    int zlow;   /* written by the instruction but never read */
    __asm__ volatile("smull %0,%1,%2,%3" : "=&r"(zlow), "=r"(y) : "r"(x), "1"(y) : "cc");
    return y;
}
398 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
411 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
422 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns the bit width of int for x == 0 and 0 when the top bit is set.
 *
 * The loop shifts an unsigned copy: repeatedly left-shifting a signed int
 * until a 1 lands in the sign bit is undefined behavior.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 0;

    if (bits == 0) {
        return (int)(sizeof(int) * 8);
    }

    /* slide left one position at a time until the top bit is occupied */
    while (!(bits & 0x80000000u)) {
        numZeros++;
        bits <<= 1;
    }

    return numZeros;
}
439 | |
/* Word64: the signed 64-bit integer type used for multiply-accumulate sums. */
typedef long long Word64;

/* U64: overlays a Word64 with its two 32-bit halves so each half can be
 * bound to its own register operand in the inline asm.
 */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* layout assumes a little-endian ARM target (lo32 stored first) */
        unsigned int lo32;      /* low 32 bits */
        signed int hi32;        /* high 32 bits (signed) */
    } r;
} U64;
450 | |
/* 64-bit multiply-accumulate using SMLAL: returns sum64 + (x * y).
 *
 * Operand mapping: %0 = low half of the accumulator, %1 = high half
 * (both "+&r": read-write, early-clobber), %2 = x, %3 = y.
 */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
    U64 u;
    u.w64 = sum64;  /* load the accumulator so its halves can be addressed individually */

    __asm__ volatile("smlal %0,%1,%2,%3" : "+&r"(u.r.lo32), "+&r"(u.r.hi32) : "r"(x), "r"(y) : "cc");

    return u.w64;
}
460 | |
461 | /* toolchain: x86 gcc |
462 | * target architecture: x86 |
463 | */ |
464 | #elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86)) |
465 | |
/* Word64: the signed 64-bit integer type used for wide products and sums. */
typedef long long Word64;

/* Signed multiply of two 32-bit integers; returns the top 32 bits of the
 * 64-bit product.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    const Word64 product = (Word64)x * (Word64)y;

    /* keep only the upper half (sign-propagating right shift assumed) */
    return (int)(product >> 32);
}
476 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
489 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
500 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 *
 * All shifting is done on an unsigned copy: left-shifting a signed int so
 * that a 1 reaches the sign bit is undefined behavior.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: whenever the upper part is empty, count it and slide the rest up */
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1 on the assumption that the top bit is clear; undo that if it is set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
532 | |
/* U64: overlays a Word64 with its two 32-bit halves so each half can be accessed separately. */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* x86 = little endian */
        unsigned int lo32;      /* low 32 bits */
        signed int hi32;        /* high 32 bits (signed) */
    } r;
} U64;
541 | |
542 | static __inline Word64 MADD64(Word64 sum64, int x, int y) |
543 | { |
544 | sum64 += (Word64)x * (Word64)y; |
545 | |
546 | return sum64; |
547 | } |
548 | |
549 | #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86)) |
550 | |
/* Word64: the signed 64-bit integer type used for wide products and sums. */
typedef long long Word64;

/* Signed multiply of two 32-bit integers; returns the top 32 bits of the
 * 64-bit product.
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    const Word64 product = (Word64)x * (Word64)y;

    /* keep only the upper half (sign-propagating right shift assumed) */
    return (int)(product >> 32);
}
561 | |
/* Saturate a 32-bit integer to the 16-bit range [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
    /* 0 for non-negative x, -1 for negative x (arithmetic right shift assumed) */
    const int signFill = x >> 31;

    /* x already fits when bits 31..15 are all copies of the sign bit */
    if ((x >> 15) == signFill) {
        return (short)x;
    }

    /* out of range: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signFill ^ ((1 << 15) - 1));
}
574 | |
/* Branchless absolute value of a signed integer.
 *
 * The conditional negate is carried out in unsigned arithmetic so that
 * x == INT_MIN wraps back to INT_MIN instead of overflowing a signed int
 * (which would be undefined behavior).
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift assumed) */
    const unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));
    /* (x ^ mask) - mask: identity for mask == 0, two's-complement negate for mask == ~0 */
    const unsigned int magnitude = ((unsigned int)x ^ mask) - mask;

    return (int)magnitude;
}
585 | |
/* Count the leading zero bits of x, treated as a 32-bit pattern.
 * Returns 32 for x == 0 and 0 when the top bit is set (any negative x).
 *
 * All shifting is done on an unsigned copy: left-shifting a signed int so
 * that a 1 reaches the sign bit is undefined behavior.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros = 1;

    if (bits == 0) {
        return 32;
    }

    /* binary search: whenever the upper part is empty, count it and slide the rest up */
    if (!(bits >> 16)) {
        numZeros += 16;
        bits <<= 16;
    }
    if (!(bits >> 24)) {
        numZeros += 8;
        bits <<= 8;
    }
    if (!(bits >> 28)) {
        numZeros += 4;
        bits <<= 4;
    }
    if (!(bits >> 30)) {
        numZeros += 2;
        bits <<= 2;
    }

    /* numZeros started at 1 on the assumption that the top bit is clear; undo that if it is set */
    numZeros -= (int)(bits >> 31);

    return numZeros;
}
617 | |
/* U64: overlays a Word64 with its two 32-bit halves so each half can be accessed separately.
 * Member order is reversed relative to the little-endian branches: hi32 comes first here.
 */
typedef union _U64 {
    Word64 w64;                 /* the full 64-bit value */
    struct {
        /* PowerPC = big endian */
        signed int hi32;        /* high 32 bits (signed) */
        unsigned int lo32;      /* low 32 bits */
    } r;
} U64;
626 | |
627 | static __inline Word64 MADD64(Word64 sum64, int x, int y) |
628 | { |
629 | sum64 += (Word64)x * (Word64)y; |
630 | |
631 | return sum64; |
632 | } |
633 | |
634 | #else |
635 | |
636 | #error Unsupported platform in assembly.h |
637 | |
638 | #endif /* platforms */ |
639 | |
640 | #endif /* _ASSEMBLY_H */ |
641 |