blob: badec5bcbd17f4a77acc29951cc1b75344da05d3
1 | /***************************************************************************** |
2 | * Copyright (C) 2000-2001 Andre McCurdy <armccurdy@yahoo.co.uk> |
3 | * |
4 | * This program is free software. you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation@ either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY, without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program@ if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | * |
18 | ***************************************************************************** |
19 | * |
20 | * Notes: |
21 | * |
22 | * |
23 | ***************************************************************************** |
24 | * |
25 | * $Id: imdct_l_arm.S,v 1.7 2001/03/25 20:03:34 rob Rel $ |
26 | * |
27 | * 2001/03/24: Andre McCurdy <armccurdy@yahoo.co.uk> |
28 | * - Corrected PIC unsafe loading of address of 'imdct36_long_karray' |
29 | * |
30 | * 2000/09/20: Robert Leslie <rob@mars.org> |
31 | * - Added a global symbol with leading underscore per suggestion of |
32 | * Simon Burge to support linking with the a.out format. |
33 | * |
34 | * 2000/09/15: Robert Leslie <rob@mars.org> |
35 | * - Fixed a small bug where flags were changed before a conditional branch. |
36 | * |
37 | * 2000/09/15: Andre McCurdy <armccurdy@yahoo.co.uk> |
38 | * - Applied Nicolas Pitre's rounding optimisation in all remaining places. |
39 | * |
40 | * 2000/09/09: Nicolas Pitre <nico@cam.org> |
41 | * - Optimized rounding + scaling operations. |
42 | * |
43 | * 2000/08/09: Andre McCurdy <armccurdy@yahoo.co.uk> |
44 | * - Original created. |
45 | * |
46 | ****************************************************************************/ |
47 | |
48 | |
49 | /* |
50 | On entry: |
51 | |
52 | r0 = pointer to 18 element input array |
53 | r1 = pointer to 36 element output array |
54 | r2 = windowing block type |
55 | |
56 | |
57 | Stack frame created during execution of the function: |
58 | |
59 | Offset in bytes |
60 | below the initial |
61 | stack pointer, |
62 | followed by what it holds: |
63 | |
64 | 0 |
65 | 4 lr |
66 | 8 r11 |
67 | 12 r10 |
68 | 16 r9 |
69 | 20 r8 |
70 | 24 r7 |
71 | 28 r6 |
72 | 32 r5 |
73 | 36 r4 |
74 | |
75 | 40 r2 : windowing block type |
76 | |
77 | 44 ct00 high |
78 | 48 ct00 low |
79 | 52 ct01 high |
80 | 56 ct01 low |
81 | 60 ct04 high |
82 | 64 ct04 low |
83 | 68 ct06 high |
84 | 72 ct06 low |
85 | 76 ct05 high |
86 | 80 ct05 low |
87 | 84 ct03 high |
88 | 88 ct03 low |
89 | 92 -ct05 high |
90 | 96 -ct05 low |
91 | 100 -ct07 high |
92 | 104 -ct07 low |
93 | 108 ct07 high |
94 | 112 ct07 low |
95 | 116 ct02 high |
96 | 120 ct02 low |
97 | */ |
98 | |
@ Window block types, as passed in r2 and tested in the windowing stage.
99 | #define BLOCK_MODE_NORMAL 0 |
100 | #define BLOCK_MODE_START 1 |
101 | #define BLOCK_MODE_STOP 3 |
102 | |
103 | |
@ Byte offsets of the 18 words of the input array (base pointer in r0).
104 | #define X0 0x00 |
105 | #define X1 0x04 |
106 | #define X2 0x08 |
107 | #define X3 0x0C |
108 | #define X4 0x10 |
109 | #define X5 0x14 |
110 | #define X6 0x18 |
111 | #define X7 0x1c |
112 | #define X8 0x20 |
113 | #define X9 0x24 |
114 | #define X10 0x28 |
115 | #define X11 0x2c |
116 | #define X12 0x30 |
117 | #define X13 0x34 |
118 | #define X14 0x38 |
119 | #define X15 0x3c |
120 | #define X16 0x40 |
121 | #define X17 0x44 |
122 | |
@ Byte offsets of the 36 words of the output array (base pointer in r1).
123 | #define x0 0x00 |
124 | #define x1 0x04 |
125 | #define x2 0x08 |
126 | #define x3 0x0C |
127 | #define x4 0x10 |
128 | #define x5 0x14 |
129 | #define x6 0x18 |
130 | #define x7 0x1c |
131 | #define x8 0x20 |
132 | #define x9 0x24 |
133 | #define x10 0x28 |
134 | #define x11 0x2c |
135 | #define x12 0x30 |
136 | #define x13 0x34 |
137 | #define x14 0x38 |
138 | #define x15 0x3c |
139 | #define x16 0x40 |
140 | #define x17 0x44 |
141 | #define x18 0x48 |
142 | #define x19 0x4c |
143 | #define x20 0x50 |
144 | #define x21 0x54 |
145 | #define x22 0x58 |
146 | #define x23 0x5c |
147 | #define x24 0x60 |
148 | #define x25 0x64 |
149 | #define x26 0x68 |
150 | #define x27 0x6c |
151 | #define x28 0x70 |
152 | #define x29 0x74 |
153 | #define x30 0x78 |
154 | #define x31 0x7c |
155 | #define x32 0x80 |
156 | #define x33 0x84 |
157 | #define x34 0x88 |
158 | #define x35 0x8c |
159 | |
@ Multiplier constants for the transform stage. Every product is scaled back
@ by a 28 bit shift, so these act as fixed point values with 28 fractional
@ bits (0x10000000 would be 1.0).
@ The 18 values are the same set as the window constants WL0 .. WL17 in a
@ different order: even Knn count down from WL17, odd Knn count up from WL0.
@ minus_K02 is the 32 bit twos complement negation of K02.
160 | #define K00 0x0ffc19fd |
161 | #define K01 0x00b2aa3e |
162 | #define K02 0x0fdcf549 |
163 | #define K03 0x0216a2a2 |
164 | #define K04 0x0f9ee890 |
165 | #define K05 0x03768962 |
166 | #define K06 0x0f426cb5 |
167 | #define K07 0x04cfb0e2 |
168 | #define K08 0x0ec835e8 |
169 | #define K09 0x061f78aa |
170 | #define K10 0x0e313245 |
171 | #define K11 0x07635284 |
172 | #define K12 0x0d7e8807 |
173 | #define K13 0x0898c779 |
174 | #define K14 0x0cb19346 |
175 | #define K15 0x09bd7ca0 |
176 | #define K16 0x0bcbe352 |
177 | #define K17 0x0acf37ad |
178 | |
179 | #define minus_K02 0xf0230ab7 |
180 | |
@ Long window coefficients: WLn is used as window_l[n] by the windowing code.
@ Same 28 fractional bit fixed point format as the Knn constants.
@ The stop and start block paths also use WL1, WL4, WL7, WL10, WL13 and WL16
@ as the short window values window_s[0] .. window_s[5].
181 | #define WL0 0x00b2aa3e |
182 | #define WL1 0x0216a2a2 |
183 | #define WL2 0x03768962 |
184 | #define WL3 0x04cfb0e2 |
185 | #define WL4 0x061f78aa |
186 | #define WL5 0x07635284 |
187 | #define WL6 0x0898c779 |
188 | #define WL7 0x09bd7ca0 |
189 | #define WL8 0x0acf37ad |
190 | #define WL9 0x0bcbe352 |
191 | #define WL10 0x0cb19346 |
192 | #define WL11 0x0d7e8807 |
193 | #define WL12 0x0e313245 |
194 | #define WL13 0x0ec835e8 |
195 | #define WL14 0x0f426cb5 |
196 | #define WL15 0x0f9ee890 |
197 | #define WL16 0x0fdcf549 |
198 | #define WL17 0x0ffc19fd |
199 | |
200 | |
201 | @***************************************************************************** |
202 | |
203 | |
@ III_imdct_l / _III_imdct_l (one entry point exported under two names)
@
@ In: r0 = pointer to 18 word input array (only read in this stage)
@ r1 = pointer to 36 word output array
@ r2 = windowing block type, pushed on entry for use by the windowing stage
@ r4 - r11 and lr are saved on entry and restored on exit. r0 - r3 and r12
@ are overwritten and not restored.
@
@ This first stage builds the 64 bit intermediate sums ct00 .. ct07 (plus
@ negated copies of ct05 and ct07) on the stack and stores six outputs
@ directly: output words 22, 4, 7, 1, 25 and 19.
@
@ Scaling idiom used throughout the file: the movs with lsr 28 leaves bit 27
@ in the carry flag and the adc that follows adds it back in, so each 64 bit
@ sum is rounded to nearest when bits[59..28] are extracted.
204 | .text |
205 | .align |
206 | |
207 | .global III_imdct_l |
208 | .global _III_imdct_l |
209 | |
210 | III_imdct_l: |
211 | _III_imdct_l: |
212 | |
213 | stmdb sp!, { r2, r4 - r11, lr } @ all callee saved regs, plus arg3 |
214 | |
215 | ldr r4, =K08 @ r4 = K08 |
216 | ldr r5, =K09 @ r5 = K09 |
217 | ldr r8, [r0, #X4] @ r8 = X4 |
218 | ldr r9, [r0, #X13] @ r9 = X13 |
219 | rsb r6, r4, #0 @ r6 = -K08 |
220 | rsb r7, r5, #0 @ r7 = -K09 |
221 | |
222 | smull r2, r3, r4, r8 @ r2..r3 = (X4 * K08) |
223 | smlal r2, r3, r5, r9 @ r2..r3 = (X4 * K08) + (X13 * K09) = ct01 |
224 | |
225 | smull r10, lr, r8, r5 @ r10..lr = (X4 * K09) |
226 | smlal r10, lr, r9, r6 @ r10..lr = (X4 * K09) + (X13 * -K08) = ct00 |
227 | |
228 | ldr r8, [r0, #X7] @ r8 = X7 |
229 | ldr r9, [r0, #X16] @ r9 = X16 |
230 | |
231 | stmdb sp!, { r2, r3, r10, lr } @ stack ct00_h, ct00_l, ct01_h, ct01_l |
232 | |
233 | add r8, r8, r9 @ r8 = (X7 + X16) |
234 | ldr r9, [r0, #X1] @ r9 = X1 |
235 | |
236 | smlal r2, r3, r6, r8 @ r2..r3 = ct01 + ((X7 + X16) * -K08) |
237 | smlal r2, r3, r7, r9 @ r2..r3 += (X1 * -K09) |
238 | |
239 | ldr r7, [r0, #X10] @ r7 = X10 |
240 | |
241 | rsbs r10, r10, #0 |
242 | rsc lr, lr, #0 @ r10..lr = -ct00 |
243 | |
244 | smlal r2, r3, r5, r7 @ r2..r3 += (X10 * K09) = ct06 |
245 | |
246 | smlal r10, lr, r9, r6 @ r10..lr = -ct00 + ( X1 * -K08) |
247 | smlal r10, lr, r8, r5 @ r10..lr += ((X7 + X16) * K09) |
248 | smlal r10, lr, r7, r4 @ r10..lr += ( X10 * K08) = ct04 |
249 | |
250 | stmdb sp!, { r2, r3, r10, lr } @ stack ct04_h, ct04_l, ct06_h, ct06_l |
251 | |
252 | @---- |
253 | |
254 | ldr r7, [r0, #X0] |
255 | ldr r8, [r0, #X11] |
256 | ldr r9, [r0, #X12] |
257 | sub r7, r7, r8 |
258 | sub r7, r7, r9 @ r7 = (X0 - X11 -X12) = ct14 |
259 | |
260 | ldr r9, [r0, #X3] |
261 | ldr r8, [r0, #X8] |
262 | ldr r11, [r0, #X15] |
263 | sub r8, r8, r9 |
264 | add r8, r8, r11 @ r8 = (X8 - X3 + X15) = ct16 |
265 | |
266 | add r11, r7, r8 @ r11 = ct14 + ct16 = ct18 |
267 | |
268 | smlal r2, r3, r6, r11 @ r2..r3 = ct06 + ((X0 - X11 - X3 + X15 + X8 - X12) * -K08) |
269 | |
270 | ldr r6, [r0, #X2] |
271 | ldr r9, [r0, #X9] |
272 | ldr r12, [r0, #X14] |
273 | sub r6, r6, r9 |
274 | sub r6, r6, r12 @ r6 = (X2 - X9 - X14) = ct15 |
275 | |
276 | ldr r9, [r0, #X5] |
277 | ldr r12, [r0, #X6] |
278 | sub r9, r9, r12 |
279 | ldr r12, [r0, #X17] |
280 | sub r9, r9, r12 @ r9 = (X5 - X6 - X17) = ct17 |
281 | |
282 | add r12, r9, r6 @ r12 = ct15 + ct17 = ct19 |
283 | |
284 | smlal r2, r3, r5, r12 @ r2..r3 += ((X2 - X9 + X5 - X6 - X17 - X14) * K09) |
285 | |
286 | smlal r10, lr, r11, r5 @ r10..lr = ct04 + (ct18 * K09) |
287 | smlal r10, lr, r12, r4 @ r10..lr = ct04 + (ct18 * K09) + (ct19 * K08) |
288 | |
289 | movs r2, r2, lsr #28 |
290 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 |
291 | str r2, [r1, #x22] @ store result x22 |
292 | |
293 | movs r10, r10, lsr #28 |
294 | adc r10, r10, lr, lsl #4 @ r10 = bits[59..28] of r10..lr |
295 | str r10, [r1, #x4] @ store result x4 |
296 | |
297 | @---- |
298 | |
299 | ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp) |
300 | |
301 | @ r2..r3 = ct06 |
302 | @ r4..r5 = ct04 |
303 | @ r6 = ct15 |
304 | @ r7 = ct14 |
305 | @ r8 = ct16 |
306 | @ r9 = ct17 |
307 | @ r10 = . |
308 | @ r11 = . |
309 | @ r12 = . |
310 | @ lr = . |
311 | |
312 | ldr r10, =K03 @ r10 = K03 |
313 | ldr lr, =K15 @ lr = K15 |
314 | |
315 | smlal r2, r3, r10, r7 @ r2..r3 = ct06 + (ct14 * K03) |
316 | smlal r4, r5, lr, r7 @ r4..r5 = ct04 + (ct14 * K15) |
317 | |
318 | ldr r12, =K14 @ r12 = K14 |
319 | rsb r10, r10, #0 @ r10 = -K03 |
320 | |
321 | smlal r2, r3, lr, r6 @ r2..r3 += (ct15 * K15) |
322 | smlal r4, r5, r10, r6 @ r4..r5 += (ct15 * -K03) |
323 | smlal r2, r3, r12, r8 @ r2..r3 += (ct16 * K14) |
324 | |
325 | ldr r11, =minus_K02 @ r11 = -K02 |
326 | rsb r12, r12, #0 @ r12 = -K14 |
327 | |
328 | smlal r4, r5, r12, r9 @ r4..r5 += (ct17 * -K14) |
329 | smlal r2, r3, r11, r9 @ r2..r3 += (ct17 * -K02) |
330 | smlal r4, r5, r11, r8 @ r4..r5 += (ct16 * -K02) |
331 | |
332 | movs r2, r2, lsr #28 |
333 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 |
334 | str r2, [r1, #x7] @ store result x7 |
335 | |
336 | movs r4, r4, lsr #28 |
337 | adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5 |
338 | str r4, [r1, #x1] @ store result x1 |
339 | |
340 | @---- |
341 | |
342 | ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp) |
343 | |
344 | @ r2..r3 = ct06 |
345 | @ r4..r5 = ct04 |
346 | @ r6 = ct15 |
347 | @ r7 = ct14 |
348 | @ r8 = ct16 |
349 | @ r9 = ct17 |
350 | @ r10 = -K03 |
351 | @ r11 = -K02 |
352 | @ r12 = -K14 |
353 | @ lr = K15 |
354 | |
355 | rsbs r2, r2, #0 |
356 | rsc r3, r3, #0 @ r2..r3 = -ct06 |
357 | |
358 | smlal r2, r3, r12, r7 @ r2..r3 = -ct06 + (ct14 * -K14) |
359 | smlal r2, r3, r10, r8 @ r2..r3 += (ct16 * -K03) |
360 | |
361 | smlal r4, r5, r12, r6 @ r4..r5 = ct04 + (ct15 * -K14) |
362 | smlal r4, r5, r10, r9 @ r4..r5 += (ct17 * -K03) |
363 | smlal r4, r5, lr, r8 @ r4..r5 += (ct16 * K15) |
364 | smlal r4, r5, r11, r7 @ r4..r5 += (ct14 * -K02) |
365 | |
366 | rsb lr, lr, #0 @ lr = -K15 |
367 | rsb r11, r11, #0 @ r11 = K02 |
368 | |
369 | smlal r2, r3, lr, r9 @ r2..r3 += (ct17 * -K15) |
370 | smlal r2, r3, r11, r6 @ r2..r3 += (ct15 * K02) |
371 | |
372 | movs r4, r4, lsr #28 |
373 | adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5 |
374 | str r4, [r1, #x25] @ store result x25 |
375 | |
376 | movs r2, r2, lsr #28 |
377 | adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3 |
378 | str r2, [r1, #x19] @ store result x19 |
379 | |
380 | @---- |
381 | |
382 | ldr r2, [sp, #16] @ r2 = ct01_l (sits above the 4 words of ct06 and ct04) |
383 | ldr r3, [sp, #20] @ r3 = ct01_h |
384 | |
385 | ldr r6, [r0, #X1] |
386 | ldr r8, [r0, #X7] |
387 | ldr r9, [r0, #X10] |
388 | ldr r7, [r0, #X16] |
389 | |
390 | rsbs r2, r2, #0 |
391 | rsc r3, r3, #0 @ r2..r3 = -ct01 |
392 | |
393 | mov r4, r2 |
394 | mov r5, r3 @ r4..r5 = -ct01 |
395 | |
396 | @ r2..r3 = -ct01 |
397 | @ r4..r5 = -ct01 |
398 | @ r6 = X1 |
399 | @ r7 = X16 |
400 | @ r8 = X7 |
401 | @ r9 = X10 |
402 | @ r10 = -K03 |
403 | @ r11 = K02 |
404 | @ r12 = -K14 |
405 | @ lr = -K15 |
406 | |
407 | smlal r4, r5, r12, r7 @ r4..r5 = -ct01 + (X16 * -K14) |
408 | smlal r2, r3, lr, r9 @ r2..r3 = -ct01 + (X10 * -K15) |
409 | |
410 | smlal r4, r5, r10, r8 @ r4..r5 += (X7 * -K03) |
411 | smlal r2, r3, r10, r7 @ r2..r3 += (X16 * -K03) |
412 | |
413 | smlal r4, r5, r11, r9 @ r4..r5 += (X10 * K02) |
414 | smlal r2, r3, r12, r8 @ r2..r3 += (X7 * -K14) |
415 | |
416 | rsb lr, lr, #0 @ lr = K15 |
417 | rsb r11, r11, #0 @ r11 = -K02 |
418 | |
419 | smlal r4, r5, lr, r6 @ r4..r5 += (X1 * K15) = ct05 |
420 | smlal r2, r3, r11, r6 @ r2..r3 += (X1 * -K02) = ct03 |
421 | |
422 | stmdb sp!, { r2, r3, r4, r5 } @ stack ct05_h, ct05_l, ct03_h, ct03_l |
423 | |
424 | rsbs r4, r4, #0 |
425 | rsc r5, r5, #0 @ r4..r5 = -ct05 |
426 | |
427 | stmdb sp!, { r4, r5 } @ stack -ct05_h, -ct05_l |
428 | |
429 | ldr r2, [sp, #48] @ r2 = ct00_l (above -ct05, ct03, ct05, ct06, ct04 and ct01: 12 words) |
430 | ldr r3, [sp, #52] @ r3 = ct00_h |
431 | |
432 | rsb r10, r10, #0 @ r10 = K03 |
433 | |
434 | rsbs r4, r2, #0 |
435 | rsc r5, r3, #0 @ r4..r5 = -ct00 |
436 | |
437 | @ r2..r3 = ct00 |
438 | @ r4..r5 = -ct00 |
439 | @ r6 = X1 |
440 | @ r7 = X16 |
441 | @ r8 = X7 |
442 | @ r9 = X10 |
443 | @ r10 = K03 |
444 | @ r11 = -K02 |
445 | @ r12 = -K14 |
446 | @ lr = K15 |
447 | |
448 | smlal r4, r5, r10, r6 @ r4..r5 = -ct00 + (X1 * K03) |
449 | smlal r2, r3, r10, r9 @ r2..r3 = ct00 + (X10 * K03) |
450 | |
451 | smlal r4, r5, r12, r9 @ r4..r5 += (X10 * -K14) |
452 | smlal r2, r3, r12, r6 @ r2..r3 += (X1 * -K14) |
453 | |
454 | smlal r4, r5, r11, r7 @ r4..r5 += (X16 * -K02) |
455 | smlal r4, r5, lr, r8 @ r4..r5 += (X7 * K15) = ct07 |
456 | |
457 | rsb lr, lr, #0 @ lr = -K15 |
458 | rsb r11, r11, #0 @ r11 = K02 |
459 | |
460 | smlal r2, r3, r11, r8 @ r2..r3 += (X7 * K02) |
461 | smlal r2, r3, lr, r7 @ r2..r3 += (X16 * -K15) = ct02 |
462 | |
463 | rsbs r6, r4, #0 |
464 | rsc r7, r5, #0 @ r6..r7 = -ct07 |
465 | |
466 | stmdb sp!, { r2 - r7 } @ stack -ct07_h, -ct07_l, ct07_h, ct07_l, ct02_h, ct02_l |
@ Byte offsets from sp from here on (low word first in each pair):
@ 0 ct02, 8 ct07, 16 -ct07, 24 -ct05, 32 ct03, 40 ct05, 48 ct06, 56 ct04,
@ 64 ct01, 72 ct00, 80 saved r2 (window block type).
467 | |
468 | |
469 | @---- |
470 | |
@ Table driven loop producing the twelve outputs not stored by the code
@ above. Each pass consumes one 13 word row of imdct36_long_karray:
@ twelve multipliers, applied in order to input words 0, 2, 3, 5, 6, 8, 9,
@ 11, 12, 14, 15 and 17, followed by one control word.
@ Control word layout:
@ bits[31..16] = byte offset from sp of the 64 bit ctxx term added to the sum
@ bits[15..8] = byte offset of the result within the output array
@ bits[7..0] = nonzero only in the last row (ends the loop)
@ Byte offsets from sp while the loop runs:
@ 0 ct02, 8 ct07, 16 -ct07, 24 -ct05, 32 ct03, 40 ct05, 48 ct06, 56 ct04.
@ Registers: r0 = input array, r1 = output array, r2 = table read pointer,
@ r3..r4 = 64 bit running sum.
471 | add r2, pc, #(imdct36_long_karray-.-8) @ r2 = base address of Knn array, pc relative as pc reads as . + 8 (PIC safe ?) |
472 | |
473 | |
474 | loop: |
475 | ldr r12, [r0, #X0] |
476 | |
477 | ldmia r2!, { r5 - r11 } @ first 7 words from Karray element |
478 | |
479 | smull r3, r4, r5, r12 @ sum = (Kxx * X0) |
480 | ldr r12, [r0, #X2] |
481 | ldr r5, [r0, #X3] |
482 | smlal r3, r4, r6, r12 @ sum += (Kxx * X2) |
483 | ldr r12, [r0, #X5] |
484 | ldr r6, [r0, #X6] |
485 | smlal r3, r4, r7, r5 @ sum += (Kxx * X3) |
486 | smlal r3, r4, r8, r12 @ sum += (Kxx * X5) |
487 | ldr r12, [r0, #X8] |
488 | ldr r5, [r0, #X9] |
489 | smlal r3, r4, r9, r6 @ sum += (Kxx * X6) |
490 | smlal r3, r4, r10, r12 @ sum += (Kxx * X8) |
491 | smlal r3, r4, r11, r5 @ sum += (Kxx * X9) |
492 | |
493 | ldmia r2!, { r5 - r10 } @ final 6 words from Karray element (5 multipliers, then control word in r10) |
494 | |
495 | ldr r11, [r0, #X11] |
496 | ldr r12, [r0, #X12] |
497 | smlal r3, r4, r5, r11 @ sum += (Kxx * X11) |
498 | ldr r11, [r0, #X14] |
499 | ldr r5, [r0, #X15] |
500 | smlal r3, r4, r6, r12 @ sum += (Kxx * X12) |
501 | smlal r3, r4, r7, r11 @ sum += (Kxx * X14) |
502 | ldr r11, [r0, #X17] |
503 | smlal r3, r4, r8, r5 @ sum += (Kxx * X15) |
504 | smlal r3, r4, r9, r11 @ sum += (Kxx * X17) |
505 | |
506 | add r5, sp, r10, lsr #16 @ create index back into stack for required ctxx |
507 | |
508 | ldmia r5, { r6, r7 } @ r6..r7 = ctxx |
509 | |
510 | mov r8, r10, lsl #16 @ push ctxx index off the top end |
511 | |
512 | adds r3, r3, r6 @ add low words |
513 | adc r4, r4, r7 @ add high words, with carry |
514 | movs r3, r3, lsr #28 |
515 | adc r3, r3, r4, lsl #4 @ r3 = bits[59..28] of r3..r4 |
516 | |
517 | str r3, [r1, r8, lsr #24] @ push completion flag off the bottom end |
518 | |
519 | movs r8, r8, lsl #8 @ push result location index off the top end |
520 | beq loop @ loop back if completion flag not set |
521 | b imdct_l_windowing @ branch to windowing stage if looping finished |
522 | |
@ Coefficient table read by the loop above: 12 rows of 13 words.
@ Words 0..11 of a row are the multipliers for input words 0, 2, 3, 5, 6, 8,
@ 9, 11, 12, 14, 15 and 17. Word 12 is the control word:
@ (ctxx byte offset from sp << 16) | (output byte offset << 8) | last row flag.
@ Reading the output offsets row by row, the results go to output words
@ 0, 2, 3, 5, 6, 8, 18, 20, 21, 23, 24 and 26. Only the final row has the
@ flag byte set.
@ The table is placed in the instruction stream; the code before it ends in
@ an unconditional branch so it is never executed.
523 | imdct36_long_karray: |
524 | |
525 | .word K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000 |
526 | .word K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00200800 |
527 | .word K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00200c00 |
528 | .word K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400 |
529 | .word K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x00181800 |
530 | .word K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00102000 |
531 | .word -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00284800 |
532 | .word -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00085000 |
533 | .word -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00105400 |
534 | .word -K06, -K01, K13, K04, K17, -K11, -K10, -K16, -K05, K12, K00, K07, 0x00185c00 |
535 | .word -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000 |
536 | .word -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00206801 |
537 | |
538 | |
539 | @---- |
540 | @------------------------------------------------------------------------- |
541 | @---- |
542 | |
@ Entry to the windowing stage. The transform code has written output words
@ 0..8 and 18..26 only; the windowing paths derive words 9..17 and 27..35
@ from them (negated mirror for 9..17, plain mirror for 27..35) while
@ applying the window.
@ Here: fetch the block type, load output words 0..8 and dispatch on stop
@ blocks. r0 stops being the input pointer from this point on.
543 | imdct_l_windowing: |
544 | |
545 | ldr r11, [sp, #80] @ fetch function parameter 3 from out of the stack (saved r2, above 20 words of ctxx) |
546 | ldmia r1!, { r0, r2 - r9 } @ load 9 words from x0, update pointer |
547 | |
548 | @ r0 = x0 |
549 | @ r1 = &x[9] |
550 | @ r2 = x1 |
551 | @ r3 = x2 |
552 | @ r4 = x3 |
553 | @ r5 = x4 |
554 | @ r6 = x5 |
555 | @ r7 = x6 |
556 | @ r8 = x7 |
557 | @ r9 = x8 |
558 | @ r10 = . |
559 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) |
560 | @ r12 = . |
561 | @ lr = . |
562 | |
563 | cmp r11, #BLOCK_MODE_STOP @ setup flags |
564 | rsb r10, r0, #0 @ r10 = -x0 (DONT change flags !!) |
565 | beq stop_block_x0_to_x17 |
566 | |
567 | |
568 | @ start and normal blocks are treated the same for x[0]..x[17] |
569 | |
@ Windows output words 0..17 for normal and start blocks.
@ On entry r0, r2 - r9 hold output words 0..8, r10 = minus word 0,
@ r1 = address of output word 9, r11 = window mode.
@ First the negated mirror is formed in registers (word 9+i = minus word 8-i),
@ multiplied by WL9 .. WL17 and stored to words 9..17. Then words 0..8 are
@ reloaded, multiplied by WL0 .. WL8 and stored back, leaving r1 at word 0.
@ r11 is kept untouched for the start block test at the end.
570 | normal_block_x0_to_x17: |
571 | |
572 | ldr r12, =WL9 @ r12 = window_l[9] |
573 | |
574 | rsb r0, r9, #0 @ r0 = -x8 |
575 | rsb r9, r2, #0 @ r9 = -x1 |
576 | rsb r2, r8, #0 @ r2 = -x7 |
577 | rsb r8, r3, #0 @ r8 = -x2 |
578 | rsb r3, r7, #0 @ r3 = -x6 |
579 | rsb r7, r4, #0 @ r7 = -x3 |
580 | rsb r4, r6, #0 @ r4 = -x5 |
581 | rsb r6, r5, #0 @ r6 = -x4 |
582 | |
583 | @ r0 = -x8 |
584 | @ r1 = &x[9] |
585 | @ r2 = -x7 |
586 | @ r3 = -x6 |
587 | @ r4 = -x5 |
588 | @ r5 = . |
589 | @ r6 = -x4 |
590 | @ r7 = -x3 |
591 | @ r8 = -x2 |
592 | @ r9 = -x1 |
593 | @ r10 = -x0 |
594 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) |
595 | @ r12 = window_l[9] |
596 | @ lr = . |
597 | |
598 | smull r5, lr, r12, r0 @ r5..lr = (window_l[9] * (x[9] == -x[8])) |
599 | ldr r12, =WL10 @ r12 = window_l[10] |
600 | movs r5, r5, lsr #28 |
601 | adc r0, r5, lr, lsl #4 @ r0 = bits[59..28] of windowed x9 |
602 | |
603 | smull r5, lr, r12, r2 @ r5..lr = (window_l[10] * (x[10] == -x[7])) |
604 | ldr r12, =WL11 @ r12 = window_l[11] |
605 | movs r5, r5, lsr #28 |
606 | adc r2, r5, lr, lsl #4 @ r2 = bits[59..28] of windowed x10 |
607 | |
608 | smull r5, lr, r12, r3 @ r5..lr = (window_l[11] * (x[11] == -x[6])) |
609 | ldr r12, =WL12 @ r12 = window_l[12] |
610 | movs r5, r5, lsr #28 |
611 | adc r3, r5, lr, lsl #4 @ r3 = bits[59..28] of windowed x11 |
612 | |
613 | smull r5, lr, r12, r4 @ r5..lr = (window_l[12] * (x[12] == -x[5])) |
614 | ldr r12, =WL13 @ r12 = window_l[13] |
615 | movs r5, r5, lsr #28 |
616 | adc r4, r5, lr, lsl #4 @ r4 = bits[59..28] of windowed x12 |
617 | |
618 | smull r5, lr, r12, r6 @ r5..lr = (window_l[13] * (x[13] == -x[4])) |
619 | ldr r12, =WL14 @ r12 = window_l[14] |
620 | movs r5, r5, lsr #28 |
621 | adc r6, r5, lr, lsl #4 @ r6 = bits[59..28] of windowed x13 |
622 | |
623 | smull r5, lr, r12, r7 @ r5..lr = (window_l[14] * (x[14] == -x[3])) |
624 | ldr r12, =WL15 @ r12 = window_l[15] |
625 | movs r5, r5, lsr #28 |
626 | adc r7, r5, lr, lsl #4 @ r7 = bits[59..28] of windowed x14 |
627 | |
628 | smull r5, lr, r12, r8 @ r5..lr = (window_l[15] * (x[15] == -x[2])) |
629 | ldr r12, =WL16 @ r12 = window_l[16] |
630 | movs r5, r5, lsr #28 |
631 | adc r8, r5, lr, lsl #4 @ r8 = bits[59..28] of windowed x15 |
632 | |
633 | smull r5, lr, r12, r9 @ r5..lr = (window_l[16] * (x[16] == -x[1])) |
634 | ldr r12, =WL17 @ r12 = window_l[17] |
635 | movs r5, r5, lsr #28 |
636 | adc r9, r5, lr, lsl #4 @ r9 = bits[59..28] of windowed x16 |
637 | |
638 | smull r5, lr, r12, r10 @ r5..lr = (window_l[17] * (x[17] == -x[0])) |
639 | ldr r12, =WL0 @ r12 = window_l[0] |
640 | movs r5, r5, lsr #28 |
641 | adc r10, r5, lr, lsl #4 @ r10 = bits[59..28] of windowed x17 |
642 | |
643 | |
644 | stmia r1, { r0, r2 - r4, r6 - r10 } @ store windowed x[9] .. x[17] |
645 | ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x0 |
646 | |
647 | |
648 | smull r10, lr, r12, r0 @ r10..lr = (window_l[0] * x[0]) |
649 | ldr r12, =WL1 @ r12 = window_l[1] |
650 | movs r10, r10, lsr #28 |
651 | adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x0 |
652 | |
653 | smull r10, lr, r12, r2 @ r10..lr = (window_l[1] * x[1]) |
654 | ldr r12, =WL2 @ r12 = window_l[2] |
655 | movs r10, r10, lsr #28 |
656 | adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x1 |
657 | |
658 | smull r10, lr, r12, r3 @ r10..lr = (window_l[2] * x[2]) |
659 | ldr r12, =WL3 @ r12 = window_l[3] |
660 | movs r10, r10, lsr #28 |
661 | adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x2 |
662 | |
663 | smull r10, lr, r12, r4 @ r10..lr = (window_l[3] * x[3]) |
664 | ldr r12, =WL4 @ r12 = window_l[4] |
665 | movs r10, r10, lsr #28 |
666 | adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x3 |
667 | |
668 | smull r10, lr, r12, r5 @ r10..lr = (window_l[4] * x[4]) |
669 | ldr r12, =WL5 @ r12 = window_l[5] |
670 | movs r10, r10, lsr #28 |
671 | adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x4 |
672 | |
673 | smull r10, lr, r12, r6 @ r10..lr = (window_l[5] * x[5]) |
674 | ldr r12, =WL6 @ r12 = window_l[6] |
675 | movs r10, r10, lsr #28 |
676 | adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x5 |
677 | |
678 | smull r10, lr, r12, r7 @ r10..lr = (window_l[6] * x[6]) |
679 | ldr r12, =WL7 @ r12 = window_l[7] |
680 | movs r10, r10, lsr #28 |
681 | adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x6 |
682 | |
683 | smull r10, lr, r12, r8 @ r10..lr = (window_l[7] * x[7]) |
684 | ldr r12, =WL8 @ r12 = window_l[8] |
685 | movs r10, r10, lsr #28 |
686 | adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x7 |
687 | |
688 | smull r10, lr, r12, r9 @ r10..lr = (window_l[8] * x[8]) |
689 | movs r10, r10, lsr #28 |
690 | adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x8 |
691 | |
692 | stmia r1, { r0, r2 - r9 } @ store windowed x[0] .. x[8] |
693 | |
694 | cmp r11, #BLOCK_MODE_START |
695 | beq start_block_x18_to_x35 |
696 | |
697 | |
698 | @---- |
699 | |
700 | |
@ Windows output words 18..35 for normal and stop blocks, then returns.
@ Reached by fall through (block type is not start) or by branch from the
@ stop block path; in both cases r1 = address of output word 0 on entry.
@ Words 27..35 are produced first as the mirror of words 26..18 multiplied
@ by WL8 .. WL0, then words 18..26 are reloaded and multiplied by
@ WL17 .. WL9.
701 | normal_block_x18_to_x35: |
702 | |
703 | ldr r11, =WL3 @ r11 = window_l[3] |
704 | ldr r12, =WL4 @ r12 = window_l[4] |
705 | |
706 | add r1, r1, #(18*4) @ r1 = &x[18] |
707 | |
708 | ldmia r1!, { r0, r2 - r4, r6 - r10 } @ load 9 words from x18, update pointer |
709 | |
710 | @ r0 = x18 |
711 | @ r1 = &x[27] |
712 | @ r2 = x19 |
713 | @ r3 = x20 |
714 | @ r4 = x21 |
715 | @ r5 = . |
716 | @ r6 = x22 |
717 | @ r7 = x23 |
718 | @ r8 = x24 |
719 | @ r9 = x25 |
720 | @ r10 = x26 |
721 | @ r11 = window_l[3] |
722 | @ r12 = window_l[4] |
723 | @ lr = . |
724 | |
725 | smull r5, lr, r12, r6 @ r5..lr = (window_l[4] * (x[22] == x[31])) |
726 | movs r5, r5, lsr #28 |
727 | adc r5, r5, lr, lsl #4 @ r5 = bits[59..28] of windowed x31 |
728 | |
729 | smull r6, lr, r11, r4 @ r6..lr = (window_l[3] * (x[21] == x[32])) |
730 | ldr r12, =WL5 @ r12 = window_l[5] |
731 | movs r6, r6, lsr #28 |
732 | adc r6, r6, lr, lsl #4 @ r6 = bits[59..28] of windowed x32 |
733 | |
734 | smull r4, lr, r12, r7 @ r4..lr = (window_l[5] * (x[23] == x[30])) |
735 | ldr r11, =WL1 @ r11 = window_l[1] |
736 | ldr r12, =WL2 @ r12 = window_l[2] |
737 | movs r4, r4, lsr #28 |
738 | adc r4, r4, lr, lsl #4 @ r4 = bits[59..28] of windowed x30 |
739 | |
740 | smull r7, lr, r12, r3 @ r7..lr = (window_l[2] * (x[20] == x[33])) |
741 | ldr r12, =WL6 @ r12 = window_l[6] |
742 | movs r7, r7, lsr #28 |
743 | adc r7, r7, lr, lsl #4 @ r7 = bits[59..28] of windowed x33 |
744 | |
745 | smull r3, lr, r12, r8 @ r3..lr = (window_l[6] * (x[24] == x[29])) |
746 | movs r3, r3, lsr #28 |
747 | adc r3, r3, lr, lsl #4 @ r3 = bits[59..28] of windowed x29 |
748 | |
749 | smull r8, lr, r11, r2 @ r8..lr = (window_l[1] * (x[19] == x[34])) |
750 | ldr r12, =WL7 @ r12 = window_l[7] |
751 | ldr r11, =WL8 @ r11 = window_l[8] |
752 | movs r8, r8, lsr #28 |
753 | adc r8, r8, lr, lsl #4 @ r8 = bits[59..28] of windowed x34 |
754 | |
755 | smull r2, lr, r12, r9 @ r2..lr = (window_l[7] * (x[25] == x[28])) |
756 | ldr r12, =WL0 @ r12 = window_l[0] |
757 | movs r2, r2, lsr #28 |
758 | adc r2, r2, lr, lsl #4 @ r2 = bits[59..28] of windowed x28 |
759 | |
760 | smull r9, lr, r12, r0 @ r9..lr = (window_l[0] * (x[18] == x[35])) |
761 | movs r9, r9, lsr #28 |
762 | adc r9, r9, lr, lsl #4 @ r9 = bits[59..28] of windowed x35 |
763 | |
764 | smull r0, lr, r11, r10 @ r0..lr = (window_l[8] * (x[26] == x[27])) |
765 | ldr r11, =WL16 @ r11 = window_l[16] |
766 | ldr r12, =WL17 @ r12 = window_l[17] |
767 | movs r0, r0, lsr #28 |
768 | adc r0, r0, lr, lsl #4 @ r0 = bits[59..28] of windowed x27 |
769 | |
770 | |
771 | stmia r1, { r0, r2 - r9 } @ store windowed x[27] .. x[35] |
772 | ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x18 |
773 | |
774 | |
775 | smull r10, lr, r12, r0 @ r10..lr = (window_l[17] * x[18]) |
776 | movs r10, r10, lsr #28 |
777 | adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x18 |
778 | |
779 | smull r10, lr, r11, r2 @ r10..lr = (window_l[16] * x[19]) |
780 | ldr r11, =WL14 @ r11 = window_l[14] |
781 | ldr r12, =WL15 @ r12 = window_l[15] |
782 | movs r10, r10, lsr #28 |
783 | adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x19 |
784 | |
785 | smull r10, lr, r12, r3 @ r10..lr = (window_l[15] * x[20]) |
786 | movs r10, r10, lsr #28 |
787 | adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x20 |
788 | |
789 | smull r10, lr, r11, r4 @ r10..lr = (window_l[14] * x[21]) |
790 | ldr r11, =WL12 @ r11 = window_l[12] |
791 | ldr r12, =WL13 @ r12 = window_l[13] |
792 | movs r10, r10, lsr #28 |
793 | adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x21 |
794 | |
795 | smull r10, lr, r12, r5 @ r10..lr = (window_l[13] * x[22]) |
796 | movs r10, r10, lsr #28 |
797 | adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x22 |
798 | |
799 | smull r10, lr, r11, r6 @ r10..lr = (window_l[12] * x[23]) |
800 | ldr r11, =WL10 @ r11 = window_l[10] |
801 | ldr r12, =WL11 @ r12 = window_l[11] |
802 | movs r10, r10, lsr #28 |
803 | adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x23 |
804 | |
805 | smull r10, lr, r12, r7 @ r10..lr = (window_l[11] * x[24]) |
806 | movs r10, r10, lsr #28 |
807 | adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x24 |
808 | |
809 | smull r10, lr, r11, r8 @ r10..lr = (window_l[10] * x[25]) |
810 | ldr r12, =WL9 @ r12 = window_l[9] |
811 | movs r10, r10, lsr #28 |
812 | adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x25 |
813 | |
814 | smull r10, lr, r12, r9 @ r10..lr = (window_l[9] * x[26]) |
815 | |
816 | movs r10, r10, lsr #28 |
817 | adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x26 |
818 | |
819 | stmia r1, { r0, r2 - r9 } @ store windowed x[18] .. x[26] |
820 | |
821 | @---- |
822 | @ NB there are 2 possible exits from this function - this is only one of them |
823 | @---- |
824 | |
825 | add sp, sp, #(21*4) @ return stack frame: 20 words of ctxx plus the saved r2 |
826 | ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return |
827 | |
828 | @---- |
829 | |
830 | |
@ Output words 0..17 for a stop block:
@ words 12..17 = minus words 5..0 with no window multiply,
@ words 6..11 = words 6, 7, 8 and their negated mirror (minus 8, 7, 6)
@ multiplied by WL1, WL4, WL7, WL10, WL13, WL16,
@ words 0..5 = zero.
@ Leaves r1 at output word 0 and continues in normal_block_x18_to_x35.
831 | stop_block_x0_to_x17: |
832 | |
833 | @ r0 = x0 |
834 | @ r1 = &x[9] |
835 | @ r2 = x1 |
836 | @ r3 = x2 |
837 | @ r4 = x3 |
838 | @ r5 = x4 |
839 | @ r6 = x5 |
840 | @ r7 = x6 |
841 | @ r8 = x7 |
842 | @ r9 = x8 |
843 | @ r10 = -x0 |
844 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) |
845 | @ r12 = . |
846 | @ lr = . |
847 | |
848 | rsb r0, r6, #0 @ r0 = -x5 |
849 | rsb r6, r2, #0 @ r6 = -x1 |
850 | rsb r2, r5, #0 @ r2 = -x4 |
851 | rsb r5, r3, #0 @ r5 = -x2 |
852 | rsb r3, r4, #0 @ r3 = -x3 |
853 | |
854 | add r1, r1, #(3*4) @ r1 = &x[12] |
855 | stmia r1, { r0, r2, r3, r5, r6, r10 } @ store x[12] .. x[17] = -x[5] .. -x[0], no window multiply applied |
856 | |
857 | ldr r0, =WL1 @ r0 = window_l[1] == window_s[0] |
858 | |
859 | rsb r10, r9, #0 @ r10 = -x8 |
860 | rsb r12, r8, #0 @ r12 = -x7 |
861 | rsb lr, r7, #0 @ lr = -x6 |
862 | |
863 | @ r0 = WL1 |
864 | @ r1 = &x[12] |
865 | @ r2 = . |
866 | @ r3 = . |
867 | @ r4 = . |
868 | @ r5 = . |
869 | @ r6 = . |
870 | @ r7 = x6 |
871 | @ r8 = x7 |
872 | @ r9 = x8 |
873 | @ r10 = -x8 |
874 | @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block) |
875 | @ r12 = -x7 |
876 | @ lr = -x6 |
877 | |
878 | smull r5, r6, r0, r7 @ r5..r6 = (window_l[1] * x[6]) |
879 | ldr r2, =WL4 @ r2 = window_l[4] == window_s[1] |
880 | movs r5, r5, lsr #28 |
881 | adc r7, r5, r6, lsl #4 @ r7 = bits[59..28] of windowed x6 |
882 | |
883 | smull r5, r6, r2, r8 @ r5..r6 = (window_l[4] * x[7]) |
884 | ldr r3, =WL7 @ r3 = window_l[7] == window_s[2] |
885 | movs r5, r5, lsr #28 |
886 | adc r8, r5, r6, lsl #4 @ r8 = bits[59..28] of windowed x7 |
887 | |
888 | smull r5, r6, r3, r9 @ r5..r6 = (window_l[7] * x[8]) |
889 | ldr r4, =WL10 @ r4 = window_l[10] == window_s[3] |
890 | movs r5, r5, lsr #28 |
891 | adc r9, r5, r6, lsl #4 @ r9 = bits[59..28] of windowed x8 |
892 | |
893 | smull r5, r6, r4, r10 @ r5..r6 = (window_l[10] * (x[9] == -x[8])) |
894 | ldr r0, =WL13 @ r0 = window_l[13] == window_s[4] |
895 | movs r5, r5, lsr #28 |
896 | adc r10, r5, r6, lsl #4 @ r10 = bits[59..28] of windowed x9 |
897 | |
898 | smull r5, r6, r0, r12 @ r5..r6 = (window_l[13] * (x[10] == -x[7])) |
899 | ldr r2, =WL16 @ r2 = window_l[16] == window_s[5] |
900 | movs r5, r5, lsr #28 |
901 | adc r12, r5, r6, lsl #4 @ r12 = bits[59..28] of windowed x10 |
902 | |
903 | smull r5, r6, r2, lr @ r5..r6 = (window_l[16] * (x[11] == -x[6])) |
904 | |
@ r0 = 0: first of the six zero words stored to x[0] .. x[5] below
905 | ldr r0, =0x00 |
906 | |
907 | movs r5, r5, lsr #28 |
908 | adc lr, r5, r6, lsl #4 @ lr = bits[59..28] of windowed x11 |
909 | |
910 | stmdb r1!, { r7 - r10, r12, lr } @ store windowed x[6] .. x[11] |
911 | |
@ remaining zero words; r5 and r6 are free now that x[11] has been stored
912 | ldr r5, =0x00 |
913 | ldr r6, =0x00 |
914 | ldr r2, =0x00 |
915 | ldr r3, =0x00 |
916 | ldr r4, =0x00 |
917 | |
918 | stmdb r1!, { r0, r2 - r6 } @ store x[0] .. x[5], all zero; r1 ends at &x[0] |
919 | |
920 | b normal_block_x18_to_x35 |
921 | |
922 | |
923 | @---- |
924 | |
925 | |
@ Output words 18..35 for a start block, then returns.
@ Entered with r1 = address of output word 0.
@ words 18..23 are left exactly as the transform stored them,
@ words 24..26 are multiplied by WL16, WL13, WL10,
@ words 27..29 are the mirror of words 26..24 multiplied by WL7, WL4, WL1,
@ words 30..35 are set to zero.
926 | start_block_x18_to_x35: |
927 | |
928 | ldr r4, =WL1 @ r4 = window_l[1] == window_s[0] |
929 | |
930 | add r1, r1, #(24*4) @ r1 = &x[24] |
931 | |
932 | ldmia r1, { r0, r2, r3 } @ load 3 words from x24, dont update pointer |
933 | |
934 | @ r0 = x24 |
935 | @ r1 = &x[24] |
936 | @ r2 = x25 |
937 | @ r3 = x26 |
938 | @ r4 = WL1 |
939 | @ r5 = WL4 (loaded below, just before use) |
940 | @ r6 = WL7 (loaded below) |
941 | @ r7 = WL10 (loaded below) |
942 | @ r8 = WL13 (loaded below) |
943 | @ r9 = WL16 (loaded below) |
944 | @ r10 = . |
945 | @ r11 = . |
946 | @ r12 = . |
947 | @ lr = . |
948 | |
949 | ldr r5, =WL4 @ r5 = window_l[4] == window_s[1] |
950 | |
951 | smull r10, r11, r4, r0 @ r10..r11 = (window_l[1] * (x[24] == x[29])) |
952 | ldr r6, =WL7 @ r6 = window_l[7] == window_s[2] |
953 | movs r10, r10, lsr #28 |
954 | adc lr, r10, r11, lsl #4 @ lr = bits[59..28] of windowed x29 |
955 | |
956 | smull r10, r11, r5, r2 @ r10..r11 = (window_l[4] * (x[25] == x[28])) |
957 | ldr r7, =WL10 @ r7 = window_l[10] == window_s[3] |
958 | movs r10, r10, lsr #28 |
959 | adc r12, r10, r11, lsl #4 @ r12 = bits[59..28] of windowed x28 |
960 | |
961 | smull r10, r11, r6, r3 @ r10..r11 = (window_l[7] * (x[26] == x[27])) |
962 | ldr r8, =WL13 @ r8 = window_l[13] == window_s[4] |
963 | movs r10, r10, lsr #28 |
964 | adc r4, r10, r11, lsl #4 @ r4 = bits[59..28] of windowed x27 |
965 | |
966 | smull r10, r11, r7, r3 @ r10..r11 = (window_l[10] * x[26]) |
967 | ldr r9, =WL16 @ r9 = window_l[16] == window_s[5] |
968 | movs r10, r10, lsr #28 |
969 | adc r3, r10, r11, lsl #4 @ r3 = bits[59..28] of windowed x26 |
970 | |
971 | smull r10, r11, r8, r2 @ r10..r11 = (window_l[13] * x[25]) |
972 | ldr r5, =0x00 |
973 | movs r10, r10, lsr #28 |
974 | adc r2, r10, r11, lsl #4 @ r2 = bits[59..28] of windowed x25 |
975 | |
976 | smull r10, r11, r9, r0 @ r10..r11 = (window_l[16] * x[24]) |
977 | ldr r6, =0x00 |
978 | movs r10, r10, lsr #28 |
979 | adc r0, r10, r11, lsl #4 @ r0 = bits[59..28] of windowed x24 |
980 | |
981 | stmia r1!, { r0, r2, r3, r4, r12, lr } @ store windowed x[24] .. x[29] |
982 | |
@ r5 and r6 were already cleared above; clear r7 - r10 to complete six zero words
983 | ldr r7, =0x00 |
984 | ldr r8, =0x00 |
985 | ldr r9, =0x00 |
986 | ldr r10, =0x00 |
987 | |
988 | stmia r1!, { r5 - r10 } @ store x[30] .. x[35], all zero |
989 | |
990 | @---- |
991 | @ NB there are 2 possible exits from this function - this is only one of them |
992 | @---- |
993 | |
994 | add sp, sp, #(21*4) @ return stack frame: 20 words of ctxx plus the saved r2 |
995 | ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return |
996 | |
997 | @---- |
998 | @END |
999 | @---- |
1000 | |
1001 |