1 MODULE ARM_MEMORY
2
3 PUBLIC ARM_MEMCPY
4 PUBLIC ARM_MEMSET
5 PUBLIC ARM_MEMSET8
6 PUBLIC ARM_MEMSET16
7 PUBLIC ARM_MEMSET32
8
9 SECTION .text:CODE:NOROOT(2)
10 CODE32
11
12 ;-------------------------------------------------------------------------------
13 ; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
14 ;
15 ; Function description
16 ; Copy NumBytes bytes from pSrc to pDest, working in ascending address order.
17 ;
18 ; Register usage:
19 ;
20 ; R0 pDest (advanced while copying; not restored before return)
21 ; R1 pSrc (advanced while copying)
22 ; R2 NumBytes (used as remaining-byte counter)
23 ;
24 ; R3 Used for data transfers
25 ; R4 Used for data transfers (saved/restored on the stack in the bulk path)
26 ; R12 Used for data transfers
27 ; R14 Used for data transfers (saved/restored on the stack in the bulk path)
28 ;
29 ; R13 SP
30 ; R14 LR (contains return address)
31 ; R15 PC
32 ;
33 ;-------------------------------------------------------------------------------
34 ARM_MEMCPY:
35 ;-------------------------------------------------------------------------------
36 cmp R2, #+3 ; R2 = NumBytes
37 bls ARM_MEMCPY_HandleTrailingBytes ; Fewer than 4 bytes (unsigned compare): copy them byte by byte
38
39 ands R12, R0, #+3 ; R12 = destination address modulo 4
40 beq ARM_MEMCPY_DestIsDWordAligned ; Destination already word aligned, nothing to fix up
41
42 ;-------------------------------------------------------------------------------
43 ; Copy 4 - R12 bytes (3, 2 or 1) so that the destination becomes word aligned
44 ;
45 ldrb R3, [R1], #+1 ; At least one byte is always needed, so read it unconditionally
46 cmp R12, #+2 ; Flags: R12 = 1 -> LS and CC, R12 = 2 -> LS only, R12 = 3 -> neither
47 add R2, R2, R12 ; First half of NumBytes -= (4 - R12); flags are left untouched
48 ldrbls R12, [R1], #+1 ; Mis-alignment 1 or 2: a second byte is needed
49 strb R3, [R0], #+1
50 ldrbcc R3, [R1], #+1 ; Mis-alignment 1: a third byte is needed
51 strbls R12, [R0], #+1
52 sub R2, R2, #+4 ; Second half of NumBytes -= (4 - R12); flags are left untouched
53 strbcc R3, [R0], #+1 ; Destination address is word aligned from here on
54
55 ;-------------------------------------------------------------------------------
56 ; Choose the transfer method according to the source alignment
57 ;
58 ARM_MEMCPY_DestIsDWordAligned:
59 ands R3, R1, #+3 ; R3 = source address modulo 4
60 beq ARM_MEMCPY_HandleBulkWordData ; Source and destination both word aligned: use bulk word transfer
61
62 subs R2, R2, #+4 ; Pre-decrement the counter by one word; bits 1..0 of R2 are unchanged
63 bcc ARM_MEMCPY_HandleTrailingBytes ; Borrow: fewer than 4 bytes left, copy them byte by byte
64
65 ldr R12, [R1, -R3]! ; Round the source address down to a word boundary (writeback) and read that word
66 cmp R3, #+2 ; Dispatch on the source mis-alignment
67 beq ARM_MEMCPY_Loop16BitShift ; Mis-alignment 2
68
69 bhi ARM_MEMCPY_Loop24BitShift ; Mis-alignment 3; mis-alignment 1 falls through
70
71 ;-------------------------------------------------------------------------------
72 ; Copy whole words from a mis-aligned source
73 ;
74 ; Only aligned words are read from the source. Each stored word combines the
75 ; upper part of the previously read word with the lower part of the next one
76 ; (the shift directions match little-endian byte order).
77 ;
78 ARM_MEMCPY_Loop8BitShift:
79 mov R3, R12, LSR #+8 ; Keep the upper 3 bytes of the previous word
80 ldr R12, [R1, #+4]! ; Read the next aligned source word
81 subs R2, R2, #+4 ; Decrement NumBytes
82 orr R3, R3, R12, LSL #+24 ; Append the lowest byte of the new word as top byte
83 str R3, [R0], #+4 ; Store complete word
84 bcs ARM_MEMCPY_Loop8BitShift ; No borrow from the subs above: one more whole word to copy
85
86 add R1, R1, #+1 ; Restore the source mis-alignment: R1 = next byte to copy
87 b ARM_MEMCPY_HandleTrailingBytes ; Handle trailing bytes
88
89 ARM_MEMCPY_Loop16BitShift:
90 mov R3, R12, LSR #+16 ; Keep the upper 2 bytes of the previous word
91 ldr R12, [R1, #+4]! ; Read the next aligned source word
92 subs R2, R2, #+4 ; Decrement NumBytes
93 orr R3, R3, R12, LSL #+16 ; Append the lower 2 bytes of the new word as upper half
94 str R3, [R0], #+4 ; Store complete word
95 bcs ARM_MEMCPY_Loop16BitShift ; No borrow from the subs above: one more whole word to copy
96
97 add R1, R1, #+2 ; Restore the source mis-alignment: R1 = next byte to copy
98 b ARM_MEMCPY_HandleTrailingBytes ; Handle trailing bytes
99
100 ARM_MEMCPY_Loop24BitShift:
101 mov R3, R12, LSR #+24 ; Keep the top byte of the previous word
102 ldr R12, [R1, #+4]! ; Read the next aligned source word
103 subs R2, R2, #+4 ; Decrement NumBytes
104 orr R3, R3, R12, LSL #+8 ; Append the lower 3 bytes of the new word above it
105 str R3, [R0], #+4 ; Store complete word
106 bcs ARM_MEMCPY_Loop24BitShift ; No borrow from the subs above: one more whole word to copy
107
108 add R1, R1, #+3 ; Restore the source mis-alignment: R1 = next byte to copy
109 b ARM_MEMCPY_HandleTrailingBytes ; Handle trailing bytes
110
111 ;-------------------------------------------------------------------------------
112 ; Handle large bulk data in blocks of 8 words (32 bytes)
113 ;
114 ARM_MEMCPY_HandleBulkWordData:
115 subs R2, R2, #+0x20 ; Borrow (C clear) if fewer than 32 bytes remain
116 stmdb SP!, {R4, LR} ; R4 and LR serve as transfer registers below; stmdb leaves the flags alone
117 bcc ARM_MEMCPY_HandleTrailingWords
118
119 ARM_MEMCPY_LoopHandleBulkWord:
120 ldm R1!, {R3, R4, R12, LR} ; Transfer 16 bytes at once
121 stm R0!, {R3, R4, R12, LR}
122 ldm R1!, {R3, R4, R12, LR} ; Transfer 16 bytes at once
123 stm R0!, {R3, R4, R12, LR}
124 subs R2, R2, #+0x20
125 bcs ARM_MEMCPY_LoopHandleBulkWord ; No borrow: another full 32-byte block remains
126
127 ;-------------------------------------------------------------------------------
128 ; Handle up to 7 trailing words (R2 is negative here, its low 5 bits still hold the remaining count)
129 ;
130 ARM_MEMCPY_HandleTrailingWords:
131 movs R12, R2, LSL #28 ; C = bit 4 of NumBytes, N = bit 3 of NumBytes; R12 is only a dummy destination
132
133 ldmcs R1!, {R3, R4, R12, LR} ; C flag contain bit 4 of NumBytes (transfer 16 bytes if it is set)
134 stmcs R0!, {R3, R4, R12, LR}
135 ldmmi R1!, {R3, R4} ; N flag contain bit 3 of NumBytes (transfer 8 bytes if it is set)
136 stmmi R0!, {R3, R4}
137
138 movs R12, R2, LSL #+30 ; C = bit 2 of NumBytes, Z = set when bits 1..0 are both zero
139
140 ldmia SP!, {R4, LR} ; Restore R4 and the return address (does not change the flags)
141 ldrcs R3, [R1], #+4 ; C flag contain bit 2 of NumBytes (transfer 4 bytes if it is set)
142 strcs R3, [R0], #+4
143 bxeq LR ; No trailing bytes left: return
144
145 ;-------------------------------------------------------------------------------
146 ; Handle up to 3 trailing bytes
147 ;
148 ; Only bits 1..0 of R2 are evaluated, so R2 may have been decremented below
149 ; zero in steps of 4 or 32 on the way here.
150 ; "movs R2, R2, LSL #+31" moves these two bits into the flags:
151 ; N = bit[31] of the result = bit 0 of NumBytes
152 ; C = last bit shifted out = bit 1 of NumBytes
153 ; xxxxxxxxxxxxxxxxxxxx10 << 31 : N=0, C=1
154 ; xxxxxxxxxxxxxxxxxxxx01 << 31 : N=1, C=0
155 ; MI condition : N=1
156 ; CS condition : C=1
157 ARM_MEMCPY_HandleTrailingBytes:
158 movs R2, R2, LSL #+31 ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
159
160 ldrbmi R2, [R1], #+1 ; N flag contain bit 0 of NumBytes (transfer 1 byte if it is set); R2 is free for data now
161 ldrbcs R3, [R1], #+1 ; C flag contain bit 1 of NumBytes (transfer 2 bytes if it is set)
162 ldrbcs R12, [R1], #+1
163 strbmi R2, [R0], #+1
164 strbcs R3, [R0], #+1
165 strbcs R12, [R0], #+1
166 bx LR
167
168
169 ;-------------------------------------------------------------------------------
170 ; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
171 ;
172 ; Function description
173 ; Fill NumBytes bytes starting at pDest with the low 8 bits of c.
174 ;
175 ; Register usage:
176 ;
177 ; R0 pDest (advanced while filling; not restored before return)
178 ; R1 c (masked to 8 bits, then replicated into all four bytes)
179 ; R2 NumBytes (used as remaining-byte counter)
180 ;
181 ; R3 Used for data transfers
182 ; R4 Used for data transfers (saved/restored on the stack)
183 ; R5 Used for data transfers (saved/restored on the stack)
184 ; R6 Dummy destination for flag generation (saved/restored on the stack)
185 ;
186 ; R13 SP
187 ; R14 LR (contains return address)
188 ; R15 PC
189 ;
190 ;-------------------------------------------------------------------------------
191 ARM_MEMSET:
192 ;-------------------------------------------------------------------------------
193 and R1, R1, #+0xFF ; Use only the low byte of c, otherwise higher bits would be OR-ed into the word pattern
194 orr R1, R1, R1, LSL #+8 ; R1 = fill byte in bits 15..0
195 orr R1, R1, R1, LSL #+16 ; R1 = fill byte in all four byte lanes
196 cmp R2, #+3 ; R2 = NumBytes
197 bls ARM_MEMSET_HandleTrailingBytes ; If we have less than one complete word, use single byte transfer
198
199 ands R3, R0, #+3 ; R3 = destination address modulo 4
200 beq ARM_MEMSET_DestIsAligned ; Is destination address already word aligned ?
201
202 ; Write 4 - R3 bytes (3, 2 or 1) so that the destination becomes word aligned
203
204 strb R1, [R0], #+1 ; At least one byte is always needed, so write it unconditionally
205 cmp R3, #+2 ; Flags: R3 = 1 -> LS and CC, R3 = 2 -> LS only, R3 = 3 -> neither
206 add R2, R2, R3 ; First half of NumBytes -= (4 - R3); flags are left untouched
207 strbls R1, [R0], #+1 ; Mis-alignment 1 or 2: a second byte is needed
208 sub R2, R2, #+4 ; Second half of NumBytes -= (4 - R3); flags are left untouched
209 strbcc R1, [R0], #+1 ; Mis-alignment 1: a third byte is needed
210
211 ; Choose best way to transfer data
212
213 ARM_MEMSET_DestIsAligned: ; destination is aligned, use bulk word transfer
214
215 ; Handle large bulk data in blocks of 8 words (32 bytes)
216
217 ARM_MEMSET_HandleBulkWordData:
218 stmdb SP!, {R4, R5, R6}
219
220 mov R3, R1, LSL #+0 ; R1, R3, R4, R5 all hold the pattern so that one stm writes 16 bytes
221 mov R4, R1, LSL #+0
222 mov R5, R1, LSL #+0
223
224 subs R2, R2, #+0x20 ; 32 bytes = 8 words; borrow (C clear) if fewer remain
225 bcc ARM_MEMSET_HandleTrailingWords
226
227 ARM_MEMSET_LoopHandleBulkWord:
228 stm R0!, {R1, R3, R4, R5}
229 stm R0!, {R1, R3, R4, R5}
230 subs R2, R2, #+0x20
231 bcs ARM_MEMSET_LoopHandleBulkWord ; No borrow: another full 32-byte block remains
232
233
234 ; Handle up to 7 trailing words (R2 is negative here, its low 5 bits still hold the remaining count)
235
236 ARM_MEMSET_HandleTrailingWords:
237 movs R6, R2, LSL #28 ; C = bit 4 of NumBytes, N = bit 3 of NumBytes
238 stmcs R0!, {R1, R3, R4, R5} ; C flag contain bit 4 of NumBytes (transfer 16 bytes if it is set)
239 stmmi R0!, {R1, R3} ; N flag contain bit 3 of NumBytes (transfer 8 bytes if it is set)
240
241 movs R6, R2, LSL #+30 ; C = bit 2 of NumBytes, Z = set when bits 1..0 are both zero
242 strcs R1, [R0], #+4 ; C flag contain bit 2 of NumBytes (transfer 4 bytes if it is set)
243
244 ldmia SP!, {R4, R5, R6} ; Restore the saved registers (does not change the flags)
245 bxeq LR ; Z flag set: no trailing bytes, return
246
247
248 ; Handle up to 3 trailing bytes
249
250 ARM_MEMSET_HandleTrailingBytes:
251 movs R2, R2, LSL #+31 ; N = bit 0 of NumBytes, C = bit 1 of NumBytes
252 strbmi R1, [R0], #+1 ; N flag contain bit 0 of NumBytes (transfer 1 byte if it is set)
253 strbcs R1, [R0], #+1 ; C flag contain bit 1 of NumBytes (transfer 2 bytes if it is set)
254 strbcs R1, [R0], #+1
255 bx LR
256
257
258 ; int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes); fills NumBytes bytes at pDest with the low byte of c
259 ;-------------------------------------------------------------------------------
260 ARM_MEMSET8:
261 ; R0 = pDest, R1 = c, R2 = NumBytes (tested with signed conditions); R3 scratch, R4/R5 saved on the stack
262 stmdb SP!, {R4, R5}
263 cmp R2, #4
264 blt ARM_MEMSET8_loop3 ; Fewer than 4 bytes: plain byte stores, which only use the low byte of R1
265 and R1, R1, #0xFF ; Use only the low byte of c, otherwise higher bits would be OR-ed into the pattern
266 ; Alignment is unknown
267 tst R0, #1
268 strneb R1, [R0], #1 ; Odd address: one byte store makes it halfword aligned
269 subne R2, R2, #1
270
271 ; Now we are 16-bit aligned (need to upgrade 'c' to 16-bit)
272 orr R1, R1, R1, LSL #8
273 tst R0, #2
274 strneh R1, [R0], #2 ; One halfword store makes the address word aligned
275 subne R2, R2, #2
276
277 ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
278 orr R1, R1, R1, LSL #16
279 mov R3, R1
280 cmp R2, #16
281 blt ARM_MEMSET8_loop2 ; Fewer than 16 bytes left: skip the 128-bit alignment and the block loop
282 tst R0, #4
283 strne R1, [R0], #4 ; One word store makes the address 64-bit aligned
284 subne R2, R2, #4
285 tst R0, #8
286 stmneia R0!, {R1, R3} ; Two word stores make the address 128-bit aligned
287 subne R2, R2, #8
288
289 ; Now we are 128-bit aligned
290 mov R4, R1
291 mov R5, R1
292 ARM_MEMSET8_loop1:
293 ; Write 4 32-bit values per loop iteration
294 subs R2, R2, #16
295 stmgeia R0!, {R1, R3, R4, R5} ; Only store while the count has not gone negative
296 bge ARM_MEMSET8_loop1
297 add R2, R2, #16 ; Undo the last subtraction: R2 = 0..15 bytes left
298
299 ARM_MEMSET8_loop2:
300 ; Write up to 3 remaining 32-bit values
301 tst R2, #8
302 stmneia R0!, {R1, R3}
303 tst R2, #4
304 strne R1, [R0], #4
305 and R2, R2, #3 ; Keep only the byte remainder
306
307 ARM_MEMSET8_loop3:
308 ; Write up to 3 remaining bytes
309 subs R2, R2, #1
310 strgeb R1, [R0], #1
311 subs R2, R2, #1
312 strgeb R1, [R0], #1
313 subs R2, R2, #1
314 strgeb R1, [R0], #1
315 ldmia SP!, {R4, R5}
316 bx LR
317
318 ; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords); fills NumHalfWords halfwords at pDest with the low 16 bits of c
319 ;-------------------------------------------------------------------------------
320 ARM_MEMSET16:
321 ; R0 = pDest, R1 = c, R2 = NumHalfWords (tested with signed conditions); R3 scratch, R4/R5 saved on the stack
322 stmdb SP!, {R4, R5}
323
324 cmp R2, #2
325 blt ARM_MEMSET16_HandleTrailingHalfWord ; 1 or 0
326
327 ; pDest is treated as halfword aligned; bit 0 of the address is never checked here
328 tst R0, #2
329 strneh R1, [R0], #2 ; xxxx-xx10 --->
330 subne R2, R2, #1 ; xxxx-xx00
331
332 ; Now we are 32-bit aligned: build the 32-bit pattern from the low 16 bits of 'c' only
333 mov R1, R1, LSL #16 ; R1 = c << 16, which drops bits 31..16 of c
334 orr R1, R1, R1, LSR #16 ; R1 = low halfword of c in both halves
335 mov R4, R1
336 cmp R2, #8
337 blt ARM_MEMSET16_HandleTrailingWords ; 7, 6, ... 0
338
339 tst R0, #4
340 strne R1, [R0], #4 ; xxxx-x100 --->
341 subne R2, R2, #2 ; xxxx-x000 --->
342
343 ; Now we are 64-bit aligned
344 tst R0, #8
345 stmneia R0!, {R1, R4} ; xxxx-1000 --->
346 subne R2, R2, #4 ; xxxx-0000 --->
347
348 ARM_MEMSET16_HandleBulkWordData:
349 ; Now we are 128-bit aligned
350 mov R5, R1
351 mov R3, R1
352
353 ARM_MEMSET16_LoopHandleBulkWord:
354 ; Write 4 32-bit values (8 halfwords) per loop iteration
355 subs R2, R2, #8
356 stmgeia R0!, {R1, R3, R4, R5} ; Only store while the count has not gone negative
357 bge ARM_MEMSET16_LoopHandleBulkWord
358 add R2, R2, #8 ; Undo the last subtraction: R2 = 0..7 halfwords left
359
360 ARM_MEMSET16_HandleTrailingWords:
361 ; Write up to 3 remaining 32-bit values
362 tst R2, #4
363 stmneia R0!, {R1, R4} ; Bit 2 of the count: 4 halfwords = 2 words
364
365 tst R2, #2
366 strne R1, [R0], #4 ; Bit 1 of the count: 2 halfwords = 1 word
367
368 and R2, R2, #1 ; Keep only the halfword remainder
369
370 ARM_MEMSET16_HandleTrailingHalfWord:
371 ; Write up to 1 remaining 16-bit value
372 subs R2, R2, #1
373 strgeh R1, [R0], #2
374
375 ldmia SP!, {R4, R5}
376 bx LR
377
378
379 ; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords); fills NumWords 32-bit words at pDest with c
380 ;-------------------------------------------------------------------------------
381 ARM_MEMSET32:
382 ; R0 = pDest, R1 = c, R2 = NumWords (tested with signed conditions); R3 scratch, R4/R5 saved on the stack
383 stmdb SP!, {R4, R5}
384
385 cmp R2, #4
386 blt ARM_MEMSET32_loop2 ; Fewer than 4 words: store them one by one
387
388 ; pDest is treated as word aligned; bits 1..0 of the address are never checked here
389 mov R3, R1
390
391 tst R0, #4
392 strne R1, [R0], #4 ; One word store makes the address 64-bit aligned
393 subne R2, R2, #1
394
395 ; Now we are 64-bit aligned
396 tst R0, #8
397 stmneia R0!, {R1, R3} ; Two word stores make the address 128-bit aligned
398 subne R2, R2, #2
399
400 ; Now we are 128-bit aligned
401 mov R4, R1
402 mov R5, R1
403 ARM_MEMSET32_loop1:
404 ; Write 4 32-bit values per loop iteration
405 subs R2, R2, #4
406 stmgeia R0!, {R1, R3, R4, R5} ; Only store while the count has not gone negative
407 bge ARM_MEMSET32_loop1
408 add R2, R2, #4 ; Undo the last subtraction: R2 = 0..3 words left
409
410 ARM_MEMSET32_loop2:
411 ; Write up to 3 remaining 32-bit values; each store is skipped once the count drops below zero
412 subs R2, R2, #1
413 strge R1, [R0], #4
414 subs R2, R2, #1
415 strge R1, [R0], #4
416 subs R2, R2, #1
417 strge R1, [R0], #4
418
419 ldmia SP!, {R4, R5}
420 bx LR
421
422 ;-__arm void arm_memxor(void* pDest, U32 c, U32 NumBytes); XORs NumBytes bytes at pDest with the low byte of c
423 ; r0 r1 r2
424 ;-------------------------------------------------------------------------------
425 arm_memxor:
426 ;-------------------------------------------------------------------------------
427 and R1, R1, #+0xFF ; Use only the low byte of c, otherwise higher bits would be OR-ed into the pattern
428 orr R1, R1, R1, LSL #+8 ; R1 = pattern byte in bits 15..0
429 orr R1, R1, R1, LSL #+16 ; R1 = pattern byte in all four byte lanes
430 cmp R2, #+3 ; R2 = NumBytes
431 bls arm_memxor_HandleTrailingBytes ; If we have less than one complete word, use single byte transfer
432
433 ands R3, R0, #+3 ; R3 = destination address modulo 4
434 beq arm_memxor_DestIsAligned ; Is destination address already word aligned ?
435
436 ;-
437 ; Process 4 - R3 bytes (3, 2 or 1) so that the destination becomes word aligned
438 ;-
439 ldrb R12, [R0], #+0 ; At least one byte is always needed (offset 0: R0 is not advanced by the load)
440 eor R12, R12, r1
441 strb R12, [R0], #+1 ; Write it back and advance
442
443 cmp R3, #+2 ; Flags: R3 = 1 -> LS and CC, R3 = 2 -> LS only, R3 = 3 -> neither
444 add R2, R2, R3 ; First half of NumBytes -= (4 - R3); flags are left untouched
445
446 ldrbls R3, [R0], #+0 ; Mis-alignment 1 or 2: a second byte is needed (R3 is free, the flags keep the decision)
447 eorls R3, R3, r1
448 strbls R3, [R0], #+1
449
450 sub R2, R2, #+4 ; Second half of NumBytes -= (4 - R3); flags are left untouched
451
452 ldrbcc R3, [R0], #+0 ; Mis-alignment 1: a third byte is needed
453 eorcc R3, R3, r1
454 strbcc R3, [R0], #+1
455
456 ;-
457 ; Choose best way to transfer data
458 ;-
459 arm_memxor_DestIsAligned: ; destination is aligned, use bulk word transfer
460 ;-
461 ; Handle large bulk data in blocks of 8 words (32 bytes)
462 ;-
463 arm_memxor_HandleBulkWordData:
464 stmdb SP!, {R4, R5, R6, R7}
465
466 subs R2, R2, #+0x20 ; 32 bytes = 8 words; borrow (C clear) if fewer remain
467 bcc arm_memxor_HandleTrailingWords
468
469 arm_memxor_LoopHandleBulkWord:
470 ldm R0, {R3, R4, R5, R6} ; Read 4 words without writeback ...
471 eor r3, r3, r1
472 eor r4, r4, r1
473 eor r5, r5, r1
474 eor r6, r6, r1
475 stm R0!, {R3, R4, R5, R6} ; ... and write them back to the same place, advancing R0
476
477 ldm R0, {R3, R4, R5, R6}
478 eor r3, r3, r1
479 eor r4, r4, r1
480 eor r5, r5, r1
481 eor r6, r6, r1
482 stm R0!, {R3, R4, R5, R6}
483
484 subs R2, R2, #+0x20
485 bcs arm_memxor_LoopHandleBulkWord ; No borrow: another full 32-byte block remains
486
487 ;-
488 ; Handle up to 7 trailing words (R2 is negative here, its low 5 bits still hold the remaining count)
489 ;-
490 arm_memxor_HandleTrailingWords:
491 movs R7, R2, LSL #28 ; C = bit 4 of NumBytes, N = bit 3 of NumBytes; R7 is only a dummy destination
492
493 ldmcs R0, {R3, R4, R5, R6}
494 eorcs r3, r3, r1
495 eorcs r4, r4, r1
496 eorcs r5, r5, r1
497 eorcs r6, r6, r1
498 stmcs R0!, {R3, R4, R5, R6} ; C flag contain bit 4 of NumBytes (process 16 bytes if it is set)
499
500 ldmmi R0, {R3, R4}
501 eormi r3, r3, r1
502 eormi r4, r4, r1
503 stmmi R0!, {R3, R4} ; N flag contain bit 3 of NumBytes (process 8 bytes if it is set)
504
505 movs R7, R2, LSL #+30 ; C = bit 2 of NumBytes, Z = set when bits 1..0 are both zero
506
507 ldrcs R3, [R0]
508 eorcs r3, r3, r1
509 strcs R3, [R0], #+4 ; C flag contain bit 2 of NumBytes (process 4 bytes if it is set)
510
511 ldmia SP!, {R4, R5, R6, R7} ; Restore the saved registers (does not change the flags)
512 bxeq LR ; Z flag set: no trailing bytes, return
513
514 ;-
515 ; Handle up to 3 trailing bytes (byte loads only: R0 may be unaligned and nothing past the buffer may be read)
516 ;-
517 arm_memxor_HandleTrailingBytes:
518 movs R2, R2, LSL #+31 ; N = bit 0 of NumBytes, C = bit 1 of NumBytes
519
520 ldrbmi R2, [R0] ; R2 is free for data now, the flags keep the count bits
521 eormi R2, R2, r1
522 strbmi R2, [R0], #+1 ; N flag contain bit 0 of NumBytes (process 1 byte if it is set)
523
524 ldrbcs R2, [R0]
525 eorcs R2, R2, r1
526 strbcs R2, [R0], #+1 ; C flag contain bit 1 of NumBytes (process 2 bytes if it is set)
527
528 ldrbcs R2, [R0]
529 eorcs R2, R2, r1
530 strbcs R2, [R0], #+1 ; Second byte of the pair selected by the C flag
531
532 bx LR
533
534 ;-__arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes); XORs NumBytes bytes at pDest with the low byte of c
535 ; r0 r1 r2 (NumBytes is tested with signed conditions; R3 scratch, R4-R6 saved on the stack)
536 ;-------------------------------------------------------------------------------
537 arm_memxor8:
538 ;-------------------------------------------------------------------------------
539 stmdb SP!, {R4, R5, R6}
540 and R1, R1, #+0xFF ; Use only the low byte of c, otherwise higher bits would be OR-ed into the pattern
541 orr R1, R1, R1, LSL #+8 ; R1 = pattern byte in bits 15..0
542 orr R1, R1, R1, LSL #+16 ; R1 = pattern byte in all four byte lanes
543
544 cmp R2, #4
545 blt arm_memxor8_loop3 ; Fewer than 4 bytes: byte-wise processing only
546
547 ; Alignment is unknown
548 tst R0, #1
549
550 ldrneb R6, [R0] ; Odd address: process one byte to become halfword aligned
551 eorne R6, r6, R1
552 strneb R6, [R0], #1
553
554 subne R2, R2, #1
555
556 ; Now we are 16-bit aligned
557 tst R0, #2
558
559 ldrneh R6, [R0] ; Process one halfword to become word aligned
560 eorne R6, r6, R1
561 strneh R6, [R0], #2
562
563 subne R2, R2, #2
564
565 ; Now we are 32-bit aligned
566 cmp R2, #16
567 blt arm_memxor8_loop2 ; Fewer than 16 bytes left: skip the 128-bit alignment and the block loop
568 tst R0, #4
569
570 ldrne R6, [R0] ; Process one word to become 64-bit aligned
571 eorne R6, r6, R1
572 strne R6, [R0], #4
573 ; Now we are 64-bit aligned
574 subne R2, R2, #4
575 tst R0, #8
576
577 ldmneia R0, {R3, R6} ; Process two words to become 128-bit aligned
578 eorne R3, r3, R1
579 eorne R6, r6, R1
580 stmneia R0!, {R3, R6}
581
582 subne R2, R2, #8
583
584 ; Now we are 128-bit aligned
585 ; (no register preload is needed: R3-R6 are loaded from memory inside the loop)
586
587 arm_memxor8_loop1:
588 ; Process 4 32-bit values per loop iteration
589 subs R2, R2, #16
590
591 ldmgeia R0, {R3, R4, R5, R6} ; Only executed while the count has not gone negative
592 eorge r3, r3, r1
593 eorge r4, r4, r1
594 eorge r5, r5, r1
595 eorge r6, r6, r1
596 stmgeia R0!, {R3, R4, R5, R6}
597
598 bge arm_memxor8_loop1
599 add R2, R2, #16 ; Undo the last subtraction: R2 = 0..15 bytes left
600
601 arm_memxor8_loop2:
602 ; Process up to 3 remaining 32-bit values
603 tst R2, #8
604
605 ldmneia R0, {R3, R4}
606 eorne r3, r3, r1
607 eorne r4, r4, r1
608 stmneia R0!, {R3, R4}
609
610 tst R2, #4
611
612 ldrne R3, [R0]
613 eorne r3, r3, r1
614 strne R3, [R0], #4
615
616 and R2, R2, #3 ; Keep only the byte remainder
617
618 arm_memxor8_loop3:
619 ; Process up to 3 remaining bytes
620 subs R2, R2, #1
621
622 ldrgeb R3, [R0]
623 eorge r3, r3, r1
624 strgeb R3, [R0], #1
625
626 subs R2, R2, #1
627
628 ldrgeb R3, [R0]
629 eorge r3, r3, r1
630 strgeb R3, [R0], #1 ; Store the XOR result (R3), not the pattern (R1)
631
632 subs R2, R2, #1
633
634 ldrgeb R3, [R0]
635 eorge r3, r3, r1
636 strgeb R3, [R0], #1 ; Store the XOR result (R3), not the pattern (R1)
637
638 ldmia SP!, {R4, R5, R6}
639 bx LR
640
641 ;-__arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords); XORs NumHalfWords halfwords at pDest with the low 16 bits of c
642 ; r0 r1 r2 (NumHalfWords is tested with signed conditions; R3 scratch, R4-R6 saved on the stack)
643 ;-------------------------------------------------------------------------------
644 arm_memxor16:
645 ;-------------------------------------------------------------------------------
646 stmdb SP!, {R4, R5, R6}
647 mov R1, R1, LSL #+16 ; R1 = c << 16, which drops bits 31..16 of c
648 orr R1, R1, R1, LSR #+16 ; R1 = low halfword of c in both halves
649 cmp R2, #2
650 blt arm_memxor16_HandleTrailingHalfWord ; 1 or 0
651
652 ; pDest is treated as halfword aligned; bit 0 of the address is never checked here
653 tst R0, #2
654
655 ldrneh R6, [R0]
656 eorne R6, r6, R1
657 strneh R6, [R0], #2 ; xxxx-xx10 --->
658
659 subne R2, R2, #1 ; xxxx-xx00
660
661 ; Now we are 32-bit aligned
662 cmp R2, #8
663 blt arm_memxor16_HandleTrailingWords ; 7, 6, ... 0
664
665 tst R0, #4
666
667 ldrne R3, [R0]
668 eorne r3, r3, r1
669 strne R3, [R0], #4 ; xxxx-x100 --->
670
671
672 subne R2, R2, #2 ; xxxx-x000 --->
673
674 ; Now we are 64-bit aligned
675 tst R0, #8
676
677 ldmneia R0, {R3, R4}
678 eorne r3, r3, r1
679 eorne r4, r4, r1
680 stmneia R0!, {R3, R4} ; xxxx-1000 --->
681
682 subne R2, R2, #4 ; xxxx-0000 --->
683
684 arm_memxor16_HandleBulkWordData:
685 ; Now we are 128-bit aligned
686 ; (no register preload is needed: R3-R6 are loaded from memory inside the loop)
687
688
689 arm_memxor16_LoopHandleBulkWord:
690 ; Process 4 32-bit values (8 halfwords) per loop iteration
691 subs R2, R2, #8
692
693 ldmgeia R0, {R3, R4, R5, R6} ; Only executed while the count has not gone negative
694 eorge r3, r3, r1
695 eorge r4, r4, r1
696 eorge r5, r5, r1
697 eorge r6, r6, r1
698 stmgeia R0!, {R3, R4, R5, R6}
699
700 bge arm_memxor16_LoopHandleBulkWord
701 add R2, R2, #8 ; Undo the last subtraction: R2 = 0..7 halfwords left
702
703 arm_memxor16_HandleTrailingWords:
704 ; Process up to 3 remaining 32-bit values
705 tst R2, #4
706
707 ldmneia R0, {R3, R4} ; Bit 2 of the count: 4 halfwords = 2 words
708 eorne r3, r3, r1
709 eorne r4, r4, r1
710 stmneia R0!, {R3, R4}
711
712 tst R2, #2
713
714 ldrne R3, [R0] ; Bit 1 of the count: 2 halfwords = 1 word
715 eorne r3, r3, r1
716 strne R3, [R0], #4
717
718 and R2, R2, #1 ; Keep only the halfword remainder
719
720 arm_memxor16_HandleTrailingHalfWord:
721 ; Process up to 1 remaining 16-bit value
722 subs R2, R2, #1
723
724 ldrgeh R3, [R0]
725 eorge r3, r3, r1
726 strgeh R3, [R0], #2
727
728 ldmia SP!, {R4, R5, R6}
729 bx LR
730
731
732 ;-__arm int arm_memxor32(void* pDest, U32 c, U32 NumWords); XORs NumWords 32-bit words at pDest with c
733 ; r0 r1 r2 (NumWords is tested with signed conditions; R3 scratch, R4-R6 saved on the stack)
734 ;-------------------------------------------------------------------------------
735 arm_memxor32:
736 ;-------------------------------------------------------------------------------
737 stmdb SP!, {R4, R5, R6}
738
739 cmp R2, #4
740 blt arm_memxor32_loop2 ; Fewer than 4 words: process them one by one
741
742 ; pDest is treated as word aligned (bits 1..0 are never checked here); is it 64-bit aligned ?
743 tst R0, #4
744 ; No, it is only 32-bit aligned: process one word
745 ldrne R3, [R0]
746 eorne R3, r3, R1
747 strne R3, [R0], #4
748 subne R2, R2, #1
749
750 ; Now we are 64-bit aligned, is it 128-bit aligned ?
751 tst R0, #8
752 ; No, it is only 64-bit aligned: process two words
753 ldmneia R0, {R3, R4}
754 eorne r3, r3, r1
755 eorne r4, r4, r1
756 stmneia R0!, {R3, R4} ; xxxx-1000 --->
757 subne R2, R2, #2
758
759 ; Now we are 128-bit aligned
760 mov R4, R1 ; No effect on the result: R4 is reloaded by the ldm below before it is used
761 mov R5, R1 ; No effect on the result: R5 is reloaded by the ldm below before it is used
762 arm_memxor32_loop1:
763 ; Process 4 32-bit values per loop iteration
764 subs R2, R2, #4
765
766 ldmgeia R0, {R3, R4, R5, R6} ; Only executed while the count has not gone negative
767 eorge r3, r3, r1
768 eorge r4, r4, r1
769 eorge r5, r5, r1
770 eorge r6, r6, r1
771 stmgeia R0!, {R3, R4, R5, R6}
772
773 bge arm_memxor32_loop1
774 add R2, R2, #4 ; Undo the last subtraction: R2 = 0..3 words left
775
776 arm_memxor32_loop2:
777 ; Process up to 3 remaining 32-bit values; each step is skipped once the count drops below zero
778
779 subs R2, R2, #1
780 ldrge R3, [R0]
781 eorge r3, r3, r1
782 strge R3, [R0], #4
783
784 subs R2, R2, #1
785 ldrge R3, [R0]
786 eorge r3, r3, r1
787 strge R3, [R0], #4
788
789 subs R2, R2, #1
790 ldrge R3, [R0]
791 eorge r3, r3, r1
792 strge R3, [R0], #4
793
794 ldmia SP!, {R4, R5, R6}
795 bx LR
796
797
798 END