Diffstat (limited to 'libc/string/avr32/memcpy.S')
-rw-r--r--   libc/string/avr32/memcpy.S   111
1 files changed, 111 insertions, 0 deletions
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S
new file mode 100644
index 000000000..f95aabd13
--- /dev/null
+++ b/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9
+#define src r11
+#define len r10
+
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:
+	pref	src[0]
+	mov	dst, r12
+
+	/* If we have less than 32 bytes, don't do anything fancy */
+	cp.w	len, 32
+	brge	.Lmore_than_31
+
+	sub	len, 1
+	retlt	r12
+1:	ld.ub	r8, src++
+	st.b	dst++, r8
+	sub	len, 1
+	brge	1b
+	retal	r12
+
+.Lmore_than_31:
+	pushm	r0-r7, lr
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH
+	brne	.Lunaligned_src
+	mov	r8, dst
+	andl	r8, 3, COH
+	brne	.Lunaligned_dst
+
+.Laligned_copy:
+	sub	len, 32
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	ldm	src, r0-r7
+	sub	src, -32
+	stm	dst, r0-r7
+	sub	dst, -32
+	sub	len, 32
+	brge	1b
+
+.Lless_than_32:
+	/* Copy 16 more bytes if possible */
+	sub	len, -16
+	brlt	.Lless_than_16
+	ldm	src, r0-r3
+	sub	src, -16
+	sub	len, 16
+	stm	dst, r0-r3
+	sub	dst, -16
+
+.Lless_than_16:
+	/* Do the remaining as byte copies */
+	neg	len
+	add	pc, pc, len << 2
+	.rept	15
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) */
+	rsub	r8, r8, 32
+	sub	len, r8
+1:	ld.ub	r0, src++
+	st.b	dst++, r0
+	sub	r8, 1
+	brne	1b
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[0]
+	mov	r8, 3
+	tst	dst, r8
+	breq	.Laligned_copy
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4
+	brlt	2f
+1:	ld.w	r0, src++
+	st.w	dst++, r0
+	sub	len, 4
+	brge	1b
+
+2:	neg	len
+	add	pc, pc, len << 2
+	.rept	3
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+	.size	memcpy, . - memcpy
+
+libc_hidden_def(memcpy)
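For readers who do not speak AVR32 assembly, the routine's control flow maps onto fairly plain C: short copies take a byte loop, longer ones first byte-copy until src reaches a 32-byte cache-line boundary, then move 32-byte (and one 16-byte) blocks if dst is word-aligned, or fall back to word-sized moves if it is not, and finish the tail byte by byte. The sketch below is an illustrative reading of the diff, not uClibc code; the name memcpy_avr32_sketch is invented here, and the word-copy loops stand in for the ldm/stm multi-register block moves.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical C rendering of the strategy in the diff above. */
void *memcpy_avr32_sketch(void *dst0, const void *src0, size_t len)
{
	unsigned char *dst = dst0;
	const unsigned char *src = src0;

	/* Fewer than 32 bytes: plain byte loop, like the early-out
	 * path before .Lmore_than_31. */
	if (len < 32) {
		while (len--)
			*dst++ = *src++;
		return dst0;
	}

	/* .Lunaligned_src: byte-copy until src sits on a 32-byte
	 * cache-line boundary. */
	while ((uintptr_t)src & 31) {
		*dst++ = *src++;
		len--;
	}

	if (((uintptr_t)dst & 3) == 0) {
		/* .Laligned_copy: 32 bytes per iteration; the assembly
		 * does each iteration as one ldm/stm of r0-r7. */
		const uint32_t *s = (const uint32_t *)src;
		uint32_t *d = (uint32_t *)dst;
		while (len >= 32) {
			for (int i = 0; i < 8; i++)
				d[i] = s[i];
			d += 8; s += 8; len -= 32;
		}
		/* .Lless_than_32: one more 16-byte block if possible
		 * (ldm/stm of r0-r3). */
		if (len >= 16) {
			for (int i = 0; i < 4; i++)
				d[i] = s[i];
			d += 4; s += 4; len -= 16;
		}
		dst = (unsigned char *)d;
		src = (const unsigned char *)s;
	} else {
		/* .Lunaligned_dst: src is aligned but dst is not; the
		 * assembly still issues ld.w/st.w pairs, and its comment
		 * warns the performance will be bad. Byte copies keep
		 * this sketch portable. */
		while (len >= 4) {
			dst[0] = src[0]; dst[1] = src[1];
			dst[2] = src[2]; dst[3] = src[3];
			dst += 4; src += 4; len -= 4;
		}
	}

	/* .Lless_than_16 and label 2: remaining bytes one at a time. */
	while (len--)
		*dst++ = *src++;
	return dst0;
}

The one piece with no direct C equivalent is the tail dispatch: neg len followed by add pc, pc, len << 2 jumps into the middle of an unrolled .rept run of ld.ub/st.b pairs, in the spirit of Duff's device, so exactly the right number of byte copies executes with no loop overhead. The shift by 2 reflects each load/store pair occupying 4 bytes of code; the closing byte loops in the sketch are the portable stand-in.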
