Diffstat (limited to 'libc/string/avr32/memcpy.S')
-rw-r--r--   libc/string/avr32/memcpy.S   111
1 files changed, 111 insertions, 0 deletions
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S
new file mode 100644
index 000000000..f95aabd13
--- /dev/null
+++ b/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9
+#define src r11
+#define len r10
+
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:
+	pref	src[0]
+	mov	dst, r12
+
+	/* If we have less than 32 bytes, don't do anything fancy */
+	cp.w	len, 32
+	brge	.Lmore_than_31
+
+	sub	len, 1
+	retlt	r12
+1:	ld.ub	r8, src++
+	st.b	dst++, r8
+	sub	len, 1
+	brge	1b
+	retal	r12
+
+.Lmore_than_31:
+	pushm	r0-r7, lr
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH
+	brne	.Lunaligned_src
+	mov	r8, dst
+	andl	r8, 3, COH
+	brne	.Lunaligned_dst
+
+.Laligned_copy:
+	sub	len, 32
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	ldm	src, r0-r7
+	sub	src, -32
+	stm	dst, r0-r7
+	sub	dst, -32
+	sub	len, 32
+	brge	1b
+
+.Lless_than_32:
+	/* Copy 16 more bytes if possible */
+	sub	len, -16
+	brlt	.Lless_than_16
+	ldm	src, r0-r3
+	sub	src, -16
+	sub	len, 16
+	stm	dst, r0-r3
+	sub	dst, -16
+
+.Lless_than_16:
+	/* Do the remaining as byte copies */
+	neg	len
+	add	pc, pc, len << 2
+	.rept	15
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) */
+	rsub	r8, r8, 32
+	sub	len, r8
+1:	ld.ub	r0, src++
+	st.b	dst++, r0
+	sub	r8, 1
+	brne	1b
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[0]
+	mov	r8, 3
+	tst	dst, r8
+	breq	.Laligned_copy
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4
+	brlt	2f
+1:	ld.w	r0, src++
+	st.w	dst++, r0
+	sub	len, 4
+	brge	1b
+
+2:	neg	len
+	add	pc, pc, len << 2
+	.rept	3
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc
+	.size	memcpy, . - memcpy
+
+libc_hidden_def(memcpy)
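For readers who do not speak AVR32 assembly, the routine's control flow maps onto fairly plain C: short copies take a byte loop, longer ones first byte-copy until src reaches a 32-byte cache-line boundary, then move 32-byte (and one 16-byte) blocks if dst is word-aligned, or fall back to word-sized moves if it is not, and finish the tail byte by byte. The sketch below is an illustrative reading of the diff, not uClibc code; the name memcpy_avr32_sketch is invented here, and the word-copy loops stand in for the ldm/stm multi-register block moves.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical C rendering of the strategy in the diff above. */
void *memcpy_avr32_sketch(void *dst0, const void *src0, size_t len)
{
	unsigned char *dst = dst0;
	const unsigned char *src = src0;

	/* Fewer than 32 bytes: plain byte loop, like the early-out
	 * path before .Lmore_than_31. */
	if (len < 32) {
		while (len--)
			*dst++ = *src++;
		return dst0;
	}

	/* .Lunaligned_src: byte-copy until src sits on a 32-byte
	 * cache-line boundary. */
	while ((uintptr_t)src & 31) {
		*dst++ = *src++;
		len--;
	}

	if (((uintptr_t)dst & 3) == 0) {
		/* .Laligned_copy: 32 bytes per iteration; the assembly
		 * does each iteration as one ldm/stm of r0-r7. */
		const uint32_t *s = (const uint32_t *)src;
		uint32_t *d = (uint32_t *)dst;
		while (len >= 32) {
			for (int i = 0; i < 8; i++)
				d[i] = s[i];
			d += 8; s += 8; len -= 32;
		}
		/* .Lless_than_32: one more 16-byte block if possible
		 * (ldm/stm of r0-r3). */
		if (len >= 16) {
			for (int i = 0; i < 4; i++)
				d[i] = s[i];
			d += 4; s += 4; len -= 16;
		}
		dst = (unsigned char *)d;
		src = (const unsigned char *)s;
	} else {
		/* .Lunaligned_dst: src is aligned but dst is not; the
		 * assembly still issues ld.w/st.w pairs, and its comment
		 * warns the performance will be bad. Byte copies keep
		 * this sketch portable. */
		while (len >= 4) {
			dst[0] = src[0]; dst[1] = src[1];
			dst[2] = src[2]; dst[3] = src[3];
			dst += 4; src += 4; len -= 4;
		}
	}

	/* .Lless_than_16 and label 2: remaining bytes one at a time. */
	while (len--)
		*dst++ = *src++;
	return dst0;
}

The one piece with no direct C equivalent is the tail dispatch: neg len followed by add pc, pc, len << 2 jumps into the middle of an unrolled .rept run of ld.ub/st.b pairs, in the spirit of Duff's device, so exactly the right number of byte copies executes with no loop overhead. The shift by 2 reflects each load/store pair occupying 4 bytes of code; the closing byte loops in the sketch are the portable stand-in.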
