22 files changed, 2675 insertions, 0 deletions
diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile
new file mode 100644
index 000000000..0002ffdce
--- /dev/null
+++ b/libc/string/avr32/Makefile
@@ -0,0 +1,26 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org>
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+top_srcdir     := ../../../
+top_builddir   := ../../../
+
+all: objs
+
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/avr32/bcopy.S b/libc/string/avr32/bcopy.S
new file mode 100644
index 000000000..e1d173165
--- /dev/null
+++ b/libc/string/avr32/bcopy.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+       .text
+       .global bcopy
+       .type   bcopy, @function
+       .align  1
+bcopy:
+       /* Swap the first two arguments */
+       eor     r11, r12
+       eor     r12, r11
+       eor     r11, r12
+       rjmp    HIDDEN_JUMPTARGET(memmove)
+
+       .size   bcopy, . - bcopy
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/bzero.S b/libc/string/avr32/bzero.S
new file mode 100644
index 000000000..928148dcb
--- /dev/null
+++ b/libc/string/avr32/bzero.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+       .text
+       .global bzero
+       .type   bzero, @function
+       .align  1
+bzero:
+       mov     r10, r11
+       mov     r11, 0
+       rjmp    HIDDEN_JUMPTARGET(memset)
+
+       .size   bzero, . - bzero
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/memcmp.S b/libc/string/avr32/memcmp.S
new file mode 100644
index 000000000..5d7eac3d9
--- /dev/null
+++ b/libc/string/avr32/memcmp.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+       .text
+       .global memcmp
+       .type   memcmp, @function
+       .align  1
+memcmp:
+       sub     len, 4
+       brlt    .Lless_than_4
+
+1:     ld.w    r8, s1++
+       ld.w    r9, s2++
+       cp.w    r8, r9
+       brne    .Lfound_word
+       sub     len, 4
+       brge    1b
+
+.Lless_than_4:
+       sub     len, -4
+       reteq   0
+
+1:     ld.ub   r8, s1++
+       ld.ub   r9, s2++
+       sub     r8, r9
+       retne   r8
+       sub     len, 1
+       brgt    1b
+
+       retal   0
+
+.Lfound_word:
+       mov     len, 4
+
+2:     bfextu  r11, r9, 24, 8
+       bfextu  r12, r8, 24, 8
+       sub     r12, r11
+       retne   r12
+       lsl     r8, 8
+       lsl     r9, 8
+       sub     len, 1
+       brne    2b
+       retal   r12
+
+       .size   memcmp, . - memcmp
+
+libc_hidden_def(memcmp)
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(memcmp,bcmp)
+#endif
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S
new file mode 100644
index 000000000..f95aabd13
--- /dev/null
+++ b/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9
+#define src r11
+#define len r10
+
+       .text
+       .global memcpy
+       .type   memcpy, @function
+memcpy:
+       pref    src[0]
+       mov     dst, r12
+
+       /* If we have less than 32 bytes, don't do anything fancy */
+       cp.w    len, 32
+       brge    .Lmore_than_31
+
+       sub     len, 1
+       retlt   r12
+1:     ld.ub   r8, src++
+       st.b    dst++, r8
+       sub     len, 1
+       brge    1b
+       retal   r12
+
+.Lmore_than_31:
+       pushm   r0-r7, lr
+
+       /* Check alignment */
+       mov     r8, src
+       andl    r8, 31, COH
+       brne    .Lunaligned_src
+       mov     r8, dst
+       andl    r8, 3, COH
+       brne    .Lunaligned_dst
+
+.Laligned_copy:
+       sub     len, 32
+       brlt    .Lless_than_32
+
+1:     /* Copy 32 bytes at a time */
+       ldm     src, r0-r7
+       sub     src, -32
+       stm     dst, r0-r7
+       sub     dst, -32
+       sub     len, 32
+       brge    1b
+
+.Lless_than_32:
+       /* Copy 16 more bytes if possible */
+       sub     len, -16
+       brlt    .Lless_than_16
+       ldm     src, r0-r3
+       sub     src, -16
+       sub     len, 16
+       stm     dst, r0-r3
+       sub     dst, -16
+
+.Lless_than_16:
+       /* Do the remaining as byte copies */
+       neg     len
+       add     pc, pc, len << 2
+       .rept   15
+       ld.ub   r0, src++
+       st.b    dst++, r0
+       .endr
+
+       popm    r0-r7, pc
+
+.Lunaligned_src:
+       /* Make src cacheline-aligned. r8 = (src & 31) */
+       rsub    r8, r8, 32
+       sub     len, r8
+1:     ld.ub   r0, src++
+       st.b    dst++, r0
+       sub     r8, 1
+       brne    1b
+
+       /* If dst is word-aligned, we're ready to go */
+       pref    src[0]
+       mov     r8, 3
+       tst     dst, r8
+       breq    .Laligned_copy
+
+.Lunaligned_dst:
+       /* src is aligned, but dst is not. Expect bad performance */
+       sub     len, 4
+       brlt    2f
+1:     ld.w    r0, src++
+       st.w    dst++, r0
+       sub     len, 4
+       brge    1b
+
+2:     neg     len
+       add     pc, pc, len << 2
+       .rept   3
+       ld.ub   r0, src++
+       st.b    dst++, r0
+       .endr
+
+       popm    r0-r7, pc
+       .size   memcpy, . - memcpy
+
+libc_hidden_def(memcpy)
diff --git a/libc/string/avr32/memmove.S b/libc/string/avr32/memmove.S
new file mode 100644
index 000000000..8ca4da54d
--- /dev/null
+++ b/libc/string/avr32/memmove.S
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#define dst r12
+#define src r11
+#define len r10
+
+       .text
+       .global memmove
+       .type   memmove, @function
+memmove:
+       cp.w    src, dst
+       brge    HIDDEN_JUMPTARGET(memcpy)
+
+       add     dst, len
+       add     src, len
+       pref    src[-1]
+
+       /*
+        * The rest is basically the same as in memcpy.S except that
+        * the direction is reversed.
+        */
+       cp.w    len, 32
+       brge    .Lmore_than_31
+
+       sub     len, 1
+       retlt   r12
+1:     ld.ub   r8, --src
+       st.b    --dst, r8
+       sub     len, 1
+       brge    1b
+       retal   r12
+
+.Lmore_than_31:
+       pushm   r0-r7, lr
+
+       /* Check alignment */
+       mov     r8, src
+       andl    r8, 31, COH
+       brne    .Lunaligned_src
+       mov     r8, r12
+       andl    r8, 3, COH
+       brne    .Lunaligned_dst
+
+.Laligned_copy:
+       sub     len, 32
+       brlt    .Lless_than_32
+
+1:     /* Copy 32 bytes at a time */
+       sub     src, 32
+       ldm     src, r0-r7
+       sub     dst, 32
+       sub     len, 32
+       stm     dst, r0-r7
+       brge    1b
+
+.Lless_than_32:
+       /* Copy 16 more bytes if possible */
+       sub     len, -16
+       brlt    .Lless_than_16
+       sub     src, 16
+       ldm     src, r0-r3
+       sub     dst, 16
+       sub     len, 16
+       stm     dst, r0-r3
+
+.Lless_than_16:
+       /* Do the remaining as byte copies */
+       sub     len, -16
+       breq    2f
+1:     ld.ub   r0, --src
+       st.b    --dst, r0
+       sub     len, 1
+       brne    1b
+
+2:     popm    r0-r7, pc
+
+.Lunaligned_src:
+       /* Make src cacheline-aligned. r8 = (src & 31) */
+       sub     len, r8
+1:     ld.ub   r0, --src
+       st.b    --dst, r0
+       sub     r8, 1
+       brne    1b
+
+       /* If dst is word-aligned, we're ready to go */
+       pref    src[-4]
+       mov     r8, 3
+       tst     dst, r8
+       breq    .Laligned_copy
+
+.Lunaligned_dst:
+       /* src is aligned, but dst is not. Expect bad performance */
+       sub     len, 4
+       brlt    2f
+1:     ld.w    r0, --src
+       st.w    --dst, r0
+       sub     len, 4
+       brge    1b
+
+2:     neg     len
+       add     pc, pc, len << 2
+       .rept   3
+       ld.ub   r0, --src
+       st.b    --dst, r0
+       .endr
+
+       popm    r0-r7, pc
+       .size   memmove, . - memmove
+
+libc_hidden_def(memmove)
diff --git a/libc/string/avr32/memset.S b/libc/string/avr32/memset.S
new file mode 100644
index 000000000..964bf4834
--- /dev/null
+++ b/libc/string/avr32/memset.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s r12
+#define c r11
+#define n r10
+
+       .text
+       .global memset
+       .type   memset, @function
+
+       .align  1
+memset:
+       cp.w    n, 32
+       mov     r9, s
+       brge    .Llarge_memset
+
+       sub     n, 1
+       retlt   s
+1:     st.b    s++, c
+       sub     n, 1
+       brge    1b
+
+       retal   r9
+
+.Llarge_memset:
+       mov     r8, r11
+       mov     r11, 3
+       bfins   r8, r8, 8, 8
+       bfins   r8, r8, 16, 16
+       tst     s, r11
+       breq    2f
+
+1:     st.b    s++, r8
+       sub     n, 1
+       tst     s, r11
+       brne    1b
+
+2:     mov     r11, r9
+       mov     r9, r8
+       sub     n, 8
+
+3:     st.d    s++, r8
+       sub     n, 8
+       brge    3b
+
+       /* If we are done, n == -8 and we'll skip all st.b insns below */
+       neg     n
+       lsl     n, 1
+       add     pc, n
+       .rept   7
+       st.b    s++, r8
+       .endr
+       retal   r11
+
+       .size   memset, . - memset
+
+libc_hidden_def(memset)
diff --git a/libc/string/avr32/strcmp.S b/libc/string/avr32/strcmp.S
new file mode 100644
index 000000000..e9f087577
--- /dev/null
+++ b/libc/string/avr32/strcmp.S
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+       .text
+       .global strcmp
+       .type   strcmp, @function
+       .align  1
+strcmp:
+       mov     r8, 3
+       tst     s1, r8
+       brne    .Lunaligned_s1
+       tst     s2, r8
+       brne    .Lunaligned_s2
+
+1:     ld.w    r8, s1++
+       ld.w    r9, s2++
+       cp.w    r8, r9
+       brne    2f
+       tnbz    r8
+       brne    1b
+       retal   0
+
+2:     bfextu  r12, r8, 24, 8
+       bfextu  r11, r9, 24, 8
+       sub     r12, r11
+       retne   r12
+       cp.w    r11, 0
+       reteq   0
+       bfextu  r12, r8, 16, 8
+       bfextu  r11, r9, 16, 8
+       sub     r12, r11
+       retne   r12
+       cp.w    r11, 0
+       reteq   0
+       bfextu  r12, r8, 8, 8
+       bfextu  r11, r9, 8, 8
+       sub     r12, r11
+       retne   r12
+       cp.w    r11, 0
+       reteq   0
+       bfextu  r12, r8, 0, 8
+       bfextu  r11, r9, 0, 8
+       sub     r12, r11
+       retal   r12
+
+.Lunaligned_s1:
+3:     tst     s1, r8
+       breq    4f
+       ld.ub   r10, s1++
+       ld.ub   r9, s2++
+       sub     r10, r9
+       retne   r10
+       cp.w    r9, 0
+       brne    3b
+       retal   r10
+
+4:     tst     s2, r8
+       breq    1b
+
+.Lunaligned_s2:
+       /*
+        * s1 and s2 can't both be aligned, and unaligned word loads
+        * can trigger spurious exceptions if we cross a page boundary.
+        * Do it the slow way...
+        */
+1:     ld.ub   r8, s1++
+       ld.ub   r9, s2++
+       sub     r8, r9
+       retne   r8
+       cp.w    r9, 0
+       brne    1b
+       retal   0
+
+       .size   strcmp, . - strcmp
+
+libc_hidden_def(strcmp)
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp, strcoll)
+libc_hidden_def(strcoll)
+#endif
diff --git a/libc/string/avr32/strlen.S b/libc/string/avr32/strlen.S
new file mode 100644
index 000000000..d2808998d
--- /dev/null
+++ b/libc/string/avr32/strlen.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define str r12
+
+       .text
+       .global strlen
+       .type   strlen, @function
+strlen:
+       mov     r11, r12
+
+       mov     r9, str
+       andl    r9, 3, COH
+       brne    .Lunaligned_str
+
+1:     ld.w    r8, str++
+       tnbz    r8
+       brne    1b
+
+       sub     r12, r11
+       bfextu  r9, r8, 24, 8
+       cp.w    r9, 0
+       subeq   r12, 4
+       reteq   r12
+       bfextu  r9, r8, 16, 8
+       cp.w    r9, 0
+       subeq   r12, 3
+       reteq   r12
+       bfextu  r9, r8, 8, 8
+       cp.w    r9, 0
+       subeq   r12, 2
+       reteq   r12
+       sub     r12, 1
+       retal   r12
+
+.Lunaligned_str:
+       add     pc, pc, r9 << 3
+       sub     r0, r0, 0       /* 4-byte nop */
+       ld.ub   r8, str++
+       sub     r8, r8, 0
+       breq    1f
+       ld.ub   r8, str++
+       sub     r8, r8, 0
+       breq    1f
+       ld.ub   r8, str++
+       sub     r8, r8, 0
+       brne    1b
+
+1:     sub     r12, 1
+       sub     r12, r11
+       retal   r12
+
+       .size   strlen, . - strlen
+
+libc_hidden_def(strlen)
diff --git a/libc/string/cris/memcopy.h b/libc/string/cris/memcopy.h
new file mode 100644
index 000000000..0eae998da
--- /dev/null
+++ b/libc/string/cris/memcopy.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Modified for use in uClibc (C) 2007 Axis Communications AB.
+   Minimal modifications: include path name and #undef of WORD_COPY_FWD/BWD
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "../generic/memcopy.h"
+
+/* We override the word-copying macros, partly because misalignment in one
+   pointer isn't cause for a special function, partly because we want to
+   get rid of all the static functions in generic/memcopy.c; these macros 
+   are only used in memmove.c since we have arch-specific mempcpy, memcpy and
+   memset.  */
+
+#undef OP_T_THRES
+#define OP_T_THRES OPSIZ
+
+#undef WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
+  do									\
+    {									\
+      unsigned long enddst_bp = dst_bp + nbytes - (nbytes % OPSIZ);	\
+      nbytes_left = (nbytes % OPSIZ);					\
+      while (dst_bp < (unsigned long) enddst_bp)			\
+	{								\
+	  op_t x = *(op_t *) src_bp;					\
+	  src_bp += sizeof x;						\
+	  *(op_t *) dst_bp = x;						\
+	  dst_bp += sizeof x;						\
+	}								\
+    } while (0)
+
+#undef WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_bp, src_bp, nbytes_left, nbytes)		\
+  do									\
+    {									\
+      unsigned long enddst_bp = dst_bp - nbytes + (nbytes % OPSIZ);	\
+      nbytes_left = (nbytes % OPSIZ);					\
+      while (dst_bp > enddst_bp)					\
+	{								\
+	  op_t x;							\
+	  src_bp -= sizeof x;						\
+	  x = *(op_t *) src_bp;						\
+	  dst_bp -= sizeof x;						\
+	  *(op_t *) dst_bp = x;						\
+	}								\
+    } while (0)
diff --git a/libc/string/cris/memcpy.c b/libc/string/cris/memcpy.c
new file mode 100644
index 000000000..a85108109
--- /dev/null
+++ b/libc/string/cris/memcpy.c
@@ -0,0 +1,264 @@
+/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1995, 2000 Axis Communications AB.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/*#************************************************************************#*/
+/*#-------------------------------------------------------------------------*/
+/*#                                                                         */
+/*# FUNCTION NAME: memcpy()                                                 */
+/*#                                                                         */
+/*# PARAMETERS:  void* dst;   Destination address.                          */
+/*#              void* src;   Source address.                               */
+/*#              int   len;   Number of bytes to copy.                      */
+/*#                                                                         */
+/*# RETURNS:     dst.                                                       */
+/*#                                                                         */
+/*# DESCRIPTION: Copies len bytes of memory from src to dst.  No guarantees */
+/*#              about copying of overlapping memory areas. This routine is */
+/*#              very sensitive to compiler changes in register allocation. */
+/*#              Should really be rewritten to avoid this problem.          */
+/*#                                                                         */
+/*#-------------------------------------------------------------------------*/
+/*#                                                                         */
+/*# HISTORY                                                                 */
+/*#                                                                         */
+/*# DATE      NAME            CHANGES                                       */
+/*# ----      ----            -------                                       */
+/*# 941007    Kenny R         Creation                                      */
+/*# 941011    Kenny R         Lots of optimizations and inlining.           */
+/*# 941129    Ulf A           Adapted for use in libc.                      */
+/*# 950216    HP              N==0 forgotten if non-aligned src/dst.        */
+/*#                           Added some optimizations.                     */
+/*# 001025    HP              Make src and dst char *.  Align dst to	    */
+/*#			      dword, not just word-if-both-src-and-dst-	    */
+/*#			      are-misaligned.				    */
+/*# 070806    RW              Modified for uClibc                           */
+/*#                           (__arch_v32 -> __CONFIG_CRISV32__,            */
+/*#                           include features.h to reach it.)              */
+/*#                                                                         */
+/*#-------------------------------------------------------------------------*/
+
+#include <features.h>
+
+#ifdef __CONFIG_CRISV32__
+/* For CRISv32, movem is very cheap.  */
+#define MEMCPY_BLOCK_THRESHOLD (44)
+#else
+/* Break even between movem and move16 is at 38.7*2, but modulo 44. */
+#define MEMCPY_BLOCK_THRESHOLD (44*2)
+#endif
+
+void *memcpy(void *, const void *, unsigned int);
+
+libc_hidden_proto(memcpy)
+void *memcpy(void *pdst,
+             const void *psrc,
+             unsigned int pn)
+{
+  /* Ok.  Now we want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this.
+      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+     If gcc was allright, it really would need no temporaries, and no
+     stack space to save stuff on. */
+
+#ifndef MEMPCPY
+  register void *return_dst __asm__ ("r10") = pdst;
+#else
+  /* FIXME: Use R10 for something.  */
+# define return_dst dst
+#endif
+
+  register char *dst __asm__ ("r13") = pdst;
+  register char *src __asm__ ("r11") = (char *) psrc;
+  register int n __asm__ ("r12") = pn;
+  
+ 
+  /* When src is aligned but not dst, this makes a few extra needless
+     cycles.  I believe it would take as many to check that the
+     re-alignment was unnecessary.  */
+  if (((unsigned long) dst & 3) != 0
+      /* Don't align if we wouldn't copy more than a few bytes; so we
+	 don't have to check further for overflows.  */
+      && n >= 3)
+  {
+    if ((unsigned long) dst & 1)
+    {
+      n--;
+      *(char*)dst = *(char*)src;
+      src++;
+      dst++;
+    }
+
+    if ((unsigned long) dst & 2)
+    {
+      n -= 2;
+      *(short*)dst = *(short*)src;
+      src += 2;
+      dst += 2;
+    }
+  }
+
+  /* Decide which copying method to use. */
+  if (n >= MEMCPY_BLOCK_THRESHOLD)
+  {
+    /* For large copies we use 'movem' */
+
+  /* It is not optimal to tell the compiler about clobbering any
+     registers; that will move the saving/restoring of those registers
+     to the function prologue/epilogue, and make non-movem sizes
+     suboptimal.
+
+      This method is not foolproof; it assumes that the "register asm"
+     declarations at the beginning of the function really are used
+     here (beware: they may be moved to temporary registers).
+      This way, we do not have to save/move the registers around into
+     temporaries; we can safely use them straight away.  */
+    __asm__ volatile ("\
+	.syntax no_register_prefix					\n\
+									\n\
+        ;; Check that the register asm declaration got right.		\n\
+        ;; The GCC manual explicitly says TRT will happen.		\n\
+	.ifnc %0-%1-%2,$r13-$r11-$r12					\n\
+	.err								\n\
+	.endif								\n\
+									\n\
+	;; Save the registers we'll use in the movem process		\n\
+	;; on the stack.						\n\
+	subq 	11*4,sp							\n\
+	movem	r10,[sp]						\n\
+									\n\
+        ;; Now we've got this:						\n\
+	;; r11 - src							\n\
+	;; r13 - dst							\n\
+	;; r12 - n							\n\
+									\n\
+        ;; Update n for the first loop					\n\
+        subq    44,r12							\n\
+0:									\n\
+	movem	[r11+],r10						\n\
+        subq   44,r12							\n\
+        bge     0b							\n\
+	movem	r10,[r13+]						\n\
+									\n\
+        addq   44,r12  ;; compensate for last loop underflowing n	\n\
+									\n\
+	;; Restore registers from stack					\n\
+        movem [sp+],r10"
+
+     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) 
+     /* Inputs */ : "0" (dst), "1" (src), "2" (n));
+  }
+
+  /* Either we directly starts copying, using dword copying
+     in a loop, or we copy as much as possible with 'movem' 
+     and then the last block (<44 bytes) is copied here.
+     This will work since 'movem' will have updated src,dst,n. */
+
+  while ( n >= 16 )
+  {
+    *((long*)dst)++ = *((long*)src)++;
+    *((long*)dst)++ = *((long*)src)++;
+    *((long*)dst)++ = *((long*)src)++;
+    *((long*)dst)++ = *((long*)src)++;
+    n -= 16;
+  }
+
+  /* A switch() is definitely the fastest although it takes a LOT of code.
+   * Particularly if you inline code this.
+   */
+  switch (n)
+  {
+    case 0:
+      break;
+    case 1:
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 2:
+      *((short*)dst)++ = *((short*)src)++;
+      break;
+    case 3:
+      *((short*)dst)++ = *((short*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 4:
+      *((long*)dst)++ = *((long*)src)++;
+      break;
+    case 5:
+      *((long*)dst)++ = *((long*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 6:
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      break;
+    case 7:
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 8:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      break;
+    case 9:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 10:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      break;
+    case 11:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 12:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      break;
+    case 13:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+    case 14:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      break;
+    case 15:
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((long*)dst)++ = *((long*)src)++;
+      *((short*)dst)++ = *((short*)src)++;
+      *((char*)dst)++ = *((char*)src)++;
+      break;
+  }
+
+  return return_dst; /* destination pointer. */
+} /* memcpy() */
+libc_hidden_def(memcpy)
diff --git a/libc/string/cris/memmove.c b/libc/string/cris/memmove.c
new file mode 100644
index 000000000..437637078
--- /dev/null
+++ b/libc/string/cris/memmove.c
@@ -0,0 +1,101 @@
+/* Taken from generic/memmove.c; trivially modified to work with 
+   arch-specific memcopy.h for Cris.
+
+   Copy memory to memory until the specified number of bytes
+   has been copied.  Overlap is handled correctly.
+   Copyright (C) 1991, 1995, 1996, 1997, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Torbjorn Granlund (tege@sics.se).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <string.h>
+
+#include "memcopy.h"
+#include "../generic/pagecopy.h"
+
+libc_hidden_proto(memmove)
+void *memmove (void *dest, const void *src, size_t len)
+{
+  unsigned long int dstp = (long int) dest;
+  unsigned long int srcp = (long int) src;
+
+  /* This test makes the forward copying code be used whenever possible.
+     Reduces the working set.  */
+  if (dstp - srcp >= len)	/* *Unsigned* compare!  */
+    {
+#if 1
+#warning REMINDER: Cris arch-opt memmove assumes memcpy does forward copying!
+      memcpy(dest, src, len);
+#else
+      /* Copy from the beginning to the end.  */
+
+      /* If there not too few bytes to copy, use word copy.  */
+      if (len >= OP_T_THRES)
+	{
+	  /* Copy just a few bytes to make DSTP aligned.  */
+	  len -= (-dstp) % OPSIZ;
+	  BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
+
+	  /* Copy whole pages from SRCP to DSTP by virtual address
+	     manipulation, as much as possible.  */
+
+	  PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
+
+	  /* Copy from SRCP to DSTP taking advantage of the known
+	     alignment of DSTP.  Number of bytes remaining is put
+	     in the third argument, i.e. in LEN.  This number may
+	     vary from machine to machine.  */
+
+	  WORD_COPY_FWD (dstp, srcp, len, len);
+
+	  /* Fall out and copy the tail.  */
+	}
+
+      /* There are just a few bytes to copy.  Use byte memory operations.  */
+      BYTE_COPY_FWD (dstp, srcp, len);
+#endif
+    }
+  else
+    {
+      /* Copy from the end to the beginning.  */
+      srcp += len;
+      dstp += len;
+
+      /* If there not too few bytes to copy, use word copy.  */
+      if (len >= OP_T_THRES)
+	{
+	  /* Copy just a few bytes to make DSTP aligned.  */
+	  len -= dstp % OPSIZ;
+	  BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);
+
+	  /* Copy from SRCP to DSTP taking advantage of the known
+	     alignment of DSTP.  Number of bytes remaining is put
+	     in the third argument, i.e. in LEN.  This number may
+	     vary from machine to machine.  */
+
+	  WORD_COPY_BWD (dstp, srcp, len, len);
+
+	  /* Fall out and copy the tail.  */
+	}
+
+      /* There are just a few bytes to copy.  Use byte memory operations.  */
+      BYTE_COPY_BWD (dstp, srcp, len);
+    }
+
+  return (dest);
+}
+libc_hidden_def(memmove)
diff --git a/libc/string/cris/memset.c b/libc/string/cris/memset.c
new file mode 100644
index 000000000..7e71bc50f
--- /dev/null
+++ b/libc/string/cris/memset.c
@@ -0,0 +1,271 @@
+/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000 Axis Communications AB.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/*#************************************************************************#*/
+/*#-------------------------------------------------------------------------*/
+/*#                                                                         */
+/*# FUNCTION NAME: memset()                                                 */
+/*#                                                                         */
+/*# PARAMETERS:  void* dst;   Destination address.                          */
+/*#              int     c;   Value of byte to write.                       */
+/*#              int   len;   Number of bytes to write.                     */
+/*#                                                                         */
+/*# RETURNS:     dst.                                                       */
+/*#                                                                         */
+/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */
+/*#              Framework taken from memcpy.  This routine is              */
+/*#              very sensitive to compiler changes in register allocation. */
+/*#              Should really be rewritten to avoid this problem.          */
+/*#                                                                         */
+/*#-------------------------------------------------------------------------*/
+/*#                                                                         */
+/*# HISTORY                                                                 */
+/*#                                                                         */
+/*# DATE      NAME            CHANGES                                       */
+/*# ----      ----            -------                                       */
+/*# 990713    HP              Tired of watching this function (or           */
+/*#                           really, the nonoptimized generic              */
+/*#                           implementation) take up 90% of simulator      */
+/*#                           output.  Measurements needed.                 */
+/*#                                                                         */
+/*#-------------------------------------------------------------------------*/
+
+/* No, there's no macro saying 12*4, since it is "hard" to get it into
+   the asm in a good way.  Thus better to expose the problem everywhere.
+   */
+
+/* Assuming 1 cycle per dword written or read (ok, not really true), and
+   one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
+   so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
+
+#define ZERO_BLOCK_SIZE (1*12*4)
+
+void *memset(void *, int, unsigned long);
+
+libc_hidden_proto(memset)
+void *memset(void *pdst,
+             int c,
+             unsigned long plen)
+{
+  /* Ok.  Now we want the parameters put in special registers.
+     Make sure the compiler is able to make something useful of this. */
+
+  register char *return_dst __asm__ ("r10") = pdst;
+  register long n __asm__ ("r12") = plen;
+  register int lc __asm__ ("r11") = c;
+
+  /* Most apps use memset sanely.  Only those memsetting about 3..4
+     bytes or less get penalized compared to the generic implementation
+     - and that's not really sane use. */
+
+  /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
+     they compile cascaded "x |= x << 8" sanely! */
+  __asm__("movu.b %0,$r13 \n\
+	   lslq 8,$r13    \n\
+	   move.b %0,$r13 \n\
+	   move.d $r13,%0 \n\
+	   lslq 16,$r13   \n\
+	   or.d $r13,%0"
+          : "=r" (lc) : "0" (lc) : "r13");
+
+  {
+    register char *dst __asm__ ("r13") = pdst;
+ 
+  if (((unsigned long) pdst & 3) != 0
+     /* Oops! n=0 must be a legal call, regardless of alignment. */
+      && n >= 3)
+  {
+    if ((unsigned long)dst & 1)
+    {
+      *dst = (char) lc;
+      n--;
+      dst++;
+    }
+
+    if ((unsigned long)dst & 2)
+    {
+      *(short *)dst = lc;
+      n -= 2;
+      dst += 2;
+    }
+  }
+
+  /* Now the fun part.  For the threshold value of this, check the equation
+     above. */
+  /* Decide which copying method to use. */
+  if (n >= ZERO_BLOCK_SIZE)
+  {
+    /* For large copies we use 'movem' */
+
+  /* It is not optimal to tell the compiler about clobbering any
+     registers; that will move the saving/restoring of those registers
+     to the function prologue/epilogue, and make non-movem sizes
+     suboptimal.
+
+      This method is not foolproof; it assumes that the "asm reg"
+     declarations at the beginning of the function really are used
+     here (beware: they may be moved to temporary registers).
+      This way, we do not have to save/move the registers around into
+     temporaries; we can safely use them straight away.  */
+    __asm__ volatile ("								\n\
+	.syntax no_register_prefix						\n\
+										\n\
+        ;; Check that the register asm declaration got right.			\n\
+        ;; The GCC manual explicitly says there's no warranty for that (too).	\n\
+	.ifnc %0-%1-%4,$r13-$r12-$r11						\n\
+	.err									\n\
+	.endif									\n\
+										\n\
+	;; Save the registers we'll clobber in the movem process		\n\
+	;; on the stack.  Don't mention them to gcc, it will only be		\n\
+	;; upset.								\n\
+	subq 	11*4,sp								\n\
+        movem   r10,[sp]							\n\
+										\n\
+        move.d  r11,r0								\n\
+        move.d  r11,r1								\n\
+        move.d  r11,r2								\n\
+        move.d  r11,r3								\n\
+        move.d  r11,r4								\n\
+        move.d  r11,r5								\n\
+        move.d  r11,r6								\n\
+        move.d  r11,r7								\n\
+        move.d  r11,r8								\n\
+        move.d  r11,r9								\n\
+        move.d  r11,r10								\n\
+										\n\
+        ;; Now we've got this:							\n\
+	;; r13 - dst								\n\
+	;; r12 - n								\n\
+										\n\
+        ;; Update n for the first loop						\n\
+        subq    12*4,r12							\n\
+0:										\n\
+        subq   12*4,r12								\n\
+        bge     0b								\n\
+	movem	r11,[r13+]							\n\
+										\n\
+        addq   12*4,r12  ;; compensate for last loop underflowing n		\n\
+										\n\
+	;; Restore registers from stack						\n\
+        movem [sp+],r10" 
+
+     /* Outputs */ : "=r" (dst), "=r" (n)
+     /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
+    
+  }
+
+    /* Either we directly starts copying, using dword copying
+       in a loop, or we copy as much as possible with 'movem' 
+       and then the last block (<44 bytes) is copied here.
+       This will work since 'movem' will have updated src,dst,n. */
+
+    while ( n >= 16 )
+    {
+      *((long*)dst)++ = lc;
+      *((long*)dst)++ = lc;
+      *((long*)dst)++ = lc;
+      *((long*)dst)++ = lc;
+      n -= 16;
+    }
+
+    /* A switch() is definitely the fastest although it takes a LOT of code.
+     * Particularly if you inline code this.
+     */
+    switch (n)
+    {
+      case 0:
+        break;
+      case 1:
+        *(char*)dst = (char) lc;
+        break;
+      case 2:
+        *(short*)dst = (short) lc;
+        break;
+      case 3:
+        *((short*)dst)++ = (short) lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 4:
+        *((long*)dst)++ = lc;
+        break;
+      case 5:
+        *((long*)dst)++ = lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 6:
+        *((long*)dst)++ = lc;
+        *(short*)dst = (short) lc;
+        break;
+      case 7:
+        *((long*)dst)++ = lc;
+        *((short*)dst)++ = (short) lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 8:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        break;
+      case 9:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 10:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *(short*)dst = (short) lc;
+        break;
+      case 11:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((short*)dst)++ = (short) lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 12:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        break;
+      case 13:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *(char*)dst = (char) lc;
+        break;
+      case 14:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *(short*)dst = (short) lc;
+        break;
+      case 15:
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((long*)dst)++ = lc;
+        *((short*)dst)++ = (short) lc;
+        *(char*)dst = (char) lc;
+        break;
+    }
+  }
+
+  return return_dst; /* destination pointer. */
+} /* memset() */
+libc_hidden_def(memset)
diff --git a/libc/string/cris/strcpy.c b/libc/string/cris/strcpy.c
new file mode 100644
index 000000000..0af25253e
--- /dev/null
+++ b/libc/string/cris/strcpy.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2006-2007 Axis Communications AB
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+libc_hidden_proto(strcpy)
+char *strcpy(char *dest, const char *src)
+{
+  char *ret = dest;
+  unsigned long himagic = 0x80808080L;
+  unsigned long lomagic = 0x01010101L;
+
+  while ((unsigned long)src & (sizeof src - 1))
+  {
+    if (!(*dest++ = *src++))
+    {
+      return ret;
+    }
+  }
+
+  while (1)
+  {
+    unsigned long value = *(unsigned long*)src;
+    unsigned long magic;
+
+    src += sizeof (unsigned long);
+
+    if ((magic = (value - lomagic) & himagic))
+    {
+      if (magic & ~value)
+      {
+        break;
+      }
+    }
+
+    *(unsigned long*)dest = value;
+    dest += sizeof (unsigned long);
+  }
+
+  src -= sizeof (unsigned long);
+
+  while ((*dest++ = *src++))
+  {
+  }
+
+  return ret;
+}
+libc_hidden_def(strcpy)
diff --git a/libc/string/cris/strncpy.c b/libc/string/cris/strncpy.c
new file mode 100644
index 000000000..93a6608bc
--- /dev/null
+++ b/libc/string/cris/strncpy.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2006-2007 Axis Communications AB
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+libc_hidden_proto(memset)
+
+libc_hidden_proto(strncpy)
+char *strncpy(char *dest, const char *src, size_t count)
+{
+  char *ret = dest;
+  unsigned long himagic = 0x80808080L;
+  unsigned long lomagic = 0x01010101L;
+
+  while (count && (unsigned long)src & (sizeof src - 1))
+  {
+    count--;
+    if (!(*dest++ = *src++))
+    {
+      goto finalize;
+    }
+  }
+
+  while (count >= sizeof (unsigned long))
+  {
+    unsigned long value = *(unsigned long*)src;
+    unsigned long magic;
+
+    if ((magic = (value - lomagic) & himagic))
+    {
+      if (magic & ~value)
+      {
+        break;
+      }
+    }
+
+    *(unsigned long*)dest = value;
+    dest += sizeof (unsigned long);
+    src += sizeof (unsigned long);
+    count -= sizeof (unsigned long);
+  }
+
+  while (count)
+  {
+    count--;
+    if (!(*dest++ = *src++))
+      break;
+  }
+
+finalize:
+  if (count)
+  {
+    memset(dest, '\0', count);
+  }
+
+  return ret;
+}
+libc_hidden_def(strncpy)
diff --git a/libc/string/xtensa/Makefile b/libc/string/xtensa/Makefile
new file mode 100644
index 000000000..0a95346fd
--- /dev/null
+++ b/libc/string/xtensa/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S
new file mode 100644
index 000000000..19f3a6818
--- /dev/null
+++ b/libc/string/xtensa/memcpy.S
@@ -0,0 +1,297 @@
+/* Optimized memcpy for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+	.macro	src_b	r, w0, w1
+#ifdef __XTENSA_EB__
+	src	\r, \w0, \w1
+#else
+	src	\r, \w1, \w0
+#endif
+	.endm
+
+	.macro	ssa8	r
+#ifdef __XTENSA_EB__
+	ssa8b	\r
+#else
+	ssa8l	\r
+#endif
+	.endm
+
+/* If the Xtensa Unaligned Load Exception option is not used, this
+   code can run a few cycles faster by relying on the low address bits
+   being ignored.  However, if the code is then run with an Xtensa ISS
+   client that checks for unaligned accesses, it will produce a lot of
+   warning messages.  Set this flag to disable the use of unaligned
+   accesses and keep the ISS happy.  */
+
+#define UNALIGNED_ADDRESSES_CHECKED 1
+
+/* Do not use .literal_position in the ENTRY macro.  */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+
+/* void *memcpy (void *dst, const void *src, size_t len)
+
+   The algorithm is as follows:
+
+   If the destination is unaligned, align it by conditionally
+   copying 1- and/or 2-byte pieces.
+
+   If the source is aligned, copy 16 bytes with a loop, and then finish up
+   with 8, 4, 2, and 1-byte copies conditional on the length.
+
+   Else (if source is unaligned), do the same, but use SRC to align the
+   source data.
+
+   This code tries to use fall-through branches for the common
+   case of aligned source and destination and multiple of 4 (or 8) length.  */
+
+
+/* Byte by byte copy.  */
+
+	.text
+	.align	4
+	.literal_position
+__memcpy_aux:
+
+	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+	   (0 mod 4 alignment for LBEG).  */
+	.byte	0
+
+.Lbytecopy:
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, 2f
+#else
+	beqz	a4, 2f
+	add	a7, a3, a4	// a7 = end address for source
+#endif
+1:	l8ui	a6, a3, 0
+	addi	a3, a3, 1
+	s8i	a6, a5, 0
+	addi	a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a7, 1b
+#endif
+2:	retw
+
+
+/* Destination is unaligned.  */
+
+	.align	4
+.Ldst1mod2: // dst is only byte aligned
+
+	/* Do short copies byte-by-byte.  */
+	_bltui	a4, 7, .Lbytecopy
+
+	/* Copy 1 byte.  */
+	l8ui	a6, a3, 0
+	addi	a3, a3, 1
+	addi	a4, a4, -1
+	s8i	a6, a5, 0
+	addi	a5, a5, 1
+
+	/* Return to main algorithm if dst is now aligned.  */
+	_bbci.l	a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+	/* Do short copies byte-by-byte.  */
+	_bltui	a4, 6, .Lbytecopy
+
+	/* Copy 2 bytes.  */
+	l8ui	a6, a3, 0
+	l8ui	a7, a3, 1
+	addi	a3, a3, 2
+	addi	a4, a4, -2
+	s8i	a6, a5, 0
+	s8i	a7, a5, 1
+	addi	a5, a5, 2
+
+	/* dst is now aligned; return to main algorithm.  */
+	j	.Ldstaligned
+
+
+ENTRY (memcpy)
+	/* a2 = dst, a3 = src, a4 = len */
+
+	mov	a5, a2		// copy dst so that a2 is return value
+	_bbsi.l	a2, 0, .Ldst1mod2
+	_bbsi.l	a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+	/* Get number of loop iterations with 16B per iteration.  */
+	srli	a7, a4, 4
+
+	/* Check if source is aligned.  */
+	movi	a8, 3
+	_bany	a3, a8, .Lsrcunaligned
+
+	/* Destination and source are word-aligned, use word copy.  */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a8, a7, 4
+	add	a8, a8, a3	// a8 = end of last 16B source chunk
+#endif
+1:	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	s32i	a6, a5, 0
+	l32i	a6, a3, 8
+	s32i	a7, a5, 4
+	l32i	a7, a3, 12
+	s32i	a6, a5, 8
+	addi	a3, a3, 16
+	s32i	a7, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a8, 1b
+#endif
+
+	/* Copy any leftover pieces smaller than 16B.  */
+2:	bbci.l	a4, 3, 3f
+
+	/* Copy 8 bytes.  */
+	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	addi	a3, a3, 8
+	s32i	a6, a5, 0
+	s32i	a7, a5, 4
+	addi	a5, a5, 8
+
+3:	bbsi.l	a4, 2, 4f
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 4 bytes.  */
+4:	l32i	a6, a3, 0
+	addi	a3, a3, 4
+	s32i	a6, a5, 0
+	addi	a5, a5, 4
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 2 bytes.  */
+5:	l16ui	a6, a3, 0
+	addi	a3, a3, 2
+	s16i	a6, a5, 0
+	addi	a5, a5, 2
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 1 byte.  */
+6:	l8ui	a6, a3, 0
+	s8i	a6, a5, 0
+
+.Ldone:
+	retw
+
+
+/* Destination is aligned; source is unaligned.  */
+
+	.align	4
+.Lsrcunaligned:
+	/* Avoid loading anything for zero-length copies.  */
+	_beqz	a4, .Ldone
+
+	/* Copy 16 bytes per iteration for word-aligned dst and
+	   unaligned src.  */
+	ssa8	a3		// set shift amount from byte offset
+#if UNALIGNED_ADDRESSES_CHECKED
+	and	a11, a3, a8	// save unalignment offset for below
+	sub	a3, a3, a11	// align a3
+#endif
+	l32i	a6, a3, 0	// load first word
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a10, a7, 4
+	add	a10, a10, a3	// a10 = end of last 16B source chunk
+#endif
+1:	l32i	a7, a3, 4
+	l32i	a8, a3, 8
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	l32i	a9, a3, 12
+	src_b	a7, a7, a8
+	s32i	a7, a5, 4
+	l32i	a6, a3, 16
+	src_b	a8, a8, a9
+	s32i	a8, a5, 8
+	addi	a3, a3, 16
+	src_b	a9, a9, a6
+	s32i	a9, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a10, 1b
+#endif
+
+2:	bbci.l	a4, 3, 3f
+
+	/* Copy 8 bytes.  */
+	l32i	a7, a3, 4
+	l32i	a8, a3, 8
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	addi	a3, a3, 8
+	src_b	a7, a7, a8
+	s32i	a7, a5, 4
+	addi	a5, a5, 8
+	mov	a6, a8
+
+3:	bbci.l	a4, 2, 4f
+
+	/* Copy 4 bytes.  */
+	l32i	a7, a3, 4
+	addi	a3, a3, 4
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	addi	a5, a5, 4
+	mov	a6, a7
+4:
+#if UNALIGNED_ADDRESSES_CHECKED
+	add	a3, a3, a11	// readjust a3 with correct misalignment
+#endif
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 2 bytes.  */
+5:	l8ui	a6, a3, 0
+	l8ui	a7, a3, 1
+	addi	a3, a3, 2
+	s8i	a6, a5, 0
+	s8i	a7, a5, 1
+	addi	a5, a5, 2
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 1 byte.  */
+6:	l8ui	a6, a3, 0
+	s8i	a6, a5, 0
+	retw
+
+libc_hidden_def (memcpy)
diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S
new file mode 100644
index 000000000..c0928825d
--- /dev/null
+++ b/libc/string/xtensa/memset.S
@@ -0,0 +1,165 @@
+/* Optimized memset for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Do not use .literal_position in the ENTRY macro.  */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+/* void *memset (void *dst, int c, size_t length)
+
+   The algorithm is as follows:
+
+   Create a word with c in all byte positions.
+	
+   If the destination is aligned, set 16B chunks with a loop, and then
+   finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
+
+   If the destination is unaligned, align it by conditionally
+   setting 1B and/or 2B and then go to aligned case.
+
+   This code tries to use fall-through branches for the common
+   case of an aligned destination (except for the branches to
+   the alignment labels).  */
+
+
+/* Byte-by-byte set.  */
+
+	.text
+	.align	4
+	.literal_position
+__memset_aux:
+
+	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+	   (0 mod 4 alignment for LBEG).  */
+	.byte	0
+
+.Lbyteset:
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, 2f
+#else
+	beqz	a4, 2f
+	add	a6, a5, a4	// a6 = ending address
+#endif
+1:	s8i	a3, a5, 0
+	addi	a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+	blt	a5, a6, 1b
+#endif
+2:	retw
+
+
+/* Destination is unaligned.  */
+
+	.align	4
+
+.Ldst1mod2: // dst is only byte aligned
+
+	/* Do short sizes byte-by-byte.  */
+	bltui	a4, 8, .Lbyteset
+
+	/* Set 1 byte.  */
+	s8i	a3, a5, 0
+	addi	a5, a5, 1
+	addi	a4, a4, -1
+
+	/* Now retest if dst is aligned.  */
+	_bbci.l	a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+	/* Do short sizes byte-by-byte.  */
+	bltui	a4, 8, .Lbyteset
+
+	/* Set 2 bytes.  */
+	s16i	a3, a5, 0
+	addi	a5, a5, 2
+	addi	a4, a4, -2
+
+	/* dst is now aligned; return to main algorithm */
+	j	.Ldstaligned
+
+
+ENTRY (memset)
+	/* a2 = dst, a3 = c, a4 = length */
+
+	/* Duplicate character into all bytes of word.  */
+	extui	a3, a3, 0, 8
+	slli	a7, a3, 8
+	or	a3, a3, a7
+	slli	a7, a3, 16
+	or	a3, a3, a7
+
+	mov	a5, a2		// copy dst so that a2 is return value
+
+	/* Check if dst is unaligned.  */
+	_bbsi.l	a2, 0, .Ldst1mod2
+	_bbsi.l	a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+	/* Get number of loop iterations with 16B per iteration.  */
+	srli	a7, a4, 4
+
+	/* Destination is word-aligned.  */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a6, a7, 4
+	add	a6, a6, a5	// a6 = end of last 16B chunk
+#endif
+	/* Set 16 bytes per iteration.  */
+1:	s32i	a3, a5, 0
+	s32i	a3, a5, 4
+	s32i	a3, a5, 8
+	s32i	a3, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a5, a6, 1b
+#endif
+
+	/* Set any leftover pieces smaller than 16B.  */
+2:	bbci.l	a4, 3, 3f
+
+	/* Set 8 bytes.  */
+	s32i	a3, a5, 0
+	s32i	a3, a5, 4
+	addi	a5, a5, 8
+
+3:	bbci.l	a4, 2, 4f
+
+	/* Set 4 bytes.  */
+	s32i	a3, a5, 0
+	addi	a5, a5, 4
+
+4:	bbci.l	a4, 1, 5f
+
+	/* Set 2 bytes.  */
+	s16i	a3, a5, 0
+	addi	a5, a5, 2
+
+5:	bbci.l	a4, 0, 6f
+
+	/* Set 1 byte.  */
+	s8i	a3, a5, 0
+6:	retw
+
+libc_hidden_def (memset)
diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S
new file mode 100644
index 000000000..90c418d12
--- /dev/null
+++ b/libc/string/xtensa/strcmp.S
@@ -0,0 +1,313 @@
+/* Optimized strcmp for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define	MASK0 0xff000000
+#define	MASK1 0x00ff0000
+#define	MASK2 0x0000ff00
+#define	MASK3 0x000000ff
+#else
+#define	MASK0 0x000000ff
+#define	MASK1 0x0000ff00
+#define	MASK2 0x00ff0000
+#define	MASK3 0xff000000
+#endif
+
+#define MASK4 0x40404040
+
+	.literal .Lmask0, MASK0
+	.literal .Lmask1, MASK1
+	.literal .Lmask2, MASK2
+	.literal .Lmask3, MASK3
+	.literal .Lmask4, MASK4
+
+	.text
+ENTRY (strcmp)
+	/* a2 = s1, a3 = s2 */
+
+	l8ui	a8, a2, 0	// byte 0 from s1
+	l8ui	a9, a3, 0	// byte 0 from s2
+	movi	a10, 3		// mask
+	bne	a8, a9, .Lretdiff
+
+	or	a11, a2, a3
+	bnone	a11, a10, .Laligned
+
+	xor	a11, a2, a3	// compare low two bits of s1 and s2
+	bany	a11, a10, .Lunaligned	// if they have different alignment
+
+	/* s1/s2 are not word-aligned.  */
+	addi	a2, a2, 1	// advance s1
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	bnone	a2, a10, .Laligned // if s1/s2 now aligned
+	l8ui	a8, a2, 0	// byte 1 from s1
+	l8ui	a9, a3, 0	// byte 1 from s2
+	addi	a2, a2, 1	// advance s1
+	bne	a8, a9, .Lretdiff // if different, return difference
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	bnone	a2, a10, .Laligned // if s1/s2 now aligned
+	l8ui	a8, a2, 0	// byte 2 from s1
+	l8ui	a9, a3, 0	// byte 2 from s2
+	addi	a2, a2, 1	// advance s1
+	bne	a8, a9, .Lretdiff // if different, return difference
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	j	.Laligned
+
+/* s1 and s2 have different alignment.
+
+   If the zero-overhead loop option is available, use an (almost)
+   infinite zero-overhead loop with conditional exits so we only pay
+   for taken branches when exiting the loop.
+
+   Note: It is important for this unaligned case to come before the
+   code for aligned strings, because otherwise some of the branches
+   above cannot reach and have to be transformed to branches around
+   jumps.  The unaligned code is smaller and the branches can reach
+   over it.  */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Lunaligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lretdiff	// loop forever (almost anyway)
+#endif
+.Lnextbyte:
+	l8ui	a8, a2, 0
+	l8ui	a9, a3, 0
+	addi	a2, a2, 1
+	bne	a8, a9, .Lretdiff
+	addi	a3, a3, 1
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, .Lretdiff
+#else
+	bnez	a8, .Lnextbyte
+#endif
+.Lretdiff:
+	sub	a2, a8, a9
+	retw
+
+/* s1 is word-aligned; s2 is word-aligned.
+
+   If the zero-overhead loop option is available, use an (almost)
+   infinite zero-overhead loop with conditional exits so we only pay
+   for taken branches when exiting the loop.  */
+
+/* New algorithm, relying on the fact that all normal ASCII is between
+   32 and 127.
+
+   Rather than check all bytes for zero:
+   Take one word (4 bytes).  Call it w1.
+   Shift w1 left by one into w1'.
+   Or w1 and w1'.  For all normal ASCII bit 6 will be 1; for zero it won't.
+   Check that all 4 bit 6's (one for each byte) are one:
+   If they are, we are definitely not done.
+   If they are not, we are probably done, but need to check for zero.  */
+
+	.align	4
+#if XCHAL_HAVE_LOOPS
+.Laligned:
+	.begin	no-transform
+	l32r	a4, .Lmask0	// mask for byte 0
+	l32r	a7, .Lmask4
+	/* Loop forever.  (a4 is more than than the maximum number
+	   of iterations) */
+	loop	a4, .Laligned_done
+
+	/* First unrolled loop body.  */
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	bnall	a9, a7, .Lprobeq
+
+	/* Second unrolled loop body.  */
+	l32i	a8, a2, 4	// get word from s1+4
+	l32i	a9, a3, 4	// get word from s2+4
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	bnall	a9, a7, .Lprobeq2
+
+	addi	a2, a2, 8	// advance s1 pointer
+	addi	a3, a3, 8	// advance s2 pointer
+.Laligned_done:
+	or	a1, a1, a1	// nop
+
+.Lprobeq2:
+	/* Adjust pointers to account for the loop unrolling.  */
+	addi	a2, a2, 4
+	addi	a3, a3, 4
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+.Laligned:
+	movi	a4, MASK0	// mask for byte 0
+	movi	a7, MASK4
+	j	.Lfirstword
+.Lnextword:
+	addi	a2, a2, 4	// advance s1 pointer
+	addi	a3, a3, 4	// advance s2 pointer
+.Lfirstword:
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	ball	a9, a7, .Lnextword
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	/* align (0 mod 4) */
+.Lprobeq:
+	/* Words are probably equal, but check for sure.
+	   If not, loop over the rest of string using normal algorithm.  */
+
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	l32r	a5, .Lmask1	// mask for byte 1
+	l32r	a6, .Lmask2	// mask for byte 2
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	l32r	a7, .Lmask3	// mask for byte 3
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bnone	a8, a7, .Leq	// if byte 3 is zero
+	addi.n	a2, a2, 4	// advance s1 pointer
+	addi.n	a3, a3, 4	// advance s2 pointer
+#if XCHAL_HAVE_LOOPS
+
+	/* align (1 mod 4) */
+	loop	a4, .Leq	// loop forever (a4 is bigger than max iters)
+	.end	no-transform
+
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	addi	a2, a2, 4	// advance s1 pointer
+	bne	a8, a9, .Lwne
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bnone	a8, a7, .Leq	// if byte 3 is zero
+	addi	a3, a3, 4	// advance s2 pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+	j	.Lfirstword2
+.Lnextword2:
+	addi	a3, a3, 4	// advance s2 pointer
+.Lfirstword2:
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	addi	a2, a2, 4	// advance s1 pointer
+	bne	a8, a9, .Lwne
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bany	a8, a7, .Lnextword2	// if byte 3 is zero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	/* Words are equal; some byte is zero.  */
+.Leq:	movi	a2, 0		// return equal
+	retw
+
+.Lwne2:	/* Words are not equal.  On big-endian processors, if none of the
+	   bytes are zero, the return value can be determined by a simple
+	   comparison.  */
+#ifdef __XTENSA_EB__
+	or	a10, a8, a5
+	bnall	a10, a7, .Lsomezero
+	bgeu	a8, a9, .Lposreturn
+	movi	a2, -1
+	retw
+.Lposreturn:
+	movi	a2, 1
+	retw
+.Lsomezero:	// There is probably some zero byte.
+#endif /* __XTENSA_EB__ */
+.Lwne:	/* Words are not equal.  */
+	xor	a2, a8, a9	// get word with nonzero in byte that differs
+	bany	a2, a4, .Ldiff0	// if byte 0 differs
+	movi	a5, MASK1	// mask for byte 1
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bany	a2, a5, .Ldiff1	// if byte 1 differs
+	movi	a6, MASK2	// mask for byte 2
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bany	a2, a6, .Ldiff2	// if byte 2 differs
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+#ifdef __XTENSA_EB__
+.Ldiff3:
+.Ldiff2:
+.Ldiff1:
+	/* Byte 0 is equal (at least) and there is a difference before a zero
+	   byte.  Just subtract words to get the return value.
+	   The high order equal bytes cancel, leaving room for the sign.  */
+	sub	a2, a8, a9
+	retw
+
+.Ldiff0:
+	/* Need to make room for the sign, so can't subtract whole words.  */
+	extui	a10, a8, 24, 8
+	extui	a11, a9, 24, 8
+	sub	a2, a10, a11
+	retw
+
+#else /* !__XTENSA_EB__ */
+	/* Little-endian is a little more difficult because can't subtract
+	   whole words.  */
+.Ldiff3:
+	/* Bytes 0-2 are equal; byte 3 is different.
+	   For little-endian need to have a sign bit for the difference.  */
+	extui	a10, a8, 24, 8
+	extui	a11, a9, 24, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff0:
+	/* Byte 0 is different.  */
+	extui	a10, a8, 0, 8
+	extui	a11, a9, 0, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff1:
+	/* Byte 0 is equal; byte 1 is different.  */
+	extui	a10, a8, 8, 8
+	extui	a11, a9, 8, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff2:
+	/* Bytes 0-1 are equal; byte 2 is different.  */
+	extui	a10, a8, 16, 8
+	extui	a11, a9, 16, 8
+	sub	a2, a10, a11
+	retw
+
+#endif /* !__XTENSA_EB */
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp, strcoll)
+libc_hidden_def (strcoll)
+#endif
diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S
new file mode 100644
index 000000000..108070384
--- /dev/null
+++ b/libc/string/xtensa/strcpy.S
@@ -0,0 +1,150 @@
+/* Optimized strcpy for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define	MASK0 0xff000000
+#define	MASK1 0x00ff0000
+#define	MASK2 0x0000ff00
+#define	MASK3 0x000000ff
+#else
+#define	MASK0 0x000000ff
+#define	MASK1 0x0000ff00
+#define	MASK2 0x00ff0000
+#define	MASK3 0xff000000
+#endif
+
+	.text
+ENTRY (strcpy)
+	/* a2 = dst, a3 = src */
+
+	mov	a10, a2		// leave dst in return value register
+	movi	a4, MASK0
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a3, 0, .Lsrc1mod2
+	bbsi.l	a3, 1, .Lsrc2mod4
+.Lsrcaligned:
+
+	/* Check if the destination is aligned.  */
+	movi	a8, 3
+	bnone	a10, a8, .Laligned
+
+	j	.Ldstunaligned
+
+.Lsrc1mod2: // src address is odd
+	l8ui	a8, a3, 0	// get byte 0
+	addi	a3, a3, 1	// advance src pointer
+	s8i	a8, a10, 0	// store byte 0
+	beqz	a8, 1f		// if byte 0 is zero
+	addi	a10, a10, 1	// advance dst pointer
+	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
+
+.Lsrc2mod4: // src address is 2 mod 4
+	l8ui	a8, a3, 0	// get byte 0
+	/* 1-cycle interlock */
+	s8i	a8, a10, 0	// store byte 0
+	beqz	a8, 1f		// if byte 0 is zero
+	l8ui	a8, a3, 1	// get byte 0
+	addi	a3, a3, 2	// advance src pointer
+	s8i	a8, a10, 1	// store byte 0
+	addi	a10, a10, 2	// advance dst pointer
+	bnez	a8, .Lsrcaligned
+1:	retw
+
+
+/* dst is word-aligned; src is word-aligned.  */
+
+	.align	4
+#if XCHAL_HAVE_LOOPS
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lz3	// loop forever (almost anyway)
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	bnone	a8, a7, .Lz3	// if byte 3 is zero
+	addi	a10, a10, 4	// advance dst pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	addi	a10, a10, 4	// advance dst pointer
+.Laligned:
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	bany	a8, a7, 1b	// if byte 3 is zero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+.Lz3:	/* Byte 3 is zero.  */
+	retw
+
+.Lz0:	/* Byte 0 is zero.  */
+#ifdef __XTENSA_EB__
+	movi	a8, 0
+#endif
+	s8i	a8, a10, 0
+	retw
+
+.Lz1:	/* Byte 1 is zero.  */
+#ifdef __XTENSA_EB__
+        extui   a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	retw
+
+.Lz2:	/* Byte 2 is zero.  */
+#ifdef __XTENSA_EB__
+        extui   a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	movi	a8, 0
+	s8i	a8, a10, 2
+	retw
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Ldstunaligned:
+
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 2f		// loop forever (almost anyway)
+#endif
+1:	l8ui	a8, a3, 0
+	addi	a3, a3, 1
+	s8i	a8, a10, 0
+	addi	a10, a10, 1
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, 2f
+#else
+	bnez	a8, 1b
+#endif
+2:	retw
+
+libc_hidden_def (strcpy)
diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S
new file mode 100644
index 000000000..dd72c16fa
--- /dev/null
+++ b/libc/string/xtensa/strlen.S
@@ -0,0 +1,104 @@
+/* Optimized strlen for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define	MASK0 0xff000000
+#define	MASK1 0x00ff0000
+#define	MASK2 0x0000ff00
+#define	MASK3 0x000000ff
+#else
+#define	MASK0 0x000000ff
+#define	MASK1 0x0000ff00
+#define	MASK2 0x00ff0000
+#define	MASK3 0xff000000
+#endif
+
+	.text
+ENTRY (strlen)
+	/* a2 = s */
+
+	addi	a3, a2, -4	// because we overincrement at the end
+	movi	a4, MASK0
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a2, 0, .L1mod2
+	bbsi.l	a2, 1, .L2mod4
+	j	.Laligned
+
+.L1mod2: // address is odd
+	l8ui	a8, a3, 4	// get byte 0
+	addi	a3, a3, 1	// advance string pointer
+	beqz	a8, .Lz3	// if byte 0 is zero
+	bbci.l	a3, 1, .Laligned // if string pointer is now word-aligned
+
+.L2mod4: // address is 2 mod 4
+	addi	a3, a3, 2	// advance ptr for aligned access
+	l32i	a8, a3, 0	// get word with first two bytes of string
+	bnone	a8, a6, .Lz2	// if byte 2 (of word, not string) is zero
+	bany	a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero
+
+	/* Byte 3 is zero.  */
+	addi	a3, a3, 3	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+
+/* String is word-aligned.  */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lz3	// loop forever (almost anyway)
+#endif
+1:	l32i	a8, a3, 4	// get next word of string
+	addi	a3, a3, 4	// advance string pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+#if XCHAL_HAVE_LOOPS
+	bnone	a8, a7, .Lz3	// if byte 3 is zero
+#else
+	bany	a8, a7, 1b	// repeat if byte 3 is non-zero
+#endif
+
+.Lz3:	/* Byte 3 is zero.  */
+	addi	a3, a3, 3	// point to zero byte
+	/* Fall through....  */
+
+.Lz0:	/* Byte 0 is zero.  */
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+.Lz1:	/* Byte 1 is zero.  */
+	addi	a3, a3, 1	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+.Lz2:	/* Byte 2 is zero.  */
+	addi	a3, a3, 2	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+libc_hidden_def (strlen)
diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S
new file mode 100644
index 000000000..7ba2ef77d
--- /dev/null
+++ b/libc/string/xtensa/strncpy.S
@@ -0,0 +1,241 @@
+/* Optimized strcpy for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define	MASK0 0xff000000
+#define	MASK1 0x00ff0000
+#define	MASK2 0x0000ff00
+#define	MASK3 0x000000ff
+#else
+#define	MASK0 0x000000ff
+#define	MASK1 0x0000ff00
+#define	MASK2 0x00ff0000
+#define	MASK3 0xff000000
+#endif
+
+/* Do not use .literal_position in the ENTRY macro.  */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+	.text
+	.align	4
+	.literal_position
+__strncpy_aux:
+
+.Lsrc1mod2: // src address is odd
+	l8ui	a8, a3, 0	// get byte 0
+	addi	a3, a3, 1	// advance src pointer
+	s8i	a8, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz    a4, .Lret       // if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	beqz	a8, .Lfill	// if byte 0 is zero
+	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
+
+.Lsrc2mod4: // src address is 2 mod 4
+	l8ui	a8, a3, 0	// get byte 0
+	addi	a4, a4, -1	// decrement n
+	s8i	a8, a10, 0	// store byte 0
+	beqz    a4, .Lret       // if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	beqz	a8, .Lfill	// if byte 0 is zero
+	l8ui	a8, a3, 1	// get byte 0
+	addi	a3, a3, 2	// advance src pointer
+	s8i	a8, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz    a4, .Lret       // if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	bnez	a8, .Lsrcaligned
+	j	.Lfill
+
+.Lret:
+	retw
+
+
+ENTRY (strncpy)
+	/* a2 = dst, a3 = src */
+
+	mov	a10, a2		// leave dst in return value register
+	beqz    a4, .Lret       // if n is zero
+
+	movi	a11, MASK0
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a3, 0, .Lsrc1mod2
+	bbsi.l	a3, 1, .Lsrc2mod4
+.Lsrcaligned:
+
+	/* Check if the destination is aligned.  */
+	movi	a8, 3
+	bnone	a10, a8, .Laligned
+
+	j	.Ldstunaligned
+
+
+/* Fill the dst with zeros -- n is at least 1.  */
+
+.Lfill:
+	movi	a9, 0
+	bbsi.l	a10, 0, .Lfill1mod2
+	bbsi.l	a10, 1, .Lfill2mod4
+.Lfillaligned:
+	blti	a4, 4, .Lfillcleanup
+
+	/* Loop filling complete words with zero.  */
+#if XCHAL_HAVE_LOOPS
+
+	srai	a8, a4, 2
+	loop	a8, 1f
+	s32i	a9, a10, 0
+	addi	a10, a10, 4
+
+1:	slli	a8, a8, 2
+	sub	a4, a4, a8
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	s32i	a9, a10, 0
+	addi	a10, a10, 4
+	addi	a4, a4, -4
+	bgei    a4, 4, 1b
+
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	beqz	a4, 2f
+
+.Lfillcleanup:
+	/* Fill leftover (1 to 3) bytes with zero.  */
+	s8i	a9, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	addi	a10, a10, 1
+	bnez    a4, .Lfillcleanup 
+
+2:	retw
+	
+.Lfill1mod2: // dst address is odd
+	s8i	a9, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz    a4, 2b		// if n is zero
+	addi    a10, a10, 1	// advance dst pointer
+	bbci.l	a10, 1, .Lfillaligned // if dst is now word-aligned
+
+.Lfill2mod4: // dst address is 2 mod 4
+	s8i	a9, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz    a4, 2b		// if n is zero
+	s8i	a9, a10, 1	// store byte 1
+	addi	a4, a4, -1	// decrement n
+	beqz    a4, 2b		// if n is zero
+	addi    a10, a10, 2	// advance dst pointer
+	j	.Lfillaligned
+
+
+/* dst is word-aligned; src is word-aligned; n is at least 1.  */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 1f		// loop forever (almost anyway)
+	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a11, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	addi	a4, a4, -4	// decrement n
+	addi	a10, a10, 4	// advance dst pointer
+	bnone	a8, a7, .Lfill	// if byte 3 is zero
+1:	
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a11, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	addi	a4, a4, -4	// decrement n
+	addi	a10, a10, 4	// advance dst pointer
+	bany	a8, a7, 1b	// no zeroes
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	j	.Lfill
+
+.Lz0:	/* Byte 0 is zero.  */
+#ifdef __XTENSA_EB__
+	movi	a8, 0
+#endif
+	s8i	a8, a10, 0
+	addi	a4, a4, -1	// decrement n
+	addi	a10, a10, 1	// advance dst pointer
+	j	.Lfill
+
+.Lz1:	/* Byte 1 is zero.  */
+#ifdef __XTENSA_EB__
+        extui   a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	addi	a4, a4, -2	// decrement n
+	addi	a10, a10, 2	// advance dst pointer
+	j	.Lfill
+
+.Lz2:	/* Byte 2 is zero.  */
+#ifdef __XTENSA_EB__
+        extui   a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	movi	a8, 0
+	s8i	a8, a10, 2
+	addi	a4, a4, -3	// decrement n
+	addi	a10, a10, 3	// advance dst pointer
+	j	.Lfill
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Ldstunaligned:
+
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 2f		// loop forever (almost anyway)
+#endif
+1:	l8ui	a8, a3, 0
+	addi	a3, a3, 1
+	s8i	a8, a10, 0
+	addi	a4, a4, -1
+	beqz	a4, 3f
+	addi	a10, a10, 1
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, 2f
+#else
+	bnez	a8, 1b
+#endif
+2:	j	.Lfill
+
+3:	retw
+
+libc_hidden_def (strncpy)