summaryrefslogtreecommitdiffstats
path: root/libc/string
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string')
-rw-r--r--libc/string/avr32/Makefile26
-rw-r--r--libc/string/avr32/bcopy.S26
-rw-r--r--libc/string/avr32/bzero.S24
-rw-r--r--libc/string/avr32/memcmp.S61
-rw-r--r--libc/string/avr32/memcpy.S111
-rw-r--r--libc/string/avr32/memmove.S116
-rw-r--r--libc/string/avr32/memset.S65
-rw-r--r--libc/string/avr32/strcmp.S91
-rw-r--r--libc/string/avr32/strlen.S62
-rw-r--r--libc/string/cris/memcopy.h62
-rw-r--r--libc/string/cris/memcpy.c264
-rw-r--r--libc/string/cris/memmove.c101
-rw-r--r--libc/string/cris/memset.c271
-rw-r--r--libc/string/cris/strcpy.c51
-rw-r--r--libc/string/cris/strncpy.c61
-rw-r--r--libc/string/xtensa/Makefile13
-rw-r--r--libc/string/xtensa/memcpy.S297
-rw-r--r--libc/string/xtensa/memset.S165
-rw-r--r--libc/string/xtensa/strcmp.S313
-rw-r--r--libc/string/xtensa/strcpy.S150
-rw-r--r--libc/string/xtensa/strlen.S104
-rw-r--r--libc/string/xtensa/strncpy.S241
22 files changed, 2675 insertions, 0 deletions
diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile
new file mode 100644
index 000000000..0002ffdce
--- /dev/null
+++ b/libc/string/avr32/Makefile
@@ -0,0 +1,26 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org>
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+top_srcdir := ../../../
+top_builddir := ../../../
+
+all: objs
+
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/avr32/bcopy.S b/libc/string/avr32/bcopy.S
new file mode 100644
index 000000000..e1d173165
--- /dev/null
+++ b/libc/string/avr32/bcopy.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+ .text
+ .global bcopy
+ .type bcopy, @function
+ .align 1
+bcopy:
+ /* Swap the first two arguments */
+ eor r11, r12
+ eor r12, r11
+ eor r11, r12
+ rjmp HIDDEN_JUMPTARGET(memmove)
+
+ .size bcopy, . - bcopy
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/bzero.S b/libc/string/avr32/bzero.S
new file mode 100644
index 000000000..928148dcb
--- /dev/null
+++ b/libc/string/avr32/bzero.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+
+ .text
+ .global bzero
+ .type bzero, @function
+ .align 1
+bzero:
+ mov r10, r11
+ mov r11, 0
+ rjmp HIDDEN_JUMPTARGET(memset)
+
+ .size bzero, . - bzero
+
+#endif /* __UCLIBC_SUSV3_LEGACY__ */
diff --git a/libc/string/avr32/memcmp.S b/libc/string/avr32/memcmp.S
new file mode 100644
index 000000000..5d7eac3d9
--- /dev/null
+++ b/libc/string/avr32/memcmp.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+ .text
+ .global memcmp
+ .type memcmp, @function
+ .align 1
+memcmp:
+ sub len, 4
+ brlt .Lless_than_4
+
+1: ld.w r8, s1++
+ ld.w r9, s2++
+ cp.w r8, r9
+ brne .Lfound_word
+ sub len, 4
+ brge 1b
+
+.Lless_than_4:
+ sub len, -4
+ reteq 0
+
+1: ld.ub r8, s1++
+ ld.ub r9, s2++
+ sub r8, r9
+ retne r8
+ sub len, 1
+ brgt 1b
+
+ retal 0
+
+.Lfound_word:
+ mov len, 4
+
+2: bfextu r11, r9, 24, 8
+ bfextu r12, r8, 24, 8
+ sub r12, r11
+ retne r12
+ lsl r8, 8
+ lsl r9, 8
+ sub len, 1
+ brne 2b
+ retal r12
+
+ .size memcmp, . - memcmp
+
+libc_hidden_def(memcmp)
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(memcmp,bcmp)
+#endif
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S
new file mode 100644
index 000000000..f95aabd13
--- /dev/null
+++ b/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9
+#define src r11
+#define len r10
+
+ .text
+ .global memcpy
+ .type memcpy, @function
+memcpy:
+ pref src[0]
+ mov dst, r12
+
+ /* If we have less than 32 bytes, don't do anything fancy */
+ cp.w len, 32
+ brge .Lmore_than_31
+
+ sub len, 1
+ retlt r12
+1: ld.ub r8, src++
+ st.b dst++, r8
+ sub len, 1
+ brge 1b
+ retal r12
+
+.Lmore_than_31:
+ pushm r0-r7, lr
+
+ /* Check alignment */
+ mov r8, src
+ andl r8, 31, COH
+ brne .Lunaligned_src
+ mov r8, dst
+ andl r8, 3, COH
+ brne .Lunaligned_dst
+
+.Laligned_copy:
+ sub len, 32
+ brlt .Lless_than_32
+
+1: /* Copy 32 bytes at a time */
+ ldm src, r0-r7
+ sub src, -32
+ stm dst, r0-r7
+ sub dst, -32
+ sub len, 32
+ brge 1b
+
+.Lless_than_32:
+ /* Copy 16 more bytes if possible */
+ sub len, -16
+ brlt .Lless_than_16
+ ldm src, r0-r3
+ sub src, -16
+ sub len, 16
+ stm dst, r0-r3
+ sub dst, -16
+
+.Lless_than_16:
+ /* Do the remaining as byte copies */
+ neg len
+ add pc, pc, len << 2
+ .rept 15
+ ld.ub r0, src++
+ st.b dst++, r0
+ .endr
+
+ popm r0-r7, pc
+
+.Lunaligned_src:
+ /* Make src cacheline-aligned. r8 = (src & 31) */
+ rsub r8, r8, 32
+ sub len, r8
+1: ld.ub r0, src++
+ st.b dst++, r0
+ sub r8, 1
+ brne 1b
+
+ /* If dst is word-aligned, we're ready to go */
+ pref src[0]
+ mov r8, 3
+ tst dst, r8
+ breq .Laligned_copy
+
+.Lunaligned_dst:
+ /* src is aligned, but dst is not. Expect bad performance */
+ sub len, 4
+ brlt 2f
+1: ld.w r0, src++
+ st.w dst++, r0
+ sub len, 4
+ brge 1b
+
+2: neg len
+ add pc, pc, len << 2
+ .rept 3
+ ld.ub r0, src++
+ st.b dst++, r0
+ .endr
+
+ popm r0-r7, pc
+ .size memcpy, . - memcpy
+
+libc_hidden_def(memcpy)
diff --git a/libc/string/avr32/memmove.S b/libc/string/avr32/memmove.S
new file mode 100644
index 000000000..8ca4da54d
--- /dev/null
+++ b/libc/string/avr32/memmove.S
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#define dst r12
+#define src r11
+#define len r10
+
+ .text
+ .global memmove
+ .type memmove, @function
+memmove:
+ cp.w src, dst
+ brge HIDDEN_JUMPTARGET(memcpy)
+
+ add dst, len
+ add src, len
+ pref src[-1]
+
+ /*
+ * The rest is basically the same as in memcpy.S except that
+ * the direction is reversed.
+ */
+ cp.w len, 32
+ brge .Lmore_than_31
+
+ sub len, 1
+ retlt r12
+1: ld.ub r8, --src
+ st.b --dst, r8
+ sub len, 1
+ brge 1b
+ retal r12
+
+.Lmore_than_31:
+ pushm r0-r7, lr
+
+ /* Check alignment */
+ mov r8, src
+ andl r8, 31, COH
+ brne .Lunaligned_src
+ mov r8, r12
+ andl r8, 3, COH
+ brne .Lunaligned_dst
+
+.Laligned_copy:
+ sub len, 32
+ brlt .Lless_than_32
+
+1: /* Copy 32 bytes at a time */
+ sub src, 32
+ ldm src, r0-r7
+ sub dst, 32
+ sub len, 32
+ stm dst, r0-r7
+ brge 1b
+
+.Lless_than_32:
+ /* Copy 16 more bytes if possible */
+ sub len, -16
+ brlt .Lless_than_16
+ sub src, 16
+ ldm src, r0-r3
+ sub dst, 16
+ sub len, 16
+ stm dst, r0-r3
+
+.Lless_than_16:
+ /* Do the remaining as byte copies */
+ sub len, -16
+ breq 2f
+1: ld.ub r0, --src
+ st.b --dst, r0
+ sub len, 1
+ brne 1b
+
+2: popm r0-r7, pc
+
+.Lunaligned_src:
+ /* Make src cacheline-aligned. r8 = (src & 31) */
+ sub len, r8
+1: ld.ub r0, --src
+ st.b --dst, r0
+ sub r8, 1
+ brne 1b
+
+ /* If dst is word-aligned, we're ready to go */
+ pref src[-4]
+ mov r8, 3
+ tst dst, r8
+ breq .Laligned_copy
+
+.Lunaligned_dst:
+ /* src is aligned, but dst is not. Expect bad performance */
+ sub len, 4
+ brlt 2f
+1: ld.w r0, --src
+ st.w --dst, r0
+ sub len, 4
+ brge 1b
+
+2: neg len
+ add pc, pc, len << 2
+ .rept 3
+ ld.ub r0, --src
+ st.b --dst, r0
+ .endr
+
+ popm r0-r7, pc
+ .size memmove, . - memmove
+
+libc_hidden_def(memmove)
diff --git a/libc/string/avr32/memset.S b/libc/string/avr32/memset.S
new file mode 100644
index 000000000..964bf4834
--- /dev/null
+++ b/libc/string/avr32/memset.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s r12
+#define c r11
+#define n r10
+
+ .text
+ .global memset
+ .type memset, @function
+
+ .align 1
+memset:
+ cp.w n, 32
+ mov r9, s
+ brge .Llarge_memset
+
+ sub n, 1
+ retlt s
+1: st.b s++, c
+ sub n, 1
+ brge 1b
+
+ retal r9
+
+.Llarge_memset:
+ mov r8, r11
+ mov r11, 3
+ bfins r8, r8, 8, 8
+ bfins r8, r8, 16, 16
+ tst s, r11
+ breq 2f
+
+1: st.b s++, r8
+ sub n, 1
+ tst s, r11
+ brne 1b
+
+2: mov r11, r9
+ mov r9, r8
+ sub n, 8
+
+3: st.d s++, r8
+ sub n, 8
+ brge 3b
+
+ /* If we are done, n == -8 and we'll skip all st.b insns below */
+ neg n
+ lsl n, 1
+ add pc, n
+ .rept 7
+ st.b s++, r8
+ .endr
+ retal r11
+
+ .size memset, . - memset
+
+libc_hidden_def(memset)
diff --git a/libc/string/avr32/strcmp.S b/libc/string/avr32/strcmp.S
new file mode 100644
index 000000000..e9f087577
--- /dev/null
+++ b/libc/string/avr32/strcmp.S
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define s1 r12
+#define s2 r11
+#define len r10
+
+ .text
+ .global strcmp
+ .type strcmp, @function
+ .align 1
+strcmp:
+ mov r8, 3
+ tst s1, r8
+ brne .Lunaligned_s1
+ tst s2, r8
+ brne .Lunaligned_s2
+
+1: ld.w r8, s1++
+ ld.w r9, s2++
+ cp.w r8, r9
+ brne 2f
+ tnbz r8
+ brne 1b
+ retal 0
+
+2: bfextu r12, r8, 24, 8
+ bfextu r11, r9, 24, 8
+ sub r12, r11
+ retne r12
+ cp.w r11, 0
+ reteq 0
+ bfextu r12, r8, 16, 8
+ bfextu r11, r9, 16, 8
+ sub r12, r11
+ retne r12
+ cp.w r11, 0
+ reteq 0
+ bfextu r12, r8, 8, 8
+ bfextu r11, r9, 8, 8
+ sub r12, r11
+ retne r12
+ cp.w r11, 0
+ reteq 0
+ bfextu r12, r8, 0, 8
+ bfextu r11, r9, 0, 8
+ sub r12, r11
+ retal r12
+
+.Lunaligned_s1:
+3: tst s1, r8
+ breq 4f
+ ld.ub r10, s1++
+ ld.ub r9, s2++
+ sub r10, r9
+ retne r10
+ cp.w r9, 0
+ brne 3b
+ retal r10
+
+4: tst s2, r8
+ breq 1b
+
+.Lunaligned_s2:
+ /*
+ * s1 and s2 can't both be aligned, and unaligned word loads
+ * can trigger spurious exceptions if we cross a page boundary.
+ * Do it the slow way...
+ */
+1: ld.ub r8, s1++
+ ld.ub r9, s2++
+ sub r8, r9
+ retne r8
+ cp.w r9, 0
+ brne 1b
+ retal 0
+
+ .size strcmp, . - strcmp
+
+libc_hidden_def(strcmp)
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp, strcoll)
+libc_hidden_def(strcoll)
+#endif
diff --git a/libc/string/avr32/strlen.S b/libc/string/avr32/strlen.S
new file mode 100644
index 000000000..d2808998d
--- /dev/null
+++ b/libc/string/avr32/strlen.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+#include <features.h>
+
+#define str r12
+
+ .text
+ .global strlen
+ .type strlen, @function
+strlen:
+ mov r11, r12
+
+ mov r9, str
+ andl r9, 3, COH
+ brne .Lunaligned_str
+
+1: ld.w r8, str++
+ tnbz r8
+ brne 1b
+
+ sub r12, r11
+ bfextu r9, r8, 24, 8
+ cp.w r9, 0
+ subeq r12, 4
+ reteq r12
+ bfextu r9, r8, 16, 8
+ cp.w r9, 0
+ subeq r12, 3
+ reteq r12
+ bfextu r9, r8, 8, 8
+ cp.w r9, 0
+ subeq r12, 2
+ reteq r12
+ sub r12, 1
+ retal r12
+
+.Lunaligned_str:
+ add pc, pc, r9 << 3
+ sub r0, r0, 0 /* 4-byte nop */
+ ld.ub r8, str++
+ sub r8, r8, 0
+ breq 1f
+ ld.ub r8, str++
+ sub r8, r8, 0
+ breq 1f
+ ld.ub r8, str++
+ sub r8, r8, 0
+ brne 1b
+
+1: sub r12, 1
+ sub r12, r11
+ retal r12
+
+ .size strlen, . - strlen
+
+libc_hidden_def(strlen)
diff --git a/libc/string/cris/memcopy.h b/libc/string/cris/memcopy.h
new file mode 100644
index 000000000..0eae998da
--- /dev/null
+++ b/libc/string/cris/memcopy.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ Modified for use in uClibc (C) 2007 Axis Communications AB.
+ Minimal modifications: include path name and #undef of WORD_COPY_FWD/BWD
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "../generic/memcopy.h"
+
+/* We override the word-copying macros, partly because misalignment in one
+ pointer isn't cause for a special function, partly because we want to
+ get rid of all the static functions in generic/memcopy.c; these macros
+ are only used in memmove.c since we have arch-specific mempcpy, memcpy and
+ memset. */
+
+#undef OP_T_THRES
+#define OP_T_THRES OPSIZ
+
+#undef WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
+ do \
+ { \
+ unsigned long enddst_bp = dst_bp + nbytes - (nbytes % OPSIZ); \
+ nbytes_left = (nbytes % OPSIZ); \
+ while (dst_bp < (unsigned long) enddst_bp) \
+ { \
+ op_t x = *(op_t *) src_bp; \
+ src_bp += sizeof x; \
+ *(op_t *) dst_bp = x; \
+ dst_bp += sizeof x; \
+ } \
+ } while (0)
+
+#undef WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_bp, src_bp, nbytes_left, nbytes) \
+ do \
+ { \
+ unsigned long enddst_bp = dst_bp - nbytes + (nbytes % OPSIZ); \
+ nbytes_left = (nbytes % OPSIZ); \
+ while (dst_bp > enddst_bp) \
+ { \
+ op_t x; \
+ src_bp -= sizeof x; \
+ x = *(op_t *) src_bp; \
+ dst_bp -= sizeof x; \
+ *(op_t *) dst_bp = x; \
+ } \
+ } while (0)
diff --git a/libc/string/cris/memcpy.c b/libc/string/cris/memcpy.c
new file mode 100644
index 000000000..a85108109
--- /dev/null
+++ b/libc/string/cris/memcpy.c
@@ -0,0 +1,264 @@
+/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1994, 1995, 2000 Axis Communications AB.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*#************************************************************************#*/
+/*#-------------------------------------------------------------------------*/
+/*# */
+/*# FUNCTION NAME: memcpy() */
+/*# */
+/*# PARAMETERS: void* dst; Destination address. */
+/*# void* src; Source address. */
+/*# int len; Number of bytes to copy. */
+/*# */
+/*# RETURNS: dst. */
+/*# */
+/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */
+/*# about copying of overlapping memory areas. This routine is */
+/*# very sensitive to compiler changes in register allocation. */
+/*# Should really be rewritten to avoid this problem. */
+/*# */
+/*#-------------------------------------------------------------------------*/
+/*# */
+/*# HISTORY */
+/*# */
+/*# DATE NAME CHANGES */
+/*# ---- ---- ------- */
+/*# 941007 Kenny R Creation */
+/*# 941011 Kenny R Lots of optimizations and inlining. */
+/*# 941129 Ulf A Adapted for use in libc. */
+/*# 950216 HP N==0 forgotten if non-aligned src/dst. */
+/*# Added some optimizations. */
+/*# 001025 HP Make src and dst char *. Align dst to */
+/*# dword, not just word-if-both-src-and-dst- */
+/*# are-misaligned. */
+/*# 070806 RW Modified for uClibc */
+/*# (__arch_v32 -> __CONFIG_CRISV32__, */
+/*# include features.h to reach it.) */
+/*# */
+/*#-------------------------------------------------------------------------*/
+
+#include <features.h>
+
+#ifdef __CONFIG_CRISV32__
+/* For CRISv32, movem is very cheap. */
+#define MEMCPY_BLOCK_THRESHOLD (44)
+#else
+/* Break even between movem and move16 is at 38.7*2, but modulo 44. */
+#define MEMCPY_BLOCK_THRESHOLD (44*2)
+#endif
+
+void *memcpy(void *, const void *, unsigned int);
+
+libc_hidden_proto(memcpy)
+void *memcpy(void *pdst,
+ const void *psrc,
+ unsigned int pn)
+{
+ /* Ok. Now we want the parameters put in special registers.
+ Make sure the compiler is able to make something useful of this.
+ As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+
+   If gcc was all right, it really would need no temporaries, and no
+ stack space to save stuff on. */
+
+#ifndef MEMPCPY
+ register void *return_dst __asm__ ("r10") = pdst;
+#else
+ /* FIXME: Use R10 for something. */
+# define return_dst dst
+#endif
+
+ register char *dst __asm__ ("r13") = pdst;
+ register char *src __asm__ ("r11") = (char *) psrc;
+ register int n __asm__ ("r12") = pn;
+
+
+ /* When src is aligned but not dst, this makes a few extra needless
+ cycles. I believe it would take as many to check that the
+ re-alignment was unnecessary. */
+ if (((unsigned long) dst & 3) != 0
+ /* Don't align if we wouldn't copy more than a few bytes; so we
+ don't have to check further for overflows. */
+ && n >= 3)
+ {
+ if ((unsigned long) dst & 1)
+ {
+ n--;
+ *(char*)dst = *(char*)src;
+ src++;
+ dst++;
+ }
+
+ if ((unsigned long) dst & 2)
+ {
+ n -= 2;
+ *(short*)dst = *(short*)src;
+ src += 2;
+ dst += 2;
+ }
+ }
+
+ /* Decide which copying method to use. */
+ if (n >= MEMCPY_BLOCK_THRESHOLD)
+ {
+ /* For large copies we use 'movem' */
+
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-movem sizes
+ suboptimal.
+
+ This method is not foolproof; it assumes that the "register asm"
+ declarations at the beginning of the function really are used
+ here (beware: they may be moved to temporary registers).
+ This way, we do not have to save/move the registers around into
+ temporaries; we can safely use them straight away. */
+ __asm__ volatile ("\
+ .syntax no_register_prefix \n\
+ \n\
+ ;; Check that the register asm declaration got right. \n\
+ ;; The GCC manual explicitly says TRT will happen. \n\
+ .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
+ .err \n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll use in the movem process \n\
+ ;; on the stack. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r11 - src \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 44,r12 \n\
+0: \n\
+ movem [r11+],r10 \n\
+ subq 44,r12 \n\
+ bge 0b \n\
+ movem r10,[r13+] \n\
+ \n\
+ addq 44,r12 ;; compensate for last loop underflowing n \n\
+ \n\
+ ;; Restore registers from stack \n\
+ movem [sp+],r10"
+
+ /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
+ /* Inputs */ : "0" (dst), "1" (src), "2" (n));
+ }
+
+  /* Either we directly start copying, using dword copying
+ in a loop, or we copy as much as possible with 'movem'
+ and then the last block (<44 bytes) is copied here.
+ This will work since 'movem' will have updated src,dst,n. */
+
+ while ( n >= 16 )
+ {
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ n -= 16;
+ }
+
+ /* A switch() is definitely the fastest although it takes a LOT of code.
+ * Particularly if you inline code this.
+ */
+ switch (n)
+ {
+ case 0:
+ break;
+ case 1:
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 2:
+ *((short*)dst)++ = *((short*)src)++;
+ break;
+ case 3:
+ *((short*)dst)++ = *((short*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 4:
+ *((long*)dst)++ = *((long*)src)++;
+ break;
+ case 5:
+ *((long*)dst)++ = *((long*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 6:
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ break;
+ case 7:
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 8:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ break;
+ case 9:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 10:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ break;
+ case 11:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 12:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ break;
+ case 13:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ case 14:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ break;
+ case 15:
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((long*)dst)++ = *((long*)src)++;
+ *((short*)dst)++ = *((short*)src)++;
+ *((char*)dst)++ = *((char*)src)++;
+ break;
+ }
+
+ return return_dst; /* destination pointer. */
+} /* memcpy() */
+libc_hidden_def(memcpy)
diff --git a/libc/string/cris/memmove.c b/libc/string/cris/memmove.c
new file mode 100644
index 000000000..437637078
--- /dev/null
+++ b/libc/string/cris/memmove.c
@@ -0,0 +1,101 @@
+/* Taken from generic/memmove.c; trivially modified to work with
+ arch-specific memcopy.h for Cris.
+
+ Copy memory to memory until the specified number of bytes
+ has been copied. Overlap is handled correctly.
+ Copyright (C) 1991, 1995, 1996, 1997, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <string.h>
+
+#include "memcopy.h"
+#include "../generic/pagecopy.h"
+
+libc_hidden_proto(memmove)
+void *memmove (void *dest, const void *src, size_t len)
+{
+ unsigned long int dstp = (long int) dest;
+ unsigned long int srcp = (long int) src;
+
+ /* This test makes the forward copying code be used whenever possible.
+ Reduces the working set. */
+ if (dstp - srcp >= len) /* *Unsigned* compare! */
+ {
+#if 1
+#warning REMINDER: Cris arch-opt memmove assumes memcpy does forward copying!
+ memcpy(dest, src, len);
+#else
+ /* Copy from the beginning to the end. */
+
+      /* If there are not too few bytes to copy, use word copy. */
+ if (len >= OP_T_THRES)
+ {
+ /* Copy just a few bytes to make DSTP aligned. */
+ len -= (-dstp) % OPSIZ;
+ BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);
+
+ /* Copy whole pages from SRCP to DSTP by virtual address
+ manipulation, as much as possible. */
+
+ PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);
+
+ /* Copy from SRCP to DSTP taking advantage of the known
+ alignment of DSTP. Number of bytes remaining is put
+ in the third argument, i.e. in LEN. This number may
+ vary from machine to machine. */
+
+ WORD_COPY_FWD (dstp, srcp, len, len);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_FWD (dstp, srcp, len);
+#endif
+ }
+ else
+ {
+ /* Copy from the end to the beginning. */
+ srcp += len;
+ dstp += len;
+
+      /* If there are not too few bytes to copy, use word copy. */
+ if (len >= OP_T_THRES)
+ {
+ /* Copy just a few bytes to make DSTP aligned. */
+ len -= dstp % OPSIZ;
+ BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);
+
+ /* Copy from SRCP to DSTP taking advantage of the known
+ alignment of DSTP. Number of bytes remaining is put
+ in the third argument, i.e. in LEN. This number may
+ vary from machine to machine. */
+
+ WORD_COPY_BWD (dstp, srcp, len, len);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_BWD (dstp, srcp, len);
+ }
+
+ return (dest);
+}
+libc_hidden_def(memmove)
diff --git a/libc/string/cris/memset.c b/libc/string/cris/memset.c
new file mode 100644
index 000000000..7e71bc50f
--- /dev/null
+++ b/libc/string/cris/memset.c
@@ -0,0 +1,271 @@
+/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000 Axis Communications AB.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*#************************************************************************#*/
+/*#-------------------------------------------------------------------------*/
+/*# */
+/*# FUNCTION NAME: memset() */
+/*# */
+/*# PARAMETERS: void* dst; Destination address. */
+/*# int c; Value of byte to write. */
+/*# int len; Number of bytes to write. */
+/*# */
+/*# RETURNS: dst. */
+/*# */
+/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */
+/*# Framework taken from memcpy. This routine is */
+/*# very sensitive to compiler changes in register allocation. */
+/*# Should really be rewritten to avoid this problem. */
+/*# */
+/*#-------------------------------------------------------------------------*/
+/*# */
+/*# HISTORY */
+/*# */
+/*# DATE NAME CHANGES */
+/*# ---- ---- ------- */
+/*# 990713 HP Tired of watching this function (or */
+/*# really, the nonoptimized generic */
+/*# implementation) take up 90% of simulator */
+/*# output. Measurements needed. */
+/*# */
+/*#-------------------------------------------------------------------------*/
+
+/* No, there's no macro saying 12*4, since it is "hard" to get it into
+ the asm in a good way. Thus better to expose the problem everywhere.
+ */
+
+/* Assuming 1 cycle per dword written or read (ok, not really true), and
+ one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
+ so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
+
+#define ZERO_BLOCK_SIZE (1*12*4)
+
+void *memset(void *, int, unsigned long);
+
+libc_hidden_proto(memset)
+void *memset(void *pdst,
+ int c,
+ unsigned long plen)
+{
+ /* Ok. Now we want the parameters put in special registers.
+ Make sure the compiler is able to make something useful of this. */
+
+ register char *return_dst __asm__ ("r10") = pdst;
+ register long n __asm__ ("r12") = plen;
+ register int lc __asm__ ("r11") = c;
+
+ /* Most apps use memset sanely. Only those memsetting about 3..4
+ bytes or less get penalized compared to the generic implementation
+ - and that's not really sane use. */
+
+ /* Ugh. This is fragile at best. Check with newer GCC releases, if
+ they compile cascaded "x |= x << 8" sanely! */
+ __asm__("movu.b %0,$r13 \n\
+ lslq 8,$r13 \n\
+ move.b %0,$r13 \n\
+ move.d $r13,%0 \n\
+ lslq 16,$r13 \n\
+ or.d $r13,%0"
+ : "=r" (lc) : "0" (lc) : "r13");
+
+ {
+ register char *dst __asm__ ("r13") = pdst;
+
+ if (((unsigned long) pdst & 3) != 0
+ /* Oops! n=0 must be a legal call, regardless of alignment. */
+ && n >= 3)
+ {
+ if ((unsigned long)dst & 1)
+ {
+ *dst = (char) lc;
+ n--;
+ dst++;
+ }
+
+ if ((unsigned long)dst & 2)
+ {
+ *(short *)dst = lc;
+ n -= 2;
+ dst += 2;
+ }
+ }
+
+ /* Now the fun part. For the threshold value of this, check the equation
+ above. */
+ /* Decide which copying method to use. */
+ if (n >= ZERO_BLOCK_SIZE)
+ {
+ /* For large copies we use 'movem' */
+
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-movem sizes
+ suboptimal.
+
+ This method is not foolproof; it assumes that the "asm reg"
+ declarations at the beginning of the function really are used
+ here (beware: they may be moved to temporary registers).
+ This way, we do not have to save/move the registers around into
+ temporaries; we can safely use them straight away. */
+ __asm__ volatile (" \n\
+ .syntax no_register_prefix \n\
+ \n\
+ ;; Check that the register asm declaration got right. \n\
+ ;; The GCC manual explicitly says there's no warranty for that (too). \n\
+ .ifnc %0-%1-%4,$r13-$r12-$r11 \n\
+ .err \n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll clobber in the movem process \n\
+ ;; on the stack. Don't mention them to gcc, it will only be \n\
+ ;; upset. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ move.d r11,r0 \n\
+ move.d r11,r1 \n\
+ move.d r11,r2 \n\
+ move.d r11,r3 \n\
+ move.d r11,r4 \n\
+ move.d r11,r5 \n\
+ move.d r11,r6 \n\
+ move.d r11,r7 \n\
+ move.d r11,r8 \n\
+ move.d r11,r9 \n\
+ move.d r11,r10 \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 12*4,r12 \n\
+0: \n\
+ subq 12*4,r12 \n\
+ bge 0b \n\
+ movem r11,[r13+] \n\
+ \n\
+ addq 12*4,r12 ;; compensate for last loop underflowing n \n\
+ \n\
+ ;; Restore registers from stack \n\
+ movem [sp+],r10"
+
+ /* Outputs */ : "=r" (dst), "=r" (n)
+ /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
+
+ }
+
+    /* Either we directly start copying, using dword copying
+ in a loop, or we copy as much as possible with 'movem'
+ and then the last block (<44 bytes) is copied here.
+ This will work since 'movem' will have updated src,dst,n. */
+
+ while ( n >= 16 )
+ {
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ n -= 16;
+ }
+
+ /* A switch() is definitely the fastest although it takes a LOT of code.
+ * Particularly if you inline code this.
+ */
+ switch (n)
+ {
+ case 0:
+ break;
+ case 1:
+ *(char*)dst = (char) lc;
+ break;
+ case 2:
+ *(short*)dst = (short) lc;
+ break;
+ case 3:
+ *((short*)dst)++ = (short) lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 4:
+ *((long*)dst)++ = lc;
+ break;
+ case 5:
+ *((long*)dst)++ = lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 6:
+ *((long*)dst)++ = lc;
+ *(short*)dst = (short) lc;
+ break;
+ case 7:
+ *((long*)dst)++ = lc;
+ *((short*)dst)++ = (short) lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 8:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ break;
+ case 9:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 10:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *(short*)dst = (short) lc;
+ break;
+ case 11:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((short*)dst)++ = (short) lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 12:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ break;
+ case 13:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *(char*)dst = (char) lc;
+ break;
+ case 14:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *(short*)dst = (short) lc;
+ break;
+ case 15:
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((long*)dst)++ = lc;
+ *((short*)dst)++ = (short) lc;
+ *(char*)dst = (char) lc;
+ break;
+ }
+ }
+
+ return return_dst; /* destination pointer. */
+} /* memset() */
+libc_hidden_def(memset)
diff --git a/libc/string/cris/strcpy.c b/libc/string/cris/strcpy.c
new file mode 100644
index 000000000..0af25253e
--- /dev/null
+++ b/libc/string/cris/strcpy.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2006-2007 Axis Communications AB
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+libc_hidden_proto(strcpy)
+char *strcpy(char *dest, const char *src)
+{
+ char *ret = dest;
+ unsigned long himagic = 0x80808080L;
+ unsigned long lomagic = 0x01010101L;
+
+ while ((unsigned long)src & (sizeof src - 1))
+ {
+ if (!(*dest++ = *src++))
+ {
+ return ret;
+ }
+ }
+
+ while (1)
+ {
+ unsigned long value = *(unsigned long*)src;
+ unsigned long magic;
+
+ src += sizeof (unsigned long);
+
+ if ((magic = (value - lomagic) & himagic))
+ {
+ if (magic & ~value)
+ {
+ break;
+ }
+ }
+
+ *(unsigned long*)dest = value;
+ dest += sizeof (unsigned long);
+ }
+
+ src -= sizeof (unsigned long);
+
+ while ((*dest++ = *src++))
+ {
+ }
+
+ return ret;
+}
+libc_hidden_def(strcpy)
diff --git a/libc/string/cris/strncpy.c b/libc/string/cris/strncpy.c
new file mode 100644
index 000000000..93a6608bc
--- /dev/null
+++ b/libc/string/cris/strncpy.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2006-2007 Axis Communications AB
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+libc_hidden_proto(memset)
+
+libc_hidden_proto(strncpy)
+char *strncpy(char *dest, const char *src, size_t count)
+{
+ char *ret = dest;
+ unsigned long himagic = 0x80808080L;
+ unsigned long lomagic = 0x01010101L;
+
+ while (count && (unsigned long)src & (sizeof src - 1))
+ {
+ count--;
+ if (!(*dest++ = *src++))
+ {
+ goto finalize;
+ }
+ }
+
+ while (count >= sizeof (unsigned long))
+ {
+ unsigned long value = *(unsigned long*)src;
+ unsigned long magic;
+
+ if ((magic = (value - lomagic) & himagic))
+ {
+ if (magic & ~value)
+ {
+ break;
+ }
+ }
+
+ *(unsigned long*)dest = value;
+ dest += sizeof (unsigned long);
+ src += sizeof (unsigned long);
+ count -= sizeof (unsigned long);
+ }
+
+ while (count)
+ {
+ count--;
+ if (!(*dest++ = *src++))
+ break;
+ }
+
+finalize:
+ if (count)
+ {
+ memset(dest, '\0', count);
+ }
+
+ return ret;
+}
+libc_hidden_def(strncpy)
diff --git a/libc/string/xtensa/Makefile b/libc/string/xtensa/Makefile
new file mode 100644
index 000000000..0a95346fd
--- /dev/null
+++ b/libc/string/xtensa/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S
new file mode 100644
index 000000000..19f3a6818
--- /dev/null
+++ b/libc/string/xtensa/memcpy.S
@@ -0,0 +1,297 @@
+/* Optimized memcpy for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+ .macro src_b r, w0, w1
+#ifdef __XTENSA_EB__
+ src \r, \w0, \w1
+#else
+ src \r, \w1, \w0
+#endif
+ .endm
+
+ .macro ssa8 r
+#ifdef __XTENSA_EB__
+ ssa8b \r
+#else
+ ssa8l \r
+#endif
+ .endm
+
+/* If the Xtensa Unaligned Load Exception option is not used, this
+ code can run a few cycles faster by relying on the low address bits
+ being ignored. However, if the code is then run with an Xtensa ISS
+ client that checks for unaligned accesses, it will produce a lot of
+ warning messages. Set this flag to disable the use of unaligned
+ accesses and keep the ISS happy. */
+
+#define UNALIGNED_ADDRESSES_CHECKED 1
+
+/* Do not use .literal_position in the ENTRY macro. */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+
+/* void *memcpy (void *dst, const void *src, size_t len)
+
+ The algorithm is as follows:
+
+ If the destination is unaligned, align it by conditionally
+ copying 1- and/or 2-byte pieces.
+
+ If the source is aligned, copy 16 bytes with a loop, and then finish up
+ with 8, 4, 2, and 1-byte copies conditional on the length.
+
+ Else (if source is unaligned), do the same, but use SRC to align the
+ source data.
+
+ This code tries to use fall-through branches for the common
+ case of aligned source and destination and multiple of 4 (or 8) length. */
+
+
+/* Byte by byte copy. */
+
+ .text
+ .align 4
+ .literal_position
+__memcpy_aux:
+
+ /* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+ (0 mod 4 alignment for LBEG). */
+ .byte 0
+
+.Lbytecopy:
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, 2f
+#else
+ beqz a4, 2f
+ add a7, a3, a4 // a7 = end address for source
+#endif
+1: l8ui a6, a3, 0
+ addi a3, a3, 1
+ s8i a6, a5, 0
+ addi a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+ blt a3, a7, 1b
+#endif
+2: retw
+
+
+/* Destination is unaligned. */
+
+ .align 4
+.Ldst1mod2: // dst is only byte aligned
+
+ /* Do short copies byte-by-byte. */
+ _bltui a4, 7, .Lbytecopy
+
+ /* Copy 1 byte. */
+ l8ui a6, a3, 0
+ addi a3, a3, 1
+ addi a4, a4, -1
+ s8i a6, a5, 0
+ addi a5, a5, 1
+
+ /* Return to main algorithm if dst is now aligned. */
+ _bbci.l a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+ /* Do short copies byte-by-byte. */
+ _bltui a4, 6, .Lbytecopy
+
+ /* Copy 2 bytes. */
+ l8ui a6, a3, 0
+ l8ui a7, a3, 1
+ addi a3, a3, 2
+ addi a4, a4, -2
+ s8i a6, a5, 0
+ s8i a7, a5, 1
+ addi a5, a5, 2
+
+ /* dst is now aligned; return to main algorithm. */
+ j .Ldstaligned
+
+
+ENTRY (memcpy)
+ /* a2 = dst, a3 = src, a4 = len */
+
+ mov a5, a2 // copy dst so that a2 is return value
+ _bbsi.l a2, 0, .Ldst1mod2
+ _bbsi.l a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+ /* Get number of loop iterations with 16B per iteration. */
+ srli a7, a4, 4
+
+ /* Check if source is aligned. */
+ movi a8, 3
+ _bany a3, a8, .Lsrcunaligned
+
+ /* Destination and source are word-aligned, use word copy. */
+#if XCHAL_HAVE_LOOPS
+ loopnez a7, 2f
+#else
+ beqz a7, 2f
+ slli a8, a7, 4
+ add a8, a8, a3 // a8 = end of last 16B source chunk
+#endif
+1: l32i a6, a3, 0
+ l32i a7, a3, 4
+ s32i a6, a5, 0
+ l32i a6, a3, 8
+ s32i a7, a5, 4
+ l32i a7, a3, 12
+ s32i a6, a5, 8
+ addi a3, a3, 16
+ s32i a7, a5, 12
+ addi a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+ blt a3, a8, 1b
+#endif
+
+ /* Copy any leftover pieces smaller than 16B. */
+2: bbci.l a4, 3, 3f
+
+ /* Copy 8 bytes. */
+ l32i a6, a3, 0
+ l32i a7, a3, 4
+ addi a3, a3, 8
+ s32i a6, a5, 0
+ s32i a7, a5, 4
+ addi a5, a5, 8
+
+3: bbsi.l a4, 2, 4f
+ bbsi.l a4, 1, 5f
+ bbsi.l a4, 0, 6f
+ retw
+
+ /* Copy 4 bytes. */
+4: l32i a6, a3, 0
+ addi a3, a3, 4
+ s32i a6, a5, 0
+ addi a5, a5, 4
+ bbsi.l a4, 1, 5f
+ bbsi.l a4, 0, 6f
+ retw
+
+ /* Copy 2 bytes. */
+5: l16ui a6, a3, 0
+ addi a3, a3, 2
+ s16i a6, a5, 0
+ addi a5, a5, 2
+ bbsi.l a4, 0, 6f
+ retw
+
+ /* Copy 1 byte. */
+6: l8ui a6, a3, 0
+ s8i a6, a5, 0
+
+.Ldone:
+ retw
+
+
+/* Destination is aligned; source is unaligned. */
+
+ .align 4
+.Lsrcunaligned:
+ /* Avoid loading anything for zero-length copies. */
+ _beqz a4, .Ldone
+
+ /* Copy 16 bytes per iteration for word-aligned dst and
+ unaligned src. */
+ ssa8 a3 // set shift amount from byte offset
+#if UNALIGNED_ADDRESSES_CHECKED
+ and a11, a3, a8 // save unalignment offset for below
+ sub a3, a3, a11 // align a3
+#endif
+ l32i a6, a3, 0 // load first word
+#if XCHAL_HAVE_LOOPS
+ loopnez a7, 2f
+#else
+ beqz a7, 2f
+ slli a10, a7, 4
+ add a10, a10, a3 // a10 = end of last 16B source chunk
+#endif
+1: l32i a7, a3, 4
+ l32i a8, a3, 8
+ src_b a6, a6, a7
+ s32i a6, a5, 0
+ l32i a9, a3, 12
+ src_b a7, a7, a8
+ s32i a7, a5, 4
+ l32i a6, a3, 16
+ src_b a8, a8, a9
+ s32i a8, a5, 8
+ addi a3, a3, 16
+ src_b a9, a9, a6
+ s32i a9, a5, 12
+ addi a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+ blt a3, a10, 1b
+#endif
+
+2: bbci.l a4, 3, 3f
+
+ /* Copy 8 bytes. */
+ l32i a7, a3, 4
+ l32i a8, a3, 8
+ src_b a6, a6, a7
+ s32i a6, a5, 0
+ addi a3, a3, 8
+ src_b a7, a7, a8
+ s32i a7, a5, 4
+ addi a5, a5, 8
+ mov a6, a8
+
+3: bbci.l a4, 2, 4f
+
+ /* Copy 4 bytes. */
+ l32i a7, a3, 4
+ addi a3, a3, 4
+ src_b a6, a6, a7
+ s32i a6, a5, 0
+ addi a5, a5, 4
+ mov a6, a7
+4:
+#if UNALIGNED_ADDRESSES_CHECKED
+ add a3, a3, a11 // readjust a3 with correct misalignment
+#endif
+ bbsi.l a4, 1, 5f
+ bbsi.l a4, 0, 6f
+ retw
+
+ /* Copy 2 bytes. */
+5: l8ui a6, a3, 0
+ l8ui a7, a3, 1
+ addi a3, a3, 2
+ s8i a6, a5, 0
+ s8i a7, a5, 1
+ addi a5, a5, 2
+ bbsi.l a4, 0, 6f
+ retw
+
+ /* Copy 1 byte. */
+6: l8ui a6, a3, 0
+ s8i a6, a5, 0
+ retw
+
+libc_hidden_def (memcpy)
diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S
new file mode 100644
index 000000000..c0928825d
--- /dev/null
+++ b/libc/string/xtensa/memset.S
@@ -0,0 +1,165 @@
+/* Optimized memset for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Do not use .literal_position in the ENTRY macro. */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+/* void *memset (void *dst, int c, size_t length)
+
+ The algorithm is as follows:
+
+ Create a word with c in all byte positions.
+
+ If the destination is aligned, set 16B chunks with a loop, and then
+ finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
+
+ If the destination is unaligned, align it by conditionally
+ setting 1B and/or 2B and then go to aligned case.
+
+ This code tries to use fall-through branches for the common
+ case of an aligned destination (except for the branches to
+ the alignment labels). */
+
+
+/* Byte-by-byte set. */
+
+ .text
+ .align 4
+ .literal_position
+__memset_aux:
+
+ /* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+ (0 mod 4 alignment for LBEG). */
+ .byte 0
+
+.Lbyteset:
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, 2f
+#else
+ beqz a4, 2f
+ add a6, a5, a4 // a6 = ending address
+#endif
+1: s8i a3, a5, 0
+ addi a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+ blt a5, a6, 1b
+#endif
+2: retw
+
+
+/* Destination is unaligned. */
+
+ .align 4
+
+.Ldst1mod2: // dst is only byte aligned
+
+ /* Do short sizes byte-by-byte. */
+ bltui a4, 8, .Lbyteset
+
+ /* Set 1 byte. */
+ s8i a3, a5, 0
+ addi a5, a5, 1
+ addi a4, a4, -1
+
+ /* Now retest if dst is aligned. */
+ _bbci.l a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+ /* Do short sizes byte-by-byte. */
+ bltui a4, 8, .Lbyteset
+
+ /* Set 2 bytes. */
+ s16i a3, a5, 0
+ addi a5, a5, 2
+ addi a4, a4, -2
+
+ /* dst is now aligned; return to main algorithm */
+ j .Ldstaligned
+
+
+ENTRY (memset)
+ /* a2 = dst, a3 = c, a4 = length */
+
+ /* Duplicate character into all bytes of word. */
+ extui a3, a3, 0, 8
+ slli a7, a3, 8
+ or a3, a3, a7
+ slli a7, a3, 16
+ or a3, a3, a7
+
+ mov a5, a2 // copy dst so that a2 is return value
+
+ /* Check if dst is unaligned. */
+ _bbsi.l a2, 0, .Ldst1mod2
+ _bbsi.l a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+ /* Get number of loop iterations with 16B per iteration. */
+ srli a7, a4, 4
+
+ /* Destination is word-aligned. */
+#if XCHAL_HAVE_LOOPS
+ loopnez a7, 2f
+#else
+ beqz a7, 2f
+ slli a6, a7, 4
+ add a6, a6, a5 // a6 = end of last 16B chunk
+#endif
+ /* Set 16 bytes per iteration. */
+1: s32i a3, a5, 0
+ s32i a3, a5, 4
+ s32i a3, a5, 8
+ s32i a3, a5, 12
+ addi a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+ blt a5, a6, 1b
+#endif
+
+ /* Set any leftover pieces smaller than 16B. */
+2: bbci.l a4, 3, 3f
+
+ /* Set 8 bytes. */
+ s32i a3, a5, 0
+ s32i a3, a5, 4
+ addi a5, a5, 8
+
+3: bbci.l a4, 2, 4f
+
+ /* Set 4 bytes. */
+ s32i a3, a5, 0
+ addi a5, a5, 4
+
+4: bbci.l a4, 1, 5f
+
+ /* Set 2 bytes. */
+ s16i a3, a5, 0
+ addi a5, a5, 2
+
+5: bbci.l a4, 0, 6f
+
+ /* Set 1 byte. */
+ s8i a3, a5, 0
+6: retw
+
+libc_hidden_def (memset)
diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S
new file mode 100644
index 000000000..90c418d12
--- /dev/null
+++ b/libc/string/xtensa/strcmp.S
@@ -0,0 +1,313 @@
+/* Optimized strcmp for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+#define MASK4 0x40404040
+
+ .literal .Lmask0, MASK0
+ .literal .Lmask1, MASK1
+ .literal .Lmask2, MASK2
+ .literal .Lmask3, MASK3
+ .literal .Lmask4, MASK4
+
+ .text
+ENTRY (strcmp)
+ /* a2 = s1, a3 = s2 */
+
+ l8ui a8, a2, 0 // byte 0 from s1
+ l8ui a9, a3, 0 // byte 0 from s2
+ movi a10, 3 // mask
+ bne a8, a9, .Lretdiff
+
+ or a11, a2, a3
+ bnone a11, a10, .Laligned
+
+ xor a11, a2, a3 // compare low two bits of s1 and s2
+ bany a11, a10, .Lunaligned // if they have different alignment
+
+ /* s1/s2 are not word-aligned. */
+ addi a2, a2, 1 // advance s1
+ beqz a8, .Leq // bytes equal, if zero, strings are equal
+ addi a3, a3, 1 // advance s2
+ bnone a2, a10, .Laligned // if s1/s2 now aligned
+ l8ui a8, a2, 0 // byte 1 from s1
+ l8ui a9, a3, 0 // byte 1 from s2
+ addi a2, a2, 1 // advance s1
+ bne a8, a9, .Lretdiff // if different, return difference
+ beqz a8, .Leq // bytes equal, if zero, strings are equal
+ addi a3, a3, 1 // advance s2
+ bnone a2, a10, .Laligned // if s1/s2 now aligned
+ l8ui a8, a2, 0 // byte 2 from s1
+ l8ui a9, a3, 0 // byte 2 from s2
+ addi a2, a2, 1 // advance s1
+ bne a8, a9, .Lretdiff // if different, return difference
+ beqz a8, .Leq // bytes equal, if zero, strings are equal
+ addi a3, a3, 1 // advance s2
+ j .Laligned
+
+/* s1 and s2 have different alignment.
+
+ If the zero-overhead loop option is available, use an (almost)
+ infinite zero-overhead loop with conditional exits so we only pay
+ for taken branches when exiting the loop.
+
+ Note: It is important for this unaligned case to come before the
+ code for aligned strings, because otherwise some of the branches
+ above cannot reach and have to be transformed to branches around
+ jumps. The unaligned code is smaller and the branches can reach
+ over it. */
+
+ .align 4
+ /* (2 mod 4) alignment for loop instruction */
+.Lunaligned:
+#if XCHAL_HAVE_LOOPS
+ _movi.n a8, 0 // set up for the maximum loop count
+ loop a8, .Lretdiff // loop forever (almost anyway)
+#endif
+.Lnextbyte:
+ l8ui a8, a2, 0
+ l8ui a9, a3, 0
+ addi a2, a2, 1
+ bne a8, a9, .Lretdiff
+ addi a3, a3, 1
+#if XCHAL_HAVE_LOOPS
+ beqz a8, .Lretdiff
+#else
+ bnez a8, .Lnextbyte
+#endif
+.Lretdiff:
+ sub a2, a8, a9
+ retw
+
+/* s1 is word-aligned; s2 is word-aligned.
+
+ If the zero-overhead loop option is available, use an (almost)
+ infinite zero-overhead loop with conditional exits so we only pay
+ for taken branches when exiting the loop. */
+
+/* New algorithm, relying on the fact that all normal ASCII is between
+ 32 and 127.
+
+ Rather than check all bytes for zero:
+ Take one word (4 bytes). Call it w1.
+ Shift w1 left by one into w1'.
+ Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't.
+ Check that all 4 bit 6's (one for each byte) are one:
+ If they are, we are definitely not done.
+ If they are not, we are probably done, but need to check for zero. */
+
+ .align 4
+#if XCHAL_HAVE_LOOPS
+.Laligned:
+ .begin no-transform
+ l32r a4, .Lmask0 // mask for byte 0
+ l32r a7, .Lmask4
+	/* Loop forever.  (a4 is more than the maximum number
+ of iterations) */
+ loop a4, .Laligned_done
+
+ /* First unrolled loop body. */
+ l32i a8, a2, 0 // get word from s1
+ l32i a9, a3, 0 // get word from s2
+ slli a5, a8, 1
+ bne a8, a9, .Lwne2
+ or a9, a8, a5
+ bnall a9, a7, .Lprobeq
+
+ /* Second unrolled loop body. */
+ l32i a8, a2, 4 // get word from s1+4
+ l32i a9, a3, 4 // get word from s2+4
+ slli a5, a8, 1
+ bne a8, a9, .Lwne2
+ or a9, a8, a5
+ bnall a9, a7, .Lprobeq2
+
+ addi a2, a2, 8 // advance s1 pointer
+ addi a3, a3, 8 // advance s2 pointer
+.Laligned_done:
+ or a1, a1, a1 // nop
+
+.Lprobeq2:
+ /* Adjust pointers to account for the loop unrolling. */
+ addi a2, a2, 4
+ addi a3, a3, 4
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+.Laligned:
+ movi a4, MASK0 // mask for byte 0
+ movi a7, MASK4
+ j .Lfirstword
+.Lnextword:
+ addi a2, a2, 4 // advance s1 pointer
+ addi a3, a3, 4 // advance s2 pointer
+.Lfirstword:
+ l32i a8, a2, 0 // get word from s1
+ l32i a9, a3, 0 // get word from s2
+ slli a5, a8, 1
+ bne a8, a9, .Lwne2
+ or a9, a8, a5
+ ball a9, a7, .Lnextword
+#endif /* !XCHAL_HAVE_LOOPS */
+
+ /* align (0 mod 4) */
+.Lprobeq:
+ /* Words are probably equal, but check for sure.
+ If not, loop over the rest of string using normal algorithm. */
+
+ bnone a8, a4, .Leq // if byte 0 is zero
+ l32r a5, .Lmask1 // mask for byte 1
+ l32r a6, .Lmask2 // mask for byte 2
+ bnone a8, a5, .Leq // if byte 1 is zero
+ l32r a7, .Lmask3 // mask for byte 3
+ bnone a8, a6, .Leq // if byte 2 is zero
+ bnone a8, a7, .Leq // if byte 3 is zero
+ addi.n a2, a2, 4 // advance s1 pointer
+ addi.n a3, a3, 4 // advance s2 pointer
+#if XCHAL_HAVE_LOOPS
+
+ /* align (1 mod 4) */
+ loop a4, .Leq // loop forever (a4 is bigger than max iters)
+ .end no-transform
+
+ l32i a8, a2, 0 // get word from s1
+ l32i a9, a3, 0 // get word from s2
+ addi a2, a2, 4 // advance s1 pointer
+ bne a8, a9, .Lwne
+ bnone a8, a4, .Leq // if byte 0 is zero
+ bnone a8, a5, .Leq // if byte 1 is zero
+ bnone a8, a6, .Leq // if byte 2 is zero
+ bnone a8, a7, .Leq // if byte 3 is zero
+ addi a3, a3, 4 // advance s2 pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+ j .Lfirstword2
+.Lnextword2:
+ addi a3, a3, 4 // advance s2 pointer
+.Lfirstword2:
+ l32i a8, a2, 0 // get word from s1
+ l32i a9, a3, 0 // get word from s2
+ addi a2, a2, 4 // advance s1 pointer
+ bne a8, a9, .Lwne
+ bnone a8, a4, .Leq // if byte 0 is zero
+ bnone a8, a5, .Leq // if byte 1 is zero
+ bnone a8, a6, .Leq // if byte 2 is zero
+	bany	a8, a7, .Lnextword2	// loop if byte 3 is nonzero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+ /* Words are equal; some byte is zero. */
+.Leq: movi a2, 0 // return equal
+ retw
+
+.Lwne2: /* Words are not equal. On big-endian processors, if none of the
+ bytes are zero, the return value can be determined by a simple
+ comparison. */
+#ifdef __XTENSA_EB__
+ or a10, a8, a5
+ bnall a10, a7, .Lsomezero
+ bgeu a8, a9, .Lposreturn
+ movi a2, -1
+ retw
+.Lposreturn:
+ movi a2, 1
+ retw
+.Lsomezero: // There is probably some zero byte.
+#endif /* __XTENSA_EB__ */
+.Lwne: /* Words are not equal. */
+ xor a2, a8, a9 // get word with nonzero in byte that differs
+ bany a2, a4, .Ldiff0 // if byte 0 differs
+ movi a5, MASK1 // mask for byte 1
+ bnone a8, a4, .Leq // if byte 0 is zero
+ bany a2, a5, .Ldiff1 // if byte 1 differs
+ movi a6, MASK2 // mask for byte 2
+ bnone a8, a5, .Leq // if byte 1 is zero
+ bany a2, a6, .Ldiff2 // if byte 2 differs
+ bnone a8, a6, .Leq // if byte 2 is zero
+#ifdef __XTENSA_EB__
+.Ldiff3:
+.Ldiff2:
+.Ldiff1:
+ /* Byte 0 is equal (at least) and there is a difference before a zero
+ byte. Just subtract words to get the return value.
+ The high order equal bytes cancel, leaving room for the sign. */
+ sub a2, a8, a9
+ retw
+
+.Ldiff0:
+ /* Need to make room for the sign, so can't subtract whole words. */
+ extui a10, a8, 24, 8
+ extui a11, a9, 24, 8
+ sub a2, a10, a11
+ retw
+
+#else /* !__XTENSA_EB__ */
+ /* Little-endian is a little more difficult because can't subtract
+ whole words. */
+.Ldiff3:
+ /* Bytes 0-2 are equal; byte 3 is different.
+ For little-endian need to have a sign bit for the difference. */
+ extui a10, a8, 24, 8
+ extui a11, a9, 24, 8
+ sub a2, a10, a11
+ retw
+
+.Ldiff0:
+ /* Byte 0 is different. */
+ extui a10, a8, 0, 8
+ extui a11, a9, 0, 8
+ sub a2, a10, a11
+ retw
+
+.Ldiff1:
+ /* Byte 0 is equal; byte 1 is different. */
+ extui a10, a8, 8, 8
+ extui a11, a9, 8, 8
+ sub a2, a10, a11
+ retw
+
+.Ldiff2:
+ /* Bytes 0-1 are equal; byte 2 is different. */
+ extui a10, a8, 16, 8
+ extui a11, a9, 16, 8
+ sub a2, a10, a11
+ retw
+
+#endif /* !__XTENSA_EB */
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp, strcoll)
+libc_hidden_def (strcoll)
+#endif
diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S
new file mode 100644
index 000000000..108070384
--- /dev/null
+++ b/libc/string/xtensa/strcpy.S
@@ -0,0 +1,150 @@
+/* Optimized strcpy for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+ .text
+ENTRY (strcpy)
+ /* a2 = dst, a3 = src */
+
+ mov a10, a2 // leave dst in return value register
+ movi a4, MASK0
+ movi a5, MASK1
+ movi a6, MASK2
+ movi a7, MASK3
+ bbsi.l a3, 0, .Lsrc1mod2
+ bbsi.l a3, 1, .Lsrc2mod4
+.Lsrcaligned:
+
+ /* Check if the destination is aligned. */
+ movi a8, 3
+ bnone a10, a8, .Laligned
+
+ j .Ldstunaligned
+
+.Lsrc1mod2: // src address is odd
+ l8ui a8, a3, 0 // get byte 0
+ addi a3, a3, 1 // advance src pointer
+ s8i a8, a10, 0 // store byte 0
+ beqz a8, 1f // if byte 0 is zero
+ addi a10, a10, 1 // advance dst pointer
+ bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned
+
+.Lsrc2mod4: // src address is 2 mod 4
+ l8ui a8, a3, 0 // get byte 0
+ /* 1-cycle interlock */
+ s8i a8, a10, 0 // store byte 0
+ beqz a8, 1f // if byte 0 is zero
+	l8ui	a8, a3, 1	// get byte 1
+ addi a3, a3, 2 // advance src pointer
+	s8i	a8, a10, 1	// store byte 1
+ addi a10, a10, 2 // advance dst pointer
+ bnez a8, .Lsrcaligned
+1: retw
+
+
+/* dst is word-aligned; src is word-aligned. */
+
+ .align 4
+#if XCHAL_HAVE_LOOPS
+ /* (2 mod 4) alignment for loop instruction */
+.Laligned:
+ _movi.n a8, 0 // set up for the maximum loop count
+ loop a8, .Lz3 // loop forever (almost anyway)
+ l32i a8, a3, 0 // get word from src
+ addi a3, a3, 4 // advance src pointer
+ bnone a8, a4, .Lz0 // if byte 0 is zero
+ bnone a8, a5, .Lz1 // if byte 1 is zero
+ bnone a8, a6, .Lz2 // if byte 2 is zero
+ s32i a8, a10, 0 // store word to dst
+ bnone a8, a7, .Lz3 // if byte 3 is zero
+ addi a10, a10, 4 // advance dst pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1: addi a10, a10, 4 // advance dst pointer
+.Laligned:
+ l32i a8, a3, 0 // get word from src
+ addi a3, a3, 4 // advance src pointer
+ bnone a8, a4, .Lz0 // if byte 0 is zero
+ bnone a8, a5, .Lz1 // if byte 1 is zero
+ bnone a8, a6, .Lz2 // if byte 2 is zero
+ s32i a8, a10, 0 // store word to dst
+	bany	a8, a7, 1b	// loop if byte 3 is nonzero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+.Lz3: /* Byte 3 is zero. */
+ retw
+
+.Lz0: /* Byte 0 is zero. */
+#ifdef __XTENSA_EB__
+ movi a8, 0
+#endif
+ s8i a8, a10, 0
+ retw
+
+.Lz1: /* Byte 1 is zero. */
+#ifdef __XTENSA_EB__
+ extui a8, a8, 16, 16
+#endif
+ s16i a8, a10, 0
+ retw
+
+.Lz2: /* Byte 2 is zero. */
+#ifdef __XTENSA_EB__
+ extui a8, a8, 16, 16
+#endif
+ s16i a8, a10, 0
+ movi a8, 0
+ s8i a8, a10, 2
+ retw
+
+ .align 4
+ /* (2 mod 4) alignment for loop instruction */
+.Ldstunaligned:
+
+#if XCHAL_HAVE_LOOPS
+ _movi.n a8, 0 // set up for the maximum loop count
+ loop a8, 2f // loop forever (almost anyway)
+#endif
+1: l8ui a8, a3, 0
+ addi a3, a3, 1
+ s8i a8, a10, 0
+ addi a10, a10, 1
+#if XCHAL_HAVE_LOOPS
+ beqz a8, 2f
+#else
+ bnez a8, 1b
+#endif
+2: retw
+
+libc_hidden_def (strcpy)
diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S
new file mode 100644
index 000000000..dd72c16fa
--- /dev/null
+++ b/libc/string/xtensa/strlen.S
@@ -0,0 +1,104 @@
+/* Optimized strlen for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+ .text
+ENTRY (strlen)
+ /* a2 = s */
+
+ addi a3, a2, -4 // because we overincrement at the end
+ movi a4, MASK0
+ movi a5, MASK1
+ movi a6, MASK2
+ movi a7, MASK3
+ bbsi.l a2, 0, .L1mod2
+ bbsi.l a2, 1, .L2mod4
+ j .Laligned
+
+.L1mod2: // address is odd
+ l8ui a8, a3, 4 // get byte 0
+ addi a3, a3, 1 // advance string pointer
+ beqz a8, .Lz3 // if byte 0 is zero
+ bbci.l a3, 1, .Laligned // if string pointer is now word-aligned
+
+.L2mod4: // address is 2 mod 4
+ addi a3, a3, 2 // advance ptr for aligned access
+ l32i a8, a3, 0 // get word with first two bytes of string
+ bnone a8, a6, .Lz2 // if byte 2 (of word, not string) is zero
+ bany a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero
+
+ /* Byte 3 is zero. */
+ addi a3, a3, 3 // point to zero byte
+ sub a2, a3, a2 // subtract to get length
+ retw
+
+
+/* String is word-aligned. */
+
+ .align 4
+ /* (2 mod 4) alignment for loop instruction */
+.Laligned:
+#if XCHAL_HAVE_LOOPS
+ _movi.n a8, 0 // set up for the maximum loop count
+ loop a8, .Lz3 // loop forever (almost anyway)
+#endif
+1: l32i a8, a3, 4 // get next word of string
+ addi a3, a3, 4 // advance string pointer
+ bnone a8, a4, .Lz0 // if byte 0 is zero
+ bnone a8, a5, .Lz1 // if byte 1 is zero
+ bnone a8, a6, .Lz2 // if byte 2 is zero
+#if XCHAL_HAVE_LOOPS
+ bnone a8, a7, .Lz3 // if byte 3 is zero
+#else
+ bany a8, a7, 1b // repeat if byte 3 is non-zero
+#endif
+
+.Lz3: /* Byte 3 is zero. */
+ addi a3, a3, 3 // point to zero byte
+ /* Fall through.... */
+
+.Lz0: /* Byte 0 is zero. */
+ sub a2, a3, a2 // subtract to get length
+ retw
+
+.Lz1: /* Byte 1 is zero. */
+ addi a3, a3, 1 // point to zero byte
+ sub a2, a3, a2 // subtract to get length
+ retw
+
+.Lz2: /* Byte 2 is zero. */
+ addi a3, a3, 2 // point to zero byte
+ sub a2, a3, a2 // subtract to get length
+ retw
+
+libc_hidden_def (strlen)
diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S
new file mode 100644
index 000000000..7ba2ef77d
--- /dev/null
+++ b/libc/string/xtensa/strncpy.S
@@ -0,0 +1,241 @@
+/* Optimized strncpy for Xtensa.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+/* Do not use .literal_position in the ENTRY macro. */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+ .text
+ .align 4
+ .literal_position
+__strncpy_aux:	// slow-path entry: copy bytes until src is word-aligned
+
+.Lsrc1mod2: // src address is odd
+	l8ui	a8, a3, 0	// get byte 0
+	addi	a3, a3, 1	// advance src pointer
+	s8i	a8, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, .Lret	// if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	beqz	a8, .Lfill	// if byte 0 is zero, zero-fill the rest
+	bbci.l	a3, 1, .Lsrcaligned	// if src is now word-aligned
+
+.Lsrc2mod4: // src address is 2 mod 4
+	l8ui	a8, a3, 0	// get next byte
+	addi	a4, a4, -1	// decrement n
+	s8i	a8, a10, 0	// store it
+	beqz	a4, .Lret	// if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	beqz	a8, .Lfill	// if that byte is zero, zero-fill the rest
+	l8ui	a8, a3, 1	// get the following byte
+	addi	a3, a3, 2	// advance src pointer (now word-aligned)
+	s8i	a8, a10, 0	// store it
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, .Lret	// if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	bnez	a8, .Lsrcaligned	// nonzero byte: continue aligned copy
+	j	.Lfill		// zero byte copied: zero-fill the rest
+
+.Lret:
+	retw
+
+
+ENTRY (strncpy)
+	/* a2 = dst, a3 = src, a4 = n; a2 is also the return value */
+
+	mov	a10, a2		// leave dst in return value register
+	beqz	a4, .Lret	// if n is zero
+
+	movi	a11, MASK0	// per-byte masks for zero-byte detection
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a3, 0, .Lsrc1mod2	// src odd: copy bytes until aligned
+	bbsi.l	a3, 1, .Lsrc2mod4	// src 2 mod 4: copy two bytes
+.Lsrcaligned:
+
+	/* Check if the destination is aligned. */
+	movi	a8, 3
+	bnone	a10, a8, .Laligned	// both low bits clear: word-wise copy
+
+	j	.Ldstunaligned	// otherwise fall back to byte-at-a-time copy
+
+
+/* Fill the remaining n (at least 1) bytes of dst with zeros. */
+
+.Lfill:
+	movi	a9, 0		// zero value to store
+	bbsi.l	a10, 0, .Lfill1mod2	// dst odd: store one byte first
+	bbsi.l	a10, 1, .Lfill2mod4	// dst 2 mod 4: store two bytes first
+.Lfillaligned:
+	blti	a4, 4, .Lfillcleanup	// fewer than 4 bytes left: bytewise
+
+	/* Loop filling complete words with zero. */
+#if XCHAL_HAVE_LOOPS
+
+	srai	a8, a4, 2	// a8 = number of whole words
+	loop	a8, 1f
+	s32i	a9, a10, 0	// store a zero word
+	addi	a10, a10, 4	// advance dst pointer
+
+1:	slli	a8, a8, 2	// a8 = bytes stored by the loop
+	sub	a4, a4, a8	// a4 = leftover byte count (0..3)
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	s32i	a9, a10, 0	// store a zero word
+	addi	a10, a10, 4	// advance dst pointer
+	addi	a4, a4, -4	// decrement n by one word
+	bgei	a4, 4, 1b	// repeat while a whole word remains
+
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	beqz	a4, 2f		// no leftover bytes: done
+
+.Lfillcleanup:
+	/* Fill leftover (1 to 3) bytes with zero. */
+	s8i	a9, a10, 0	// store zero byte
+	addi	a4, a4, -1	// decrement n
+	addi	a10, a10, 1	// advance dst pointer
+	bnez	a4, .Lfillcleanup
+
+2:	retw
+
+.Lfill1mod2: // dst address is odd
+	s8i	a9, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, 2b		// if n is zero
+	addi	a10, a10, 1	// advance dst pointer
+	bbci.l	a10, 1, .Lfillaligned	// if dst is now word-aligned
+
+.Lfill2mod4: // dst address is 2 mod 4
+	s8i	a9, a10, 0	// store byte 0
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, 2b		// if n is zero
+	s8i	a9, a10, 1	// store byte 1
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, 2b		// if n is zero
+	addi	a10, a10, 2	// advance dst pointer
+	j	.Lfillaligned
+
+
+/* dst is word-aligned; src is word-aligned; n is at least 1. */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 1f		// loop forever (almost anyway)
+	blti	a4, 5, .Ldstunaligned	// n is near limit; do one at a time
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a11, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	addi	a4, a4, -4	// decrement n
+	addi	a10, a10, 4	// advance dst pointer
+	bnone	a8, a7, .Lfill	// if byte 3 is zero
+1:
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	blti	a4, 5, .Ldstunaligned	// n is near limit; do one at a time
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a11, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	addi	a4, a4, -4	// decrement n
+	addi	a10, a10, 4	// advance dst pointer
+	bany	a8, a7, 1b	// no zeroes
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	j	.Lfill		// zero byte was copied; zero-fill the rest
+
+.Lz0: /* Byte 0 is zero. */
+#ifdef __XTENSA_EB__
+	movi	a8, 0		// big-endian: low byte of a8 is not the zero
+#endif
+	s8i	a8, a10, 0	// store the terminating zero
+	addi	a4, a4, -1	// decrement n
+	addi	a10, a10, 1	// advance dst pointer
+	j	.Lfill		// zero-pad the remaining bytes
+
+.Lz1: /* Byte 1 is zero. */
+#ifdef __XTENSA_EB__
+	extui	a8, a8, 16, 16	// big-endian: bytes 0-1 are the high half
+#endif
+	s16i	a8, a10, 0	// store bytes 0 and 1 (byte 1 is the zero)
+	addi	a4, a4, -2	// decrement n
+	addi	a10, a10, 2	// advance dst pointer
+	j	.Lfill		// zero-pad the remaining bytes
+
+.Lz2: /* Byte 2 is zero. */
+#ifdef __XTENSA_EB__
+	extui	a8, a8, 16, 16	// big-endian: bytes 0-1 are the high half
+#endif
+	s16i	a8, a10, 0	// store bytes 0 and 1
+	movi	a8, 0
+	s8i	a8, a10, 2	// store the terminating zero at byte 2
+	addi	a4, a4, -3	// decrement n
+	addi	a10, a10, 3	// advance dst pointer
+	j	.Lfill		// zero-pad the remaining bytes
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Ldstunaligned: // byte-at-a-time copy: dst unaligned or n nearly exhausted
+
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 2f		// loop forever (almost anyway)
+#endif
+1:	l8ui	a8, a3, 0	// get next src byte
+	addi	a3, a3, 1	// advance src pointer
+	s8i	a8, a10, 0	// store it
+	addi	a4, a4, -1	// decrement n
+	beqz	a4, 3f		// n exhausted: return
+	addi	a10, a10, 1	// advance dst pointer
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, 2f		// zero byte copied: exit loop to zero-fill
+#else
+	bnez	a8, 1b		// repeat until a zero byte is copied
+#endif
+2:	j	.Lfill		// zero-pad the remaining bytes
+
+3:	retw
+
+libc_hidden_def (strncpy)