diff options
Diffstat (limited to 'libc/string')
-rw-r--r-- | libc/string/avr32/Makefile | 26 | ||||
-rw-r--r-- | libc/string/avr32/bcopy.S | 26 | ||||
-rw-r--r-- | libc/string/avr32/bzero.S | 24 | ||||
-rw-r--r-- | libc/string/avr32/memcmp.S | 61 | ||||
-rw-r--r-- | libc/string/avr32/memcpy.S | 111 | ||||
-rw-r--r-- | libc/string/avr32/memmove.S | 116 | ||||
-rw-r--r-- | libc/string/avr32/memset.S | 65 | ||||
-rw-r--r-- | libc/string/avr32/strcmp.S | 91 | ||||
-rw-r--r-- | libc/string/avr32/strlen.S | 62 | ||||
-rw-r--r-- | libc/string/cris/memcopy.h | 62 | ||||
-rw-r--r-- | libc/string/cris/memcpy.c | 264 | ||||
-rw-r--r-- | libc/string/cris/memmove.c | 101 | ||||
-rw-r--r-- | libc/string/cris/memset.c | 271 | ||||
-rw-r--r-- | libc/string/cris/strcpy.c | 51 | ||||
-rw-r--r-- | libc/string/cris/strncpy.c | 61 | ||||
-rw-r--r-- | libc/string/xtensa/Makefile | 13 | ||||
-rw-r--r-- | libc/string/xtensa/memcpy.S | 297 | ||||
-rw-r--r-- | libc/string/xtensa/memset.S | 165 | ||||
-rw-r--r-- | libc/string/xtensa/strcmp.S | 313 | ||||
-rw-r--r-- | libc/string/xtensa/strcpy.S | 150 | ||||
-rw-r--r-- | libc/string/xtensa/strlen.S | 104 | ||||
-rw-r--r-- | libc/string/xtensa/strncpy.S | 241 |
22 files changed, 2675 insertions, 0 deletions
diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile new file mode 100644 index 000000000..0002ffdce --- /dev/null +++ b/libc/string/avr32/Makefile @@ -0,0 +1,26 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org> +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU Library General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more +# details. +# +# You should have received a copy of the GNU Library General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +top_srcdir := ../../../ +top_builddir := ../../../ + +all: objs + +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/avr32/bcopy.S b/libc/string/avr32/bcopy.S new file mode 100644 index 000000000..e1d173165 --- /dev/null +++ b/libc/string/avr32/bcopy.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#ifdef __UCLIBC_SUSV3_LEGACY__ + + .text + .global bcopy + .type bcopy, @function + .align 1 +bcopy: + /* Swap the first two arguments */ + eor r11, r12 + eor r12, r11 + eor r11, r12 + rjmp HIDDEN_JUMPTARGET(memmove) + + .size bcopy, . 
- bcopy + +#endif /* __UCLIBC_SUSV3_LEGACY__ */ diff --git a/libc/string/avr32/bzero.S b/libc/string/avr32/bzero.S new file mode 100644 index 000000000..928148dcb --- /dev/null +++ b/libc/string/avr32/bzero.S @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#ifdef __UCLIBC_SUSV3_LEGACY__ + + .text + .global bzero + .type bzero, @function + .align 1 +bzero: + mov r10, r11 + mov r11, 0 + rjmp HIDDEN_JUMPTARGET(memset) + + .size bzero, . - bzero + +#endif /* __UCLIBC_SUSV3_LEGACY__ */ diff --git a/libc/string/avr32/memcmp.S b/libc/string/avr32/memcmp.S new file mode 100644 index 000000000..5d7eac3d9 --- /dev/null +++ b/libc/string/avr32/memcmp.S @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#define s1 r12 +#define s2 r11 +#define len r10 + + .text + .global memcmp + .type memcmp, @function + .align 1 +memcmp: + sub len, 4 + brlt .Lless_than_4 + +1: ld.w r8, s1++ + ld.w r9, s2++ + cp.w r8, r9 + brne .Lfound_word + sub len, 4 + brge 1b + +.Lless_than_4: + sub len, -4 + reteq 0 + +1: ld.ub r8, s1++ + ld.ub r9, s2++ + sub r8, r9 + retne r8 + sub len, 1 + brgt 1b + + retal 0 + +.Lfound_word: + mov len, 4 + +2: bfextu r11, r9, 24, 8 + bfextu r12, r8, 24, 8 + sub r12, r11 + retne r12 + lsl r8, 8 + lsl r9, 8 + sub len, 1 + brne 2b + retal r12 + + .size memcmp, . 
- memcmp + +libc_hidden_def(memcmp) +#ifdef __UCLIBC_SUSV3_LEGACY__ +strong_alias(memcmp,bcmp) +#endif diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S new file mode 100644 index 000000000..f95aabd13 --- /dev/null +++ b/libc/string/avr32/memcpy.S @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +/* Don't use r12 as dst since we must return it unmodified */ +#define dst r9 +#define src r11 +#define len r10 + + .text + .global memcpy + .type memcpy, @function +memcpy: + pref src[0] + mov dst, r12 + + /* If we have less than 32 bytes, don't do anything fancy */ + cp.w len, 32 + brge .Lmore_than_31 + + sub len, 1 + retlt r12 +1: ld.ub r8, src++ + st.b dst++, r8 + sub len, 1 + brge 1b + retal r12 + +.Lmore_than_31: + pushm r0-r7, lr + + /* Check alignment */ + mov r8, src + andl r8, 31, COH + brne .Lunaligned_src + mov r8, dst + andl r8, 3, COH + brne .Lunaligned_dst + +.Laligned_copy: + sub len, 32 + brlt .Lless_than_32 + +1: /* Copy 32 bytes at a time */ + ldm src, r0-r7 + sub src, -32 + stm dst, r0-r7 + sub dst, -32 + sub len, 32 + brge 1b + +.Lless_than_32: + /* Copy 16 more bytes if possible */ + sub len, -16 + brlt .Lless_than_16 + ldm src, r0-r3 + sub src, -16 + sub len, 16 + stm dst, r0-r3 + sub dst, -16 + +.Lless_than_16: + /* Do the remaining as byte copies */ + neg len + add pc, pc, len << 2 + .rept 15 + ld.ub r0, src++ + st.b dst++, r0 + .endr + + popm r0-r7, pc + +.Lunaligned_src: + /* Make src cacheline-aligned. r8 = (src & 31) */ + rsub r8, r8, 32 + sub len, r8 +1: ld.ub r0, src++ + st.b dst++, r0 + sub r8, 1 + brne 1b + + /* If dst is word-aligned, we're ready to go */ + pref src[0] + mov r8, 3 + tst dst, r8 + breq .Laligned_copy + +.Lunaligned_dst: + /* src is aligned, but dst is not. 
Expect bad performance */ + sub len, 4 + brlt 2f +1: ld.w r0, src++ + st.w dst++, r0 + sub len, 4 + brge 1b + +2: neg len + add pc, pc, len << 2 + .rept 3 + ld.ub r0, src++ + st.b dst++, r0 + .endr + + popm r0-r7, pc + .size memcpy, . - memcpy + +libc_hidden_def(memcpy) diff --git a/libc/string/avr32/memmove.S b/libc/string/avr32/memmove.S new file mode 100644 index 000000000..8ca4da54d --- /dev/null +++ b/libc/string/avr32/memmove.S @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#define dst r12 +#define src r11 +#define len r10 + + .text + .global memmove + .type memmove, @function +memmove: + cp.w src, dst + brge HIDDEN_JUMPTARGET(memcpy) + + add dst, len + add src, len + pref src[-1] + + /* + * The rest is basically the same as in memcpy.S except that + * the direction is reversed. + */ + cp.w len, 32 + brge .Lmore_than_31 + + sub len, 1 + retlt r12 +1: ld.ub r8, --src + st.b --dst, r8 + sub len, 1 + brge 1b + retal r12 + +.Lmore_than_31: + pushm r0-r7, lr + + /* Check alignment */ + mov r8, src + andl r8, 31, COH + brne .Lunaligned_src + mov r8, r12 + andl r8, 3, COH + brne .Lunaligned_dst + +.Laligned_copy: + sub len, 32 + brlt .Lless_than_32 + +1: /* Copy 32 bytes at a time */ + sub src, 32 + ldm src, r0-r7 + sub dst, 32 + sub len, 32 + stm dst, r0-r7 + brge 1b + +.Lless_than_32: + /* Copy 16 more bytes if possible */ + sub len, -16 + brlt .Lless_than_16 + sub src, 16 + ldm src, r0-r3 + sub dst, 16 + sub len, 16 + stm dst, r0-r3 + +.Lless_than_16: + /* Do the remaining as byte copies */ + sub len, -16 + breq 2f +1: ld.ub r0, --src + st.b --dst, r0 + sub len, 1 + brne 1b + +2: popm r0-r7, pc + +.Lunaligned_src: + /* Make src cacheline-aligned. 
r8 = (src & 31) */ + sub len, r8 +1: ld.ub r0, --src + st.b --dst, r0 + sub r8, 1 + brne 1b + + /* If dst is word-aligned, we're ready to go */ + pref src[-4] + mov r8, 3 + tst dst, r8 + breq .Laligned_copy + +.Lunaligned_dst: + /* src is aligned, but dst is not. Expect bad performance */ + sub len, 4 + brlt 2f +1: ld.w r0, --src + st.w --dst, r0 + sub len, 4 + brge 1b + +2: neg len + add pc, pc, len << 2 + .rept 3 + ld.ub r0, --src + st.b --dst, r0 + .endr + + popm r0-r7, pc + .size memmove, . - memmove + +libc_hidden_def(memmove) diff --git a/libc/string/avr32/memset.S b/libc/string/avr32/memset.S new file mode 100644 index 000000000..964bf4834 --- /dev/null +++ b/libc/string/avr32/memset.S @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#define s r12 +#define c r11 +#define n r10 + + .text + .global memset + .type memset, @function + + .align 1 +memset: + cp.w n, 32 + mov r9, s + brge .Llarge_memset + + sub n, 1 + retlt s +1: st.b s++, c + sub n, 1 + brge 1b + + retal r9 + +.Llarge_memset: + mov r8, r11 + mov r11, 3 + bfins r8, r8, 8, 8 + bfins r8, r8, 16, 16 + tst s, r11 + breq 2f + +1: st.b s++, r8 + sub n, 1 + tst s, r11 + brne 1b + +2: mov r11, r9 + mov r9, r8 + sub n, 8 + +3: st.d s++, r8 + sub n, 8 + brge 3b + + /* If we are done, n == -8 and we'll skip all st.b insns below */ + neg n + lsl n, 1 + add pc, n + .rept 7 + st.b s++, r8 + .endr + retal r11 + + .size memset, . 
- memset + +libc_hidden_def(memset) diff --git a/libc/string/avr32/strcmp.S b/libc/string/avr32/strcmp.S new file mode 100644 index 000000000..e9f087577 --- /dev/null +++ b/libc/string/avr32/strcmp.S @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#define s1 r12 +#define s2 r11 +#define len r10 + + .text + .global strcmp + .type strcmp, @function + .align 1 +strcmp: + mov r8, 3 + tst s1, r8 + brne .Lunaligned_s1 + tst s2, r8 + brne .Lunaligned_s2 + +1: ld.w r8, s1++ + ld.w r9, s2++ + cp.w r8, r9 + brne 2f + tnbz r8 + brne 1b + retal 0 + +2: bfextu r12, r8, 24, 8 + bfextu r11, r9, 24, 8 + sub r12, r11 + retne r12 + cp.w r11, 0 + reteq 0 + bfextu r12, r8, 16, 8 + bfextu r11, r9, 16, 8 + sub r12, r11 + retne r12 + cp.w r11, 0 + reteq 0 + bfextu r12, r8, 8, 8 + bfextu r11, r9, 8, 8 + sub r12, r11 + retne r12 + cp.w r11, 0 + reteq 0 + bfextu r12, r8, 0, 8 + bfextu r11, r9, 0, 8 + sub r12, r11 + retal r12 + +.Lunaligned_s1: +3: tst s1, r8 + breq 4f + ld.ub r10, s1++ + ld.ub r9, s2++ + sub r10, r9 + retne r10 + cp.w r9, 0 + brne 3b + retal r10 + +4: tst s2, r8 + breq 1b + +.Lunaligned_s2: + /* + * s1 and s2 can't both be aligned, and unaligned word loads + * can trigger spurious exceptions if we cross a page boundary. + * Do it the slow way... + */ +1: ld.ub r8, s1++ + ld.ub r9, s2++ + sub r8, r9 + retne r8 + cp.w r9, 0 + brne 1b + retal 0 + + .size strcmp, . 
- strcmp + +libc_hidden_def(strcmp) +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias(strcmp, strcoll) +libc_hidden_def(strcoll) +#endif diff --git a/libc/string/avr32/strlen.S b/libc/string/avr32/strlen.S new file mode 100644 index 000000000..d2808998d --- /dev/null +++ b/libc/string/avr32/strlen.S @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +#include <features.h> + +#define str r12 + + .text + .global strlen + .type strlen, @function +strlen: + mov r11, r12 + + mov r9, str + andl r9, 3, COH + brne .Lunaligned_str + +1: ld.w r8, str++ + tnbz r8 + brne 1b + + sub r12, r11 + bfextu r9, r8, 24, 8 + cp.w r9, 0 + subeq r12, 4 + reteq r12 + bfextu r9, r8, 16, 8 + cp.w r9, 0 + subeq r12, 3 + reteq r12 + bfextu r9, r8, 8, 8 + cp.w r9, 0 + subeq r12, 2 + reteq r12 + sub r12, 1 + retal r12 + +.Lunaligned_str: + add pc, pc, r9 << 3 + sub r0, r0, 0 /* 4-byte nop */ + ld.ub r8, str++ + sub r8, r8, 0 + breq 1f + ld.ub r8, str++ + sub r8, r8, 0 + breq 1f + ld.ub r8, str++ + sub r8, r8, 0 + brne 1b + +1: sub r12, 1 + sub r12, r11 + retal r12 + + .size strlen, . - strlen + +libc_hidden_def(strlen) diff --git a/libc/string/cris/memcopy.h b/libc/string/cris/memcopy.h new file mode 100644 index 000000000..0eae998da --- /dev/null +++ b/libc/string/cris/memcopy.h @@ -0,0 +1,62 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + Modified for use in uClibc (C) 2007 Axis Communications AB. + Minimal modifications: include path name and #undef of WORD_COPY_FWD/BWD + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include "../generic/memcopy.h" + +/* We override the word-copying macros, partly because misalignment in one + pointer isn't cause for a special function, partly because we want to + get rid of all the static functions in generic/memcopy.c; these macros + are only used in memmove.c since we have arch-specific mempcpy, memcpy and + memset. */ + +#undef OP_T_THRES +#define OP_T_THRES OPSIZ + +#undef WORD_COPY_FWD +#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ + do \ + { \ + unsigned long enddst_bp = dst_bp + nbytes - (nbytes % OPSIZ); \ + nbytes_left = (nbytes % OPSIZ); \ + while (dst_bp < (unsigned long) enddst_bp) \ + { \ + op_t x = *(op_t *) src_bp; \ + src_bp += sizeof x; \ + *(op_t *) dst_bp = x; \ + dst_bp += sizeof x; \ + } \ + } while (0) + +#undef WORD_COPY_BWD +#define WORD_COPY_BWD(dst_bp, src_bp, nbytes_left, nbytes) \ + do \ + { \ + unsigned long enddst_bp = dst_bp - nbytes + (nbytes % OPSIZ); \ + nbytes_left = (nbytes % OPSIZ); \ + while (dst_bp > enddst_bp) \ + { \ + op_t x; \ + src_bp -= sizeof x; \ + x = *(op_t *) src_bp; \ + dst_bp -= sizeof x; \ + *(op_t *) dst_bp = x; \ + } \ + } while (0) diff --git a/libc/string/cris/memcpy.c b/libc/string/cris/memcpy.c new file mode 100644 index 000000000..a85108109 --- /dev/null +++ b/libc/string/cris/memcpy.c @@ -0,0 +1,264 @@ +/* Copyright (C) 2001, 2003 Free Software Foundation, Inc. + Copyright (C) 1994, 1995, 2000 Axis Communications AB. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/*#************************************************************************#*/ +/*#-------------------------------------------------------------------------*/ +/*# */ +/*# FUNCTION NAME: memcpy() */ +/*# */ +/*# PARAMETERS: void* dst; Destination address. */ +/*# void* src; Source address. */ +/*# int len; Number of bytes to copy. */ +/*# */ +/*# RETURNS: dst. */ +/*# */ +/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */ +/*# about copying of overlapping memory areas. This routine is */ +/*# very sensitive to compiler changes in register allocation. */ +/*# Should really be rewritten to avoid this problem. */ +/*# */ +/*#-------------------------------------------------------------------------*/ +/*# */ +/*# HISTORY */ +/*# */ +/*# DATE NAME CHANGES */ +/*# ---- ---- ------- */ +/*# 941007 Kenny R Creation */ +/*# 941011 Kenny R Lots of optimizations and inlining. */ +/*# 941129 Ulf A Adapted for use in libc. */ +/*# 950216 HP N==0 forgotten if non-aligned src/dst. */ +/*# Added some optimizations. */ +/*# 001025 HP Make src and dst char *. Align dst to */ +/*# dword, not just word-if-both-src-and-dst- */ +/*# are-misaligned. 
*/ +/*# 070806 RW Modified for uClibc */ +/*# (__arch_v32 -> __CONFIG_CRISV32__, */ +/*# include features.h to reach it.) */ +/*# */ +/*#-------------------------------------------------------------------------*/ + +#include <features.h> + +#ifdef __CONFIG_CRISV32__ +/* For CRISv32, movem is very cheap. */ +#define MEMCPY_BLOCK_THRESHOLD (44) +#else +/* Break even between movem and move16 is at 38.7*2, but modulo 44. */ +#define MEMCPY_BLOCK_THRESHOLD (44*2) +#endif + +void *memcpy(void *, const void *, unsigned int); + +libc_hidden_proto(memcpy) +void *memcpy(void *pdst, + const void *psrc, + unsigned int pn) +{ + /* Ok. Now we want the parameters put in special registers. + Make sure the compiler is able to make something useful of this. + As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). + + If gcc was all right, it really would need no temporaries, and no + stack space to save stuff on. */ + +#ifndef MEMPCPY + register void *return_dst __asm__ ("r10") = pdst; +#else + /* FIXME: Use R10 for something. */ +# define return_dst dst +#endif + + register char *dst __asm__ ("r13") = pdst; + register char *src __asm__ ("r11") = (char *) psrc; + register int n __asm__ ("r12") = pn; + + + /* When src is aligned but not dst, this makes a few extra needless + cycles. I believe it would take as many to check that the + re-alignment was unnecessary. */ + if (((unsigned long) dst & 3) != 0 + /* Don't align if we wouldn't copy more than a few bytes; so we + don't have to check further for overflows. */ + && n >= 3) + { + if ((unsigned long) dst & 1) + { + n--; + *(char*)dst = *(char*)src; + src++; + dst++; + } + + if ((unsigned long) dst & 2) + { + n -= 2; + *(short*)dst = *(short*)src; + src += 2; + dst += 2; + } + } + + /* Decide which copying method to use. 
*/ + if (n >= MEMCPY_BLOCK_THRESHOLD) + { + /* For large copies we use 'movem' */ + + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-movem sizes + suboptimal. + + This method is not foolproof; it assumes that the "register asm" + declarations at the beginning of the function really are used + here (beware: they may be moved to temporary registers). + This way, we do not have to save/move the registers around into + temporaries; we can safely use them straight away. */ + __asm__ volatile ("\ + .syntax no_register_prefix \n\ + \n\ + ;; Check that the register asm declaration got right. \n\ + ;; The GCC manual explicitly says TRT will happen. \n\ + .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ + .err \n\ + .endif \n\ + \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ + \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 44,r12 \n\ +0: \n\ + movem [r11+],r10 \n\ + subq 44,r12 \n\ + bge 0b \n\ + movem r10,[r13+] \n\ + \n\ + addq 44,r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [sp+],r10" + + /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) + /* Inputs */ : "0" (dst), "1" (src), "2" (n)); + } + + /* Either we directly starts copying, using dword copying + in a loop, or we copy as much as possible with 'movem' + and then the last block (<44 bytes) is copied here. + This will work since 'movem' will have updated src,dst,n. */ + + while ( n >= 16 ) + { + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + n -= 16; + } + + /* A switch() is definitely the fastest although it takes a LOT of code. + * Particularly if you inline code this. 
+ */ + switch (n) + { + case 0: + break; + case 1: + *((char*)dst)++ = *((char*)src)++; + break; + case 2: + *((short*)dst)++ = *((short*)src)++; + break; + case 3: + *((short*)dst)++ = *((short*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 4: + *((long*)dst)++ = *((long*)src)++; + break; + case 5: + *((long*)dst)++ = *((long*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 6: + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + break; + case 7: + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 8: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + break; + case 9: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 10: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + break; + case 11: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 12: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + break; + case 13: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + case 14: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + break; + case 15: + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((long*)dst)++ = *((long*)src)++; + *((short*)dst)++ = *((short*)src)++; + *((char*)dst)++ = *((char*)src)++; + break; + } + + return return_dst; /* destination pointer. 
*/ +} /* memcpy() */ +libc_hidden_def(memcpy) diff --git a/libc/string/cris/memmove.c b/libc/string/cris/memmove.c new file mode 100644 index 000000000..437637078 --- /dev/null +++ b/libc/string/cris/memmove.c @@ -0,0 +1,101 @@ +/* Taken from generic/memmove.c; trivially modified to work with + arch-specific memcopy.h for Cris. + + Copy memory to memory until the specified number of bytes + has been copied. Overlap is handled correctly. + Copyright (C) 1991, 1995, 1996, 1997, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <string.h> + +#include "memcopy.h" +#include "../generic/pagecopy.h" + +libc_hidden_proto(memmove) +void *memmove (void *dest, const void *src, size_t len) +{ + unsigned long int dstp = (long int) dest; + unsigned long int srcp = (long int) src; + + /* This test makes the forward copying code be used whenever possible. + Reduces the working set. */ + if (dstp - srcp >= len) /* *Unsigned* compare! */ + { +#if 1 +#warning REMINDER: Cris arch-opt memmove assumes memcpy does forward copying! + memcpy(dest, src, len); +#else + /* Copy from the beginning to the end. */ + + /* If there not too few bytes to copy, use word copy. 
*/ + if (len >= OP_T_THRES) + { + /* Copy just a few bytes to make DSTP aligned. */ + len -= (-dstp) % OPSIZ; + BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ); + + /* Copy whole pages from SRCP to DSTP by virtual address + manipulation, as much as possible. */ + + PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); + + /* Copy from SRCP to DSTP taking advantage of the known + alignment of DSTP. Number of bytes remaining is put + in the third argument, i.e. in LEN. This number may + vary from machine to machine. */ + + WORD_COPY_FWD (dstp, srcp, len, len); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. */ + BYTE_COPY_FWD (dstp, srcp, len); +#endif + } + else + { + /* Copy from the end to the beginning. */ + srcp += len; + dstp += len; + + /* If there not too few bytes to copy, use word copy. */ + if (len >= OP_T_THRES) + { + /* Copy just a few bytes to make DSTP aligned. */ + len -= dstp % OPSIZ; + BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ); + + /* Copy from SRCP to DSTP taking advantage of the known + alignment of DSTP. Number of bytes remaining is put + in the third argument, i.e. in LEN. This number may + vary from machine to machine. */ + + WORD_COPY_BWD (dstp, srcp, len, len); + + /* Fall out and copy the tail. */ + } + + /* There are just a few bytes to copy. Use byte memory operations. */ + BYTE_COPY_BWD (dstp, srcp, len); + } + + return (dest); +} +libc_hidden_def(memmove) diff --git a/libc/string/cris/memset.c b/libc/string/cris/memset.c new file mode 100644 index 000000000..7e71bc50f --- /dev/null +++ b/libc/string/cris/memset.c @@ -0,0 +1,271 @@ +/* Copyright (C) 2001, 2003 Free Software Foundation, Inc. + Copyright (C) 1999, 2000 Axis Communications AB. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/*#************************************************************************#*/ +/*#-------------------------------------------------------------------------*/ +/*# */ +/*# FUNCTION NAME: memset() */ +/*# */ +/*# PARAMETERS: void* dst; Destination address. */ +/*# int c; Value of byte to write. */ +/*# int len; Number of bytes to write. */ +/*# */ +/*# RETURNS: dst. */ +/*# */ +/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */ +/*# Framework taken from memcpy. This routine is */ +/*# very sensitive to compiler changes in register allocation. */ +/*# Should really be rewritten to avoid this problem. */ +/*# */ +/*#-------------------------------------------------------------------------*/ +/*# */ +/*# HISTORY */ +/*# */ +/*# DATE NAME CHANGES */ +/*# ---- ---- ------- */ +/*# 990713 HP Tired of watching this function (or */ +/*# really, the nonoptimized generic */ +/*# implementation) take up 90% of simulator */ +/*# output. Measurements needed. */ +/*# */ +/*#-------------------------------------------------------------------------*/ + +/* No, there's no macro saying 12*4, since it is "hard" to get it into + the asm in a good way. Thus better to expose the problem everywhere. 
+ */ + +/* Assuming 1 cycle per dword written or read (ok, not really true), and + one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1) + so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */ + +#define ZERO_BLOCK_SIZE (1*12*4) + +void *memset(void *, int, unsigned long); + +libc_hidden_proto(memset) +void *memset(void *pdst, + int c, + unsigned long plen) +{ + /* Ok. Now we want the parameters put in special registers. + Make sure the compiler is able to make something useful of this. */ + + register char *return_dst __asm__ ("r10") = pdst; + register long n __asm__ ("r12") = plen; + register int lc __asm__ ("r11") = c; + + /* Most apps use memset sanely. Only those memsetting about 3..4 + bytes or less get penalized compared to the generic implementation + - and that's not really sane use. */ + + /* Ugh. This is fragile at best. Check with newer GCC releases, if + they compile cascaded "x |= x << 8" sanely! */ + __asm__("movu.b %0,$r13 \n\ + lslq 8,$r13 \n\ + move.b %0,$r13 \n\ + move.d $r13,%0 \n\ + lslq 16,$r13 \n\ + or.d $r13,%0" + : "=r" (lc) : "0" (lc) : "r13"); + + { + register char *dst __asm__ ("r13") = pdst; + + if (((unsigned long) pdst & 3) != 0 + /* Oops! n=0 must be a legal call, regardless of alignment. */ + && n >= 3) + { + if ((unsigned long)dst & 1) + { + *dst = (char) lc; + n--; + dst++; + } + + if ((unsigned long)dst & 2) + { + *(short *)dst = lc; + n -= 2; + dst += 2; + } + } + + /* Now the fun part. For the threshold value of this, check the equation + above. */ + /* Decide which copying method to use. */ + if (n >= ZERO_BLOCK_SIZE) + { + /* For large copies we use 'movem' */ + + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-movem sizes + suboptimal. 
+ + This method is not foolproof; it assumes that the "asm reg" + declarations at the beginning of the function really are used + here (beware: they may be moved to temporary registers). + This way, we do not have to save/move the registers around into + temporaries; we can safely use them straight away. */ + __asm__ volatile (" \n\ + .syntax no_register_prefix \n\ + \n\ + ;; Check that the register asm declaration got right. \n\ + ;; The GCC manual explicitly says there's no warranty for that (too). \n\ + .ifnc %0-%1-%4,$r13-$r12-$r11 \n\ + .err \n\ + .endif \n\ + \n\ + ;; Save the registers we'll clobber in the movem process \n\ + ;; on the stack. Don't mention them to gcc, it will only be \n\ + ;; upset. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ + \n\ + move.d r11,r0 \n\ + move.d r11,r1 \n\ + move.d r11,r2 \n\ + move.d r11,r3 \n\ + move.d r11,r4 \n\ + move.d r11,r5 \n\ + move.d r11,r6 \n\ + move.d r11,r7 \n\ + move.d r11,r8 \n\ + move.d r11,r9 \n\ + move.d r11,r10 \n\ + \n\ + ;; Now we've got this: \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 12*4,r12 \n\ +0: \n\ + subq 12*4,r12 \n\ + bge 0b \n\ + movem r11,[r13+] \n\ + \n\ + addq 12*4,r12 ;; compensate for last loop underflowing n \n\ + \n\ + ;; Restore registers from stack \n\ + movem [sp+],r10" + + /* Outputs */ : "=r" (dst), "=r" (n) + /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); + + } + + /* Either we start storing directly, using dword stores + in a loop, or we store as much as possible with 'movem' + and then the last block (<48 bytes) is stored here. + This will work since the 'movem' asm will have updated dst and n. */ + + while ( n >= 16 ) + { + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + n -= 16; + } + + /* A switch() is definitely the fastest although it takes a LOT of code. + * Particularly if you inline code this. 
+ */ + switch (n) + { + case 0: + break; + case 1: + *(char*)dst = (char) lc; + break; + case 2: + *(short*)dst = (short) lc; + break; + case 3: + *((short*)dst)++ = (short) lc; + *(char*)dst = (char) lc; + break; + case 4: + *((long*)dst)++ = lc; + break; + case 5: + *((long*)dst)++ = lc; + *(char*)dst = (char) lc; + break; + case 6: + *((long*)dst)++ = lc; + *(short*)dst = (short) lc; + break; + case 7: + *((long*)dst)++ = lc; + *((short*)dst)++ = (short) lc; + *(char*)dst = (char) lc; + break; + case 8: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + break; + case 9: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *(char*)dst = (char) lc; + break; + case 10: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *(short*)dst = (short) lc; + break; + case 11: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((short*)dst)++ = (short) lc; + *(char*)dst = (char) lc; + break; + case 12: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + break; + case 13: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *(char*)dst = (char) lc; + break; + case 14: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *(short*)dst = (short) lc; + break; + case 15: + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((long*)dst)++ = lc; + *((short*)dst)++ = (short) lc; + *(char*)dst = (char) lc; + break; + } + } + + return return_dst; /* destination pointer. */ +} /* memset() */ +libc_hidden_def(memset) diff --git a/libc/string/cris/strcpy.c b/libc/string/cris/strcpy.c new file mode 100644 index 000000000..0af25253e --- /dev/null +++ b/libc/string/cris/strcpy.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2006-2007 Axis Communications AB + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+ */ + +#include <string.h> + /* strcpy: copy the NUL-terminated string at src, terminator included, to dest; returns the original dest. */ +libc_hidden_proto(strcpy) +char *strcpy(char *dest, const char *src) +{ + char *ret = dest; + unsigned long himagic = 0x80808080L; + unsigned long lomagic = 0x01010101L; + /* Copy byte by byte while src has any of its low (sizeof src - 1) bits set, returning as soon as the terminator has been copied. */ + while ((unsigned long)src & (sizeof src - 1)) + { + if (!(*dest++ = *src++)) + { + return ret; + } + } + /* Load src a word at a time. (value - lomagic) & himagic & ~value is nonzero when the word contains a zero byte; leave the loop then, otherwise store the whole word. dest is not re-aligned here -- assumes unaligned word stores are acceptable on this target, TODO confirm. */ + while (1) + { + unsigned long value = *(unsigned long*)src; + unsigned long magic; + + src += sizeof (unsigned long); + + if ((magic = (value - lomagic) & himagic)) + { + if (magic & ~value) + { + break; + } + } + + *(unsigned long*)dest = value; + dest += sizeof (unsigned long); + } + /* The last word loaded was not stored; step src back to its start. */ + src -= sizeof (unsigned long); + /* Copy that final word byte by byte, up to and including the terminator. */ + while ((*dest++ = *src++)) + { + } + + return ret; +} +libc_hidden_def(strcpy) diff --git a/libc/string/cris/strncpy.c b/libc/string/cris/strncpy.c new file mode 100644 index 000000000..93a6608bc --- /dev/null +++ b/libc/string/cris/strncpy.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2006-2007 Axis Communications AB + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+ */ + +#include <string.h> + +libc_hidden_proto(memset) + /* strncpy: copy at most count bytes of the string at src to dest; once the terminator has been copied the rest of the count bytes are zero-filled. Returns the original dest. */ +libc_hidden_proto(strncpy) +char *strncpy(char *dest, const char *src, size_t count) +{ + char *ret = dest; + unsigned long himagic = 0x80808080L; + unsigned long lomagic = 0x01010101L; + /* Copy byte by byte while count remains and src has any of its low (sizeof src - 1) bits set; go straight to the zero fill once the terminator has been copied. */ + while (count && (unsigned long)src & (sizeof src - 1)) + { + count--; + if (!(*dest++ = *src++)) + { + goto finalize; + } + } + /* Copy whole words while at least one word of count remains. (value - lomagic) & himagic & ~value is nonzero when the word contains a zero byte; in that case stop and let the byte loop below finish. */ + while (count >= sizeof (unsigned long)) + { + unsigned long value = *(unsigned long*)src; + unsigned long magic; + + if ((magic = (value - lomagic) & himagic)) + { + if (magic & ~value) + { + break; + } + } + + *(unsigned long*)dest = value; + dest += sizeof (unsigned long); + src += sizeof (unsigned long); + count -= sizeof (unsigned long); + } + /* Copy the remaining bytes one at a time, stopping after the terminator. */ + while (count) + { + count--; + if (!(*dest++ = *src++)) + break; + } + /* Zero-fill whatever is left of count; memset is skipped when nothing is left. */ +finalize: + if (count) + { + memset(dest, '\0', count); + } + + return ret; +} +libc_hidden_def(strncpy) diff --git a/libc/string/xtensa/Makefile b/libc/string/xtensa/Makefile new file mode 100644 index 000000000..0a95346fd --- /dev/null +++ b/libc/string/xtensa/Makefile @@ -0,0 +1,13 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. +# + +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S new file mode 100644 index 000000000..19f3a6818 --- /dev/null +++ b/libc/string/xtensa/memcpy.S @@ -0,0 +1,297 @@ +/* Optimized memcpy for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + + .macro src_b r, w0, w1 +#ifdef __XTENSA_EB__ + src \r, \w0, \w1 +#else + src \r, \w1, \w0 +#endif + .endm + + .macro ssa8 r +#ifdef __XTENSA_EB__ + ssa8b \r +#else + ssa8l \r +#endif + .endm + +/* If the Xtensa Unaligned Load Exception option is not used, this + code can run a few cycles faster by relying on the low address bits + being ignored. However, if the code is then run with an Xtensa ISS + client that checks for unaligned accesses, it will produce a lot of + warning messages. Set this flag to disable the use of unaligned + accesses and keep the ISS happy. */ + +#define UNALIGNED_ADDRESSES_CHECKED 1 + +/* Do not use .literal_position in the ENTRY macro. */ +#undef LITERAL_POSITION +#define LITERAL_POSITION + + +/* void *memcpy (void *dst, const void *src, size_t len) + + The algorithm is as follows: + + If the destination is unaligned, align it by conditionally + copying 1- and/or 2-byte pieces. + + If the source is aligned, copy 16 bytes with a loop, and then finish up + with 8, 4, 2, and 1-byte copies conditional on the length. + + Else (if source is unaligned), do the same, but use SRC to align the + source data. + + This code tries to use fall-through branches for the common + case of aligned source and destination and multiple of 4 (or 8) length. */ + + +/* Byte by byte copy. 
*/ + + .text + .align 4 + .literal_position +__memcpy_aux: + + /* Skip a byte to get 1 mod 4 alignment for LOOPNEZ + (0 mod 4 alignment for LBEG). */ + .byte 0 + +.Lbytecopy: +#if XCHAL_HAVE_LOOPS + loopnez a4, 2f +#else + beqz a4, 2f + add a7, a3, a4 // a7 = end address for source +#endif +1: l8ui a6, a3, 0 + addi a3, a3, 1 + s8i a6, a5, 0 + addi a5, a5, 1 +#if !XCHAL_HAVE_LOOPS + bltu a3, a7, 1b // unsigned: a3/a7 are addresses +#endif +2: retw + + +/* Destination is unaligned. */ + + .align 4 +.Ldst1mod2: // dst is only byte aligned + + /* Do short copies byte-by-byte. */ + _bltui a4, 7, .Lbytecopy + + /* Copy 1 byte. */ + l8ui a6, a3, 0 + addi a3, a3, 1 + addi a4, a4, -1 + s8i a6, a5, 0 + addi a5, a5, 1 + + /* Return to main algorithm if dst is now aligned. */ + _bbci.l a5, 1, .Ldstaligned + +.Ldst2mod4: // dst has 16-bit alignment + + /* Do short copies byte-by-byte. */ + _bltui a4, 6, .Lbytecopy + + /* Copy 2 bytes. */ + l8ui a6, a3, 0 + l8ui a7, a3, 1 + addi a3, a3, 2 + addi a4, a4, -2 + s8i a6, a5, 0 + s8i a7, a5, 1 + addi a5, a5, 2 + + /* dst is now aligned; return to main algorithm. */ + j .Ldstaligned + + +ENTRY (memcpy) + /* a2 = dst, a3 = src, a4 = len */ + + mov a5, a2 // copy dst so that a2 is return value + _bbsi.l a2, 0, .Ldst1mod2 + _bbsi.l a2, 1, .Ldst2mod4 +.Ldstaligned: + + /* Get number of loop iterations with 16B per iteration. */ + srli a7, a4, 4 + + /* Check if source is aligned. */ + movi a8, 3 + _bany a3, a8, .Lsrcunaligned + + /* Destination and source are word-aligned, use word copy. */ +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a8, a7, 4 + add a8, a8, a3 // a8 = end of last 16B source chunk +#endif +1: l32i a6, a3, 0 + l32i a7, a3, 4 + s32i a6, a5, 0 + l32i a6, a3, 8 + s32i a7, a5, 4 + l32i a7, a3, 12 + s32i a6, a5, 8 + addi a3, a3, 16 + s32i a7, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a3, a8, 1b // unsigned: a3/a8 are addresses +#endif + + /* Copy any leftover pieces smaller than 16B. */ +2: bbci.l a4, 3, 3f + + /* Copy 8 bytes. 
*/ + l32i a6, a3, 0 + l32i a7, a3, 4 + addi a3, a3, 8 + s32i a6, a5, 0 + s32i a7, a5, 4 + addi a5, a5, 8 + +3: bbsi.l a4, 2, 4f + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f + retw + + /* Copy 4 bytes. */ +4: l32i a6, a3, 0 + addi a3, a3, 4 + s32i a6, a5, 0 + addi a5, a5, 4 + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f + retw + + /* Copy 2 bytes. */ +5: l16ui a6, a3, 0 + addi a3, a3, 2 + s16i a6, a5, 0 + addi a5, a5, 2 + bbsi.l a4, 0, 6f + retw + + /* Copy 1 byte. */ +6: l8ui a6, a3, 0 + s8i a6, a5, 0 + +.Ldone: + retw + + +/* Destination is aligned; source is unaligned. */ + + .align 4 +.Lsrcunaligned: + /* Avoid loading anything for zero-length copies. */ + _beqz a4, .Ldone + + /* Copy 16 bytes per iteration for word-aligned dst and + unaligned src. */ + ssa8 a3 // set shift amount from byte offset +#if UNALIGNED_ADDRESSES_CHECKED + and a11, a3, a8 // save unalignment offset for below + sub a3, a3, a11 // align a3 +#endif + l32i a6, a3, 0 // load first word +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a10, a7, 4 + add a10, a10, a3 // a10 = end of last 16B source chunk +#endif +1: l32i a7, a3, 4 + l32i a8, a3, 8 + src_b a6, a6, a7 + s32i a6, a5, 0 + l32i a9, a3, 12 + src_b a7, a7, a8 + s32i a7, a5, 4 + l32i a6, a3, 16 + src_b a8, a8, a9 + s32i a8, a5, 8 + addi a3, a3, 16 + src_b a9, a9, a6 + s32i a9, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a3, a10, 1b // unsigned: a3/a10 are addresses +#endif + +2: bbci.l a4, 3, 3f + + /* Copy 8 bytes. */ + l32i a7, a3, 4 + l32i a8, a3, 8 + src_b a6, a6, a7 + s32i a6, a5, 0 + addi a3, a3, 8 + src_b a7, a7, a8 + s32i a7, a5, 4 + addi a5, a5, 8 + mov a6, a8 + +3: bbci.l a4, 2, 4f + + /* Copy 4 bytes. */ + l32i a7, a3, 4 + addi a3, a3, 4 + src_b a6, a6, a7 + s32i a6, a5, 0 + addi a5, a5, 4 + mov a6, a7 +4: +#if UNALIGNED_ADDRESSES_CHECKED + add a3, a3, a11 // readjust a3 with correct misalignment +#endif + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f + retw + + /* Copy 2 bytes. 
*/ +5: l8ui a6, a3, 0 + l8ui a7, a3, 1 + addi a3, a3, 2 + s8i a6, a5, 0 + s8i a7, a5, 1 + addi a5, a5, 2 + bbsi.l a4, 0, 6f + retw + + /* Copy 1 byte. */ +6: l8ui a6, a3, 0 + s8i a6, a5, 0 + retw + +libc_hidden_def (memcpy) diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S new file mode 100644 index 000000000..c0928825d --- /dev/null +++ b/libc/string/xtensa/memset.S @@ -0,0 +1,165 @@ +/* Optimized memset for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + +/* Do not use .literal_position in the ENTRY macro. */ +#undef LITERAL_POSITION +#define LITERAL_POSITION + +/* void *memset (void *dst, int c, size_t length) + + The algorithm is as follows: + + Create a word with c in all byte positions. + + If the destination is aligned, set 16B chunks with a loop, and then + finish up with 8B, 4B, 2B, and 1B stores conditional on the length. + + If the destination is unaligned, align it by conditionally + setting 1B and/or 2B and then go to aligned case. 
+ + This code tries to use fall-through branches for the common + case of an aligned destination (except for the branches to + the alignment labels). */ + + +/* Byte-by-byte set. */ + + .text + .align 4 + .literal_position +__memset_aux: + + /* Skip a byte to get 1 mod 4 alignment for LOOPNEZ + (0 mod 4 alignment for LBEG). */ + .byte 0 + +.Lbyteset: +#if XCHAL_HAVE_LOOPS + loopnez a4, 2f +#else + beqz a4, 2f + add a6, a5, a4 // a6 = ending address +#endif +1: s8i a3, a5, 0 + addi a5, a5, 1 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b // unsigned: a5/a6 are addresses +#endif +2: retw + + +/* Destination is unaligned. */ + + .align 4 + +.Ldst1mod2: // dst is only byte aligned + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 1 byte. */ + s8i a3, a5, 0 + addi a5, a5, 1 + addi a4, a4, -1 + + /* Now retest if dst is aligned. */ + _bbci.l a5, 1, .Ldstaligned + +.Ldst2mod4: // dst has 16-bit alignment + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 + addi a4, a4, -2 + + /* dst is now aligned; return to main algorithm */ + j .Ldstaligned + + +ENTRY (memset) + /* a2 = dst, a3 = c, a4 = length */ + + /* Duplicate character into all bytes of word. */ + extui a3, a3, 0, 8 + slli a7, a3, 8 + or a3, a3, a7 + slli a7, a3, 16 + or a3, a3, a7 + + mov a5, a2 // copy dst so that a2 is return value + + /* Check if dst is unaligned. */ + _bbsi.l a2, 0, .Ldst1mod2 + _bbsi.l a2, 1, .Ldst2mod4 +.Ldstaligned: + + /* Get number of loop iterations with 16B per iteration. */ + srli a7, a4, 4 + + /* Destination is word-aligned. */ +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a6, a7, 4 + add a6, a6, a5 // a6 = end of last 16B chunk +#endif + /* Set 16 bytes per iteration. */ +1: s32i a3, a5, 0 + s32i a3, a5, 4 + s32i a3, a5, 8 + s32i a3, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b // unsigned: a5/a6 are addresses +#endif + + /* Set any leftover pieces smaller than 16B. */ +2: bbci.l a4, 3, 3f + + /* Set 8 bytes. 
*/ + s32i a3, a5, 0 + s32i a3, a5, 4 + addi a5, a5, 8 + +3: bbci.l a4, 2, 4f + + /* Set 4 bytes. */ + s32i a3, a5, 0 + addi a5, a5, 4 + +4: bbci.l a4, 1, 5f + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 + +5: bbci.l a4, 0, 6f + + /* Set 1 byte. */ + s8i a3, a5, 0 +6: retw + +libc_hidden_def (memset) diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S new file mode 100644 index 000000000..90c418d12 --- /dev/null +++ b/libc/string/xtensa/strcmp.S @@ -0,0 +1,313 @@ +/* Optimized strcmp for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. 
*/ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + +#ifdef __XTENSA_EB__ +#define MASK0 0xff000000 +#define MASK1 0x00ff0000 +#define MASK2 0x0000ff00 +#define MASK3 0x000000ff +#else +#define MASK0 0x000000ff +#define MASK1 0x0000ff00 +#define MASK2 0x00ff0000 +#define MASK3 0xff000000 +#endif + +#define MASK4 0x40404040 + + .literal .Lmask0, MASK0 + .literal .Lmask1, MASK1 + .literal .Lmask2, MASK2 + .literal .Lmask3, MASK3 + .literal .Lmask4, MASK4 + + .text +ENTRY (strcmp) + /* a2 = s1, a3 = s2 */ + + l8ui a8, a2, 0 // byte 0 from s1 + l8ui a9, a3, 0 // byte 0 from s2 + movi a10, 3 // mask + bne a8, a9, .Lretdiff + + or a11, a2, a3 + bnone a11, a10, .Laligned + + xor a11, a2, a3 // compare low two bits of s1 and s2 + bany a11, a10, .Lunaligned // if they have different alignment + + /* s1/s2 are not word-aligned. */ + addi a2, a2, 1 // advance s1 + beqz a8, .Leq // bytes equal, if zero, strings are equal + addi a3, a3, 1 // advance s2 + bnone a2, a10, .Laligned // if s1/s2 now aligned + l8ui a8, a2, 0 // byte 1 from s1 + l8ui a9, a3, 0 // byte 1 from s2 + addi a2, a2, 1 // advance s1 + bne a8, a9, .Lretdiff // if different, return difference + beqz a8, .Leq // bytes equal, if zero, strings are equal + addi a3, a3, 1 // advance s2 + bnone a2, a10, .Laligned // if s1/s2 now aligned + l8ui a8, a2, 0 // byte 2 from s1 + l8ui a9, a3, 0 // byte 2 from s2 + addi a2, a2, 1 // advance s1 + bne a8, a9, .Lretdiff // if different, return difference + beqz a8, .Leq // bytes equal, if zero, strings are equal + addi a3, a3, 1 // advance s2 + j .Laligned + +/* s1 and s2 have different alignment. + + If the zero-overhead loop option is available, use an (almost) + infinite zero-overhead loop with conditional exits so we only pay + for taken branches when exiting the loop. 
+ + Note: It is important for this unaligned case to come before the + code for aligned strings, because otherwise some of the branches + above cannot reach and have to be transformed to branches around + jumps. The unaligned code is smaller and the branches can reach + over it. */ + + .align 4 + /* (2 mod 4) alignment for loop instruction */ +.Lunaligned: +#if XCHAL_HAVE_LOOPS + _movi.n a8, 0 // set up for the maximum loop count + loop a8, .Lretdiff // loop forever (almost anyway) +#endif +.Lnextbyte: + l8ui a8, a2, 0 + l8ui a9, a3, 0 + addi a2, a2, 1 + bne a8, a9, .Lretdiff + addi a3, a3, 1 +#if XCHAL_HAVE_LOOPS + beqz a8, .Lretdiff +#else + bnez a8, .Lnextbyte +#endif +.Lretdiff: + sub a2, a8, a9 + retw + +/* s1 is word-aligned; s2 is word-aligned. + + If the zero-overhead loop option is available, use an (almost) + infinite zero-overhead loop with conditional exits so we only pay + for taken branches when exiting the loop. */ + +/* New algorithm, relying on the fact that all normal ASCII is between + 32 and 127. + + Rather than check all bytes for zero: + Take one word (4 bytes). Call it w1. + Shift w1 left by one into w1'. + Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't. + Check that all 4 bit 6's (one for each byte) are one: + If they are, we are definitely not done. + If they are not, we are probably done, but need to check for zero. */ + + .align 4 +#if XCHAL_HAVE_LOOPS +.Laligned: + .begin no-transform + l32r a4, .Lmask0 // mask for byte 0 + l32r a7, .Lmask4 + /* Loop forever. a4 cannot be the count: MASK0 is only 0xff on + little-endian, so the loop would expire after 255 iterations and + fall into .Lprobeq2. Use a0: with the windowed ABI the top two + bits of the return address are nonzero, so it exceeds any real + iteration count. */ + loop a0, .Laligned_done + + /* First unrolled loop body. */ + l32i a8, a2, 0 // get word from s1 + l32i a9, a3, 0 // get word from s2 + slli a5, a8, 1 + bne a8, a9, .Lwne2 + or a9, a8, a5 + bnall a9, a7, .Lprobeq + + /* Second unrolled loop body. 
*/ + l32i a8, a2, 4 // get word from s1+4 + l32i a9, a3, 4 // get word from s2+4 + slli a5, a8, 1 + bne a8, a9, .Lwne2 + or a9, a8, a5 + bnall a9, a7, .Lprobeq2 + + addi a2, a2, 8 // advance s1 pointer + addi a3, a3, 8 // advance s2 pointer +.Laligned_done: + or a1, a1, a1 // nop + +.Lprobeq2: + /* Adjust pointers to account for the loop unrolling. */ + addi a2, a2, 4 + addi a3, a3, 4 + +#else /* !XCHAL_HAVE_LOOPS */ + +.Laligned: + movi a4, MASK0 // mask for byte 0 + movi a7, MASK4 + j .Lfirstword +.Lnextword: + addi a2, a2, 4 // advance s1 pointer + addi a3, a3, 4 // advance s2 pointer +.Lfirstword: + l32i a8, a2, 0 // get word from s1 + l32i a9, a3, 0 // get word from s2 + slli a5, a8, 1 + bne a8, a9, .Lwne2 + or a9, a8, a5 + ball a9, a7, .Lnextword +#endif /* !XCHAL_HAVE_LOOPS */ + + /* align (0 mod 4) */ +.Lprobeq: + /* Words are probably equal, but check for sure. + If not, loop over the rest of string using normal algorithm. */ + + bnone a8, a4, .Leq // if byte 0 is zero + l32r a5, .Lmask1 // mask for byte 1 + l32r a6, .Lmask2 // mask for byte 2 + bnone a8, a5, .Leq // if byte 1 is zero + l32r a7, .Lmask3 // mask for byte 3 + bnone a8, a6, .Leq // if byte 2 is zero + bnone a8, a7, .Leq // if byte 3 is zero + addi.n a2, a2, 4 // advance s1 pointer + addi.n a3, a3, 4 // advance s2 pointer +#if XCHAL_HAVE_LOOPS + + /* align (1 mod 4) */ + loop a0, .Leq // loop forever (a0, the return address, is bigger than max iters; a4 is only 0xff on little-endian and would fall into .Leq) + .end no-transform + + l32i a8, a2, 0 // get word from s1 + l32i a9, a3, 0 // get word from s2 + addi a2, a2, 4 // advance s1 pointer + bne a8, a9, .Lwne + bnone a8, a4, .Leq // if byte 0 is zero + bnone a8, a5, .Leq // if byte 1 is zero + bnone a8, a6, .Leq // if byte 2 is zero + bnone a8, a7, .Leq // if byte 3 is zero + addi a3, a3, 4 // advance s2 pointer + +#else /* !XCHAL_HAVE_LOOPS */ + + j .Lfirstword2 +.Lnextword2: + addi a3, a3, 4 // advance s2 pointer +.Lfirstword2: + l32i a8, a2, 0 // get word from s1 + l32i a9, a3, 0 // get word from s2 + addi a2, a2, 4 
// advance s1 pointer + bne a8, a9, .Lwne + bnone a8, a4, .Leq // if byte 0 is zero + bnone a8, a5, .Leq // if byte 1 is zero + bnone a8, a6, .Leq // if byte 2 is zero + bany a8, a7, .Lnextword2 // if byte 3 is nonzero +#endif /* !XCHAL_HAVE_LOOPS */ + + /* Words are equal; some byte is zero. */ +.Leq: movi a2, 0 // return equal + retw + +.Lwne2: /* Words are not equal. On big-endian processors, if none of the + bytes are zero, the return value can be determined by a simple + comparison. */ +#ifdef __XTENSA_EB__ + or a10, a8, a5 + bnall a10, a7, .Lsomezero + bgeu a8, a9, .Lposreturn + movi a2, -1 + retw +.Lposreturn: + movi a2, 1 + retw +.Lsomezero: // There is probably some zero byte. +#endif /* __XTENSA_EB__ */ +.Lwne: /* Words are not equal. */ + xor a2, a8, a9 // get word with nonzero in byte that differs + bany a2, a4, .Ldiff0 // if byte 0 differs + movi a5, MASK1 // mask for byte 1 + bnone a8, a4, .Leq // if byte 0 is zero + bany a2, a5, .Ldiff1 // if byte 1 differs + movi a6, MASK2 // mask for byte 2 + bnone a8, a5, .Leq // if byte 1 is zero + bany a2, a6, .Ldiff2 // if byte 2 differs + bnone a8, a6, .Leq // if byte 2 is zero +#ifdef __XTENSA_EB__ +.Ldiff3: +.Ldiff2: +.Ldiff1: + /* Byte 0 is equal (at least) and there is a difference before a zero + byte. Just subtract words to get the return value. + The high order equal bytes cancel, leaving room for the sign. */ + sub a2, a8, a9 + retw + +.Ldiff0: + /* Need to make room for the sign, so can't subtract whole words. */ + extui a10, a8, 24, 8 + extui a11, a9, 24, 8 + sub a2, a10, a11 + retw + +#else /* !__XTENSA_EB__ */ + /* Little-endian is a little more difficult because can't subtract + whole words. */ +.Ldiff3: + /* Bytes 0-2 are equal; byte 3 is different. + For little-endian need to have a sign bit for the difference. */ + extui a10, a8, 24, 8 + extui a11, a9, 24, 8 + sub a2, a10, a11 + retw + +.Ldiff0: + /* Byte 0 is different. 
*/ + extui a10, a8, 0, 8 + extui a11, a9, 0, 8 + sub a2, a10, a11 + retw + +.Ldiff1: + /* Byte 0 is equal; byte 1 is different. */ + extui a10, a8, 8, 8 + extui a11, a9, 8, 8 + sub a2, a10, a11 + retw + +.Ldiff2: + /* Bytes 0-1 are equal; byte 2 is different. */ + extui a10, a8, 16, 8 + extui a11, a9, 16, 8 + sub a2, a10, a11 + retw + +#endif /* !__XTENSA_EB__ */ + +libc_hidden_def (strcmp) + +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias (strcmp, strcoll) +libc_hidden_def (strcoll) +#endif diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S new file mode 100644 index 000000000..108070384 --- /dev/null +++ b/libc/string/xtensa/strcpy.S @@ -0,0 +1,150 @@ +/* Optimized strcpy for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. 
*/ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + +#ifdef __XTENSA_EB__ +#define MASK0 0xff000000 +#define MASK1 0x00ff0000 +#define MASK2 0x0000ff00 +#define MASK3 0x000000ff +#else +#define MASK0 0x000000ff +#define MASK1 0x0000ff00 +#define MASK2 0x00ff0000 +#define MASK3 0xff000000 +#endif + + .text +ENTRY (strcpy) + /* a2 = dst, a3 = src */ + + mov a10, a2 // leave dst in return value register + movi a4, MASK0 + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a3, 0, .Lsrc1mod2 + bbsi.l a3, 1, .Lsrc2mod4 +.Lsrcaligned: + + /* Check if the destination is aligned. */ + movi a8, 3 + bnone a10, a8, .Laligned + + j .Ldstunaligned + +.Lsrc1mod2: // src address is odd + l8ui a8, a3, 0 // get byte 0 + addi a3, a3, 1 // advance src pointer + s8i a8, a10, 0 // store byte 0 + beqz a8, 1f // if byte 0 is zero + addi a10, a10, 1 // advance dst pointer + bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned + +.Lsrc2mod4: // src address is 2 mod 4 + l8ui a8, a3, 0 // get byte 0 + /* 1-cycle interlock */ + s8i a8, a10, 0 // store byte 0 + beqz a8, 1f // if byte 0 is zero + l8ui a8, a3, 1 // get byte 1 + addi a3, a3, 2 // advance src pointer + s8i a8, a10, 1 // store byte 1 + addi a10, a10, 2 // advance dst pointer + bnez a8, .Lsrcaligned +1: retw + + +/* dst is word-aligned; src is word-aligned. 
*/ + + .align 4 +#if XCHAL_HAVE_LOOPS + /* (2 mod 4) alignment for loop instruction */ +.Laligned: + _movi.n a8, 0 // set up for the maximum loop count + loop a8, .Lz3 // loop forever (almost anyway) + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a4, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + bnone a8, a7, .Lz3 // if byte 3 is zero + addi a10, a10, 4 // advance dst pointer + +#else /* !XCHAL_HAVE_LOOPS */ + +1: addi a10, a10, 4 // advance dst pointer +.Laligned: + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a4, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + bany a8, a7, 1b // if byte 3 is nonzero +#endif /* !XCHAL_HAVE_LOOPS */ + +.Lz3: /* Byte 3 is zero. */ + retw + +.Lz0: /* Byte 0 is zero. */ +#ifdef __XTENSA_EB__ + movi a8, 0 +#endif + s8i a8, a10, 0 + retw + +.Lz1: /* Byte 1 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 + retw + +.Lz2: /* Byte 2 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 + movi a8, 0 + s8i a8, a10, 2 + retw + + .align 4 + /* (2 mod 4) alignment for loop instruction */ +.Ldstunaligned: + +#if XCHAL_HAVE_LOOPS + _movi.n a8, 0 // set up for the maximum loop count + loop a8, 2f // loop forever (almost anyway) +#endif +1: l8ui a8, a3, 0 + addi a3, a3, 1 + s8i a8, a10, 0 + addi a10, a10, 1 +#if XCHAL_HAVE_LOOPS + beqz a8, 2f +#else + bnez a8, 1b +#endif +2: retw + +libc_hidden_def (strcpy) diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S new file mode 100644 index 000000000..dd72c16fa --- /dev/null +++ b/libc/string/xtensa/strlen.S @@ -0,0 +1,104 @@ +/* Optimized strlen for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + +#ifdef __XTENSA_EB__ +#define MASK0 0xff000000 +#define MASK1 0x00ff0000 +#define MASK2 0x0000ff00 +#define MASK3 0x000000ff +#else +#define MASK0 0x000000ff +#define MASK1 0x0000ff00 +#define MASK2 0x00ff0000 +#define MASK3 0xff000000 +#endif + + .text +ENTRY (strlen) + /* a2 = s */ + + addi a3, a2, -4 // because we overincrement at the end + movi a4, MASK0 + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a2, 0, .L1mod2 + bbsi.l a2, 1, .L2mod4 + j .Laligned + +.L1mod2: // address is odd + l8ui a8, a3, 4 // get byte 0 + addi a3, a3, 1 // advance string pointer + beqz a8, .Lz3 // if byte 0 is zero + bbci.l a3, 1, .Laligned // if string pointer is now word-aligned + +.L2mod4: // address is 2 mod 4 + addi a3, a3, 2 // advance ptr for aligned access + l32i a8, a3, 0 // get word with first two bytes of string + bnone a8, a6, .Lz2 // if byte 2 (of word, not string) is zero + bany a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero + + /* Byte 3 is zero. */ + addi a3, a3, 3 // point to zero byte + sub a2, a3, a2 // subtract to get length + retw + + +/* String is word-aligned. 
*/ + + .align 4 + /* (2 mod 4) alignment for loop instruction */ +.Laligned: +#if XCHAL_HAVE_LOOPS + _movi.n a8, 0 // set up for the maximum loop count + loop a8, .Lz3 // loop forever (almost anyway) +#endif +1: l32i a8, a3, 4 // get next word of string + addi a3, a3, 4 // advance string pointer + bnone a8, a4, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero +#if XCHAL_HAVE_LOOPS + bnone a8, a7, .Lz3 // if byte 3 is zero +#else + bany a8, a7, 1b // repeat if byte 3 is non-zero +#endif + +.Lz3: /* Byte 3 is zero. */ + addi a3, a3, 3 // point to zero byte + /* Fall through.... */ + +.Lz0: /* Byte 0 is zero. */ + sub a2, a3, a2 // subtract to get length + retw + +.Lz1: /* Byte 1 is zero. */ + addi a3, a3, 1 // point to zero byte + sub a2, a3, a2 // subtract to get length + retw + +.Lz2: /* Byte 2 is zero. */ + addi a3, a3, 2 // point to zero byte + sub a2, a3, a2 // subtract to get length + retw + +libc_hidden_def (strlen) diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S new file mode 100644 index 000000000..7ba2ef77d --- /dev/null +++ b/libc/string/xtensa/strncpy.S @@ -0,0 +1,241 @@ +/* Optimized strncpy for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <bits/xtensa-config.h> + +#ifdef __XTENSA_EB__ +#define MASK0 0xff000000 +#define MASK1 0x00ff0000 +#define MASK2 0x0000ff00 +#define MASK3 0x000000ff +#else +#define MASK0 0x000000ff +#define MASK1 0x0000ff00 +#define MASK2 0x00ff0000 +#define MASK3 0xff000000 +#endif + +/* Do not use .literal_position in the ENTRY macro. */ +#undef LITERAL_POSITION +#define LITERAL_POSITION + + .text + .align 4 + .literal_position +__strncpy_aux: + +.Lsrc1mod2: // src address is odd + l8ui a8, a3, 0 // get byte 0 + addi a3, a3, 1 // advance src pointer + s8i a8, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned + +.Lsrc2mod4: // src address is 2 mod 4 + l8ui a8, a3, 0 // get byte 0 + addi a4, a4, -1 // decrement n + s8i a8, a10, 0 // store byte 0 + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + l8ui a8, a3, 1 // get byte 0 + addi a3, a3, 2 // advance src pointer + s8i a8, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + bnez a8, .Lsrcaligned + j .Lfill + +.Lret: + retw + + +ENTRY (strncpy) + /* a2 = dst, a3 = src */ + + mov a10, a2 // leave dst in return value register + beqz a4, .Lret // if n is zero + + movi a11, MASK0 + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a3, 0, .Lsrc1mod2 + bbsi.l a3, 1, .Lsrc2mod4 +.Lsrcaligned: + + /* Check if the destination is aligned. 
*/ + movi a8, 3 + bnone a10, a8, .Laligned + + j .Ldstunaligned + + +/* Fill the dst with zeros -- n is at least 1. */ + +.Lfill: + movi a9, 0 + bbsi.l a10, 0, .Lfill1mod2 + bbsi.l a10, 1, .Lfill2mod4 +.Lfillaligned: + blti a4, 4, .Lfillcleanup + + /* Loop filling complete words with zero. */ +#if XCHAL_HAVE_LOOPS + + srai a8, a4, 2 + loop a8, 1f + s32i a9, a10, 0 + addi a10, a10, 4 + +1: slli a8, a8, 2 + sub a4, a4, a8 + +#else /* !XCHAL_HAVE_LOOPS */ + +1: s32i a9, a10, 0 + addi a10, a10, 4 + addi a4, a4, -4 + bgei a4, 4, 1b + +#endif /* !XCHAL_HAVE_LOOPS */ + + beqz a4, 2f + +.Lfillcleanup: + /* Fill leftover (1 to 3) bytes with zero. */ + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + addi a10, a10, 1 + bnez a4, .Lfillcleanup + +2: retw + +.Lfill1mod2: // dst address is odd + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + addi a10, a10, 1 // advance dst pointer + bbci.l a10, 1, .Lfillaligned // if dst is now word-aligned + +.Lfill2mod4: // dst address is 2 mod 4 + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + s8i a9, a10, 1 // store byte 1 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + addi a10, a10, 2 // advance dst pointer + j .Lfillaligned + + +/* dst is word-aligned; src is word-aligned; n is at least 1. 
*/ + + .align 4 + /* (2 mod 4) alignment for loop instruction */ +.Laligned: +#if XCHAL_HAVE_LOOPS + _movi.n a8, 0 // set up for the maximum loop count + loop a8, 1f // loop forever (almost anyway) + blti a4, 5, .Ldstunaligned // n is near limit; do one at a time + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a11, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + addi a4, a4, -4 // decrement n + addi a10, a10, 4 // advance dst pointer + bnone a8, a7, .Lfill // if byte 3 is zero +1: + +#else /* !XCHAL_HAVE_LOOPS */ + +1: blti a4, 5, .Ldstunaligned // n is near limit; do one at a time + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a11, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + addi a4, a4, -4 // decrement n + addi a10, a10, 4 // advance dst pointer + bany a8, a7, 1b // no zeroes +#endif /* !XCHAL_HAVE_LOOPS */ + + j .Lfill + +.Lz0: /* Byte 0 is zero. */ +#ifdef __XTENSA_EB__ + movi a8, 0 +#endif + s8i a8, a10, 0 + addi a4, a4, -1 // decrement n + addi a10, a10, 1 // advance dst pointer + j .Lfill + +.Lz1: /* Byte 1 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 + addi a4, a4, -2 // decrement n + addi a10, a10, 2 // advance dst pointer + j .Lfill + +.Lz2: /* Byte 2 is zero. 
*/ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 + movi a8, 0 + s8i a8, a10, 2 + addi a4, a4, -3 // decrement n + addi a10, a10, 3 // advance dst pointer + j .Lfill + + .align 4 + /* (2 mod 4) alignment for loop instruction */ +.Ldstunaligned: + +#if XCHAL_HAVE_LOOPS + _movi.n a8, 0 // set up for the maximum loop count + loop a8, 2f // loop forever (almost anyway) +#endif +1: l8ui a8, a3, 0 + addi a3, a3, 1 + s8i a8, a10, 0 + addi a4, a4, -1 + beqz a4, 3f + addi a10, a10, 1 +#if XCHAL_HAVE_LOOPS + beqz a8, 2f +#else + bnez a8, 1b +#endif +2: j .Lfill + +3: retw + +libc_hidden_def (strncpy) |