summaryrefslogtreecommitdiffstats
path: root/libc/string/generic/memcopy.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string/generic/memcopy.h')
-rw-r--r--libc/string/generic/memcopy.h228
1 files changed, 210 insertions, 18 deletions
diff --git a/libc/string/generic/memcopy.h b/libc/string/generic/memcopy.h
index df1ba9a97..555d26efe 100644
--- a/libc/string/generic/memcopy.h
+++ b/libc/string/generic/memcopy.h
@@ -107,24 +107,6 @@ typedef unsigned char byte;
} \
} while (0)
-/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
- the assumption that DST_BP is aligned on an OPSIZ multiple. If
- not all bytes could be easily copied, store remaining number of bytes
- in NBYTES_LEFT, otherwise store 0. */
-/* extern void _wordcopy_fwd_aligned __P ((long int, long int, size_t)); */
-/* extern void _wordcopy_fwd_dest_aligned __P ((long int, long int, size_t)); */
-#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
- do \
- { \
- if (src_bp % OPSIZ == 0) \
- _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
- else \
- _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
- src_bp += (nbytes) & -OPSIZ; \
- dst_bp += (nbytes) & -OPSIZ; \
- (nbytes_left) = (nbytes) % OPSIZ; \
- } while (0)
-
/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
beginning at the words (of type op_t) right before the pointers and
continuing towards smaller addresses. May take advantage of that
@@ -148,3 +130,213 @@ typedef unsigned char byte;
/* Threshold value for when to enter the unrolled loops. */
#define OP_T_THRES 16
+
+#ifdef __ARCH_HAS_BWD_MEMCPY__
+
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1;
+ a0 = a1 = 0L;
+ switch (len % 8)
+ {
+ case 2:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 6 * OPSIZ;
+ dstp -= 7 * OPSIZ;
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 5 * OPSIZ;
+ dstp -= 6 * OPSIZ;
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 4 * OPSIZ;
+ dstp -= 5 * OPSIZ;
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 3 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 2 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 1 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do6;
+
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 0 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp)[0];
+ srcp -=-1 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do
+ {
+ do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ do7:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = a0;
+ do6:
+ a0 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = a1;
+ do5:
+ a1 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = a0;
+ do4:
+ a0 = ((op_t *) srcp)[4];
+ ((op_t *) dstp)[4] = a1;
+ do3:
+ a1 = ((op_t *) srcp)[5];
+ ((op_t *) dstp)[5] = a0;
+ do2:
+ a0 = ((op_t *) srcp)[6];
+ ((op_t *) dstp)[6] = a1;
+ do1:
+ a1 = ((op_t *) srcp)[7];
+ ((op_t *) dstp)[7] = a0;
+
+ srcp += 8 * OPSIZ;
+ dstp += 8 * OPSIZ;
+ len -= 8;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+ a0 = a1 = a2 = a3 = 0L;
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+
+ switch (len % 4)
+ {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ srcp -= 1 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ srcp -= 0 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ srcp -=-1 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ len += 0;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp -=-2 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do
+ {
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+ do3:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
+ do2:
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
+ do1:
+ a3 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
+
+ srcp += 4 * OPSIZ;
+ dstp += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+}
+
+
+/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
+ the assumption that DST_BP is aligned on an OPSIZ multiple. If
+ not all bytes could be easily copied, store remaining number of bytes
+ in NBYTES_LEFT, otherwise store 0. */
+/* extern void _wordcopy_fwd_aligned __P ((long int, long int, size_t)); */
+/* extern void _wordcopy_fwd_dest_aligned __P ((long int, long int, size_t)); */
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
+ do \
+ { \
+ if (src_bp % OPSIZ == 0) \
+ _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
+ else \
+ _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
+ src_bp += (nbytes) & -OPSIZ; \
+ dst_bp += (nbytes) & -OPSIZ; \
+ (nbytes_left) = (nbytes) % OPSIZ; \
+ } while (0)
+
+#endif /* __ARCH_HAS_BWD_MEMCPY__ */
+