|
|
Message-Id: <20251113161518.57357-2-pincheng.plct@isrc.iscas.ac.cn>
Date: Fri, 14 Nov 2025 00:15:18 +0800
From: Pincheng Wang <pincheng.plct@...c.iscas.ac.cn>
To: musl@...ts.openwall.com
Cc: pincheng.plct@...c.iscas.ac.cn
Subject: [PATCH 1/1] riscv64: add optimized memset, memcpy and memmove
Add RISC-V vector extension (RVV) optimized memset, memcpy and memmove
implementations with runtime CPU capability detection via AT_HWCAP.
The implementations provide both vector and scalar variants in a single
binary. At process startup, __init_riscv_string_optimizations() queries
AT_HWCAP to detect RVV support and selects the appropriate
implementations via function pointer dispatch. This allows the same libc
to run correctly on both vector-capable and non-vector RISC-V CPUs.
The vector implementation uses m8 register grouping and processes data
in vector-length chunks, providing significant performance improvements
on RVV-capable hardware while maintaining compatibility with non-vector
RISC-V systems.
Signed-off-by: Pincheng Wang <pincheng.plct@...c.iscas.ac.cn>
---
src/env/__libc_start_main.c | 3 ++
src/internal/libc.h | 3 ++
src/string/riscv64/memcpy.c | 4 ++
src/string/riscv64/memcpy_vector.S | 27 ++++++++++++++
src/string/riscv64/memmove.c | 4 ++
src/string/riscv64/memmove_vector.S | 51 ++++++++++++++++++++++++++
src/string/riscv64/memset.c | 4 ++
src/string/riscv64/memset_dispatch.c | 36 ++++++++++++++++++
src/string/riscv64/memset_vector.S | 28 ++++++++++++++
src/string/riscv64/string_dispatch.c | 55 ++++++++++++++++++++++++++++
10 files changed, 215 insertions(+)
create mode 100644 src/string/riscv64/memcpy.c
create mode 100644 src/string/riscv64/memcpy_vector.S
create mode 100644 src/string/riscv64/memmove.c
create mode 100644 src/string/riscv64/memmove_vector.S
create mode 100644 src/string/riscv64/memset.c
create mode 100644 src/string/riscv64/memset_dispatch.c
create mode 100644 src/string/riscv64/memset_vector.S
create mode 100644 src/string/riscv64/string_dispatch.c
diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c
index c5b277bd..c23e63f4 100644
--- a/src/env/__libc_start_main.c
+++ b/src/env/__libc_start_main.c
@@ -38,6 +38,9 @@ void __init_libc(char **envp, char *pn)
__init_tls(aux);
__init_ssp((void *)aux[AT_RANDOM]);
+#if defined(__riscv) && __riscv_xlen == 64
+	__init_riscv_string_optimizations(); /* only built under src/string/riscv64 */
+#endif
if (aux[AT_UID]==aux[AT_EUID] && aux[AT_GID]==aux[AT_EGID]
&& !aux[AT_SECURE]) return;
diff --git a/src/internal/libc.h b/src/internal/libc.h
index 619bba86..28860b06 100644
--- a/src/internal/libc.h
+++ b/src/internal/libc.h
@@ -40,6 +40,9 @@ extern hidden struct __libc __libc;
hidden void __init_libc(char **, char *);
hidden void __init_tls(size_t *);
hidden void __init_ssp(void *);
+#ifdef __riscv
+hidden void __init_riscv_string_optimizations(void); /* called from __init_libc to select the mem* variants */
+#endif /* __riscv */
hidden void __libc_start_init(void);
hidden void __funcs_on_exit(void);
hidden void __funcs_on_quick_exit(void);
diff --git a/src/string/riscv64/memcpy.c b/src/string/riscv64/memcpy.c
new file mode 100644
index 00000000..01892e69
--- /dev/null
+++ b/src/string/riscv64/memcpy.c
@@ -0,0 +1,4 @@
+/* Compile the generic ../memcpy.c under the name __memcpy_scalar: the macro renames its definition */
+#define memcpy __memcpy_scalar
+#include "../memcpy.c"
+#undef memcpy /* stop the rename at the end of the included file */
diff --git a/src/string/riscv64/memcpy_vector.S b/src/string/riscv64/memcpy_vector.S
new file mode 100644
index 00000000..6a045832
--- /dev/null
+++ b/src/string/riscv64/memcpy_vector.S
@@ -0,0 +1,27 @@
+ .text
+ .global __memcpy_vect
+ .option push /* save assembler options; restored by .option pop at the end */
+ .option arch, +v /* allow V-extension mnemonics in this file */
+/* void *__memcpy_vect(void *dest, const void *src, size_t n)
+ * a0 = dest, a1 = src, a2 = n (counted down to zero as bytes are copied)
+ * Copies forward in chunks of vl bytes; a0 is never written, so the
+ * function returns dest. Scratch: t0-t2 and the register group at v0. */
+__memcpy_vect:
+ mv t0, a0 /* running dst */
+ mv t1, a1 /* running src */
+ beqz a2, .Ldone_copy /* n == 0 then return */
+
+.Lbulk_copy:
+ vsetvli t2, a2, e8, m8, ta, ma /* t2 = vl = bytes handled this pass (8-bit elements, LMUL=8) */
+ vle8.v v0, (t1) /* load vl bytes from src */
+ vse8.v v0, (t0) /* store them to dst */
+ add t0, t0, t2 /* advance dst */
+ add t1, t1, t2 /* advance src */
+ sub a2, a2, t2 /* remaining -= vl */
+ bnez a2, .Lbulk_copy /* repeat until nothing remains */
+ /* fallthrough */
+
+.Ldone_copy:
+ ret
+.size __memcpy_vect, .-__memcpy_vect
+.option pop
diff --git a/src/string/riscv64/memmove.c b/src/string/riscv64/memmove.c
new file mode 100644
index 00000000..915d6ba9
--- /dev/null
+++ b/src/string/riscv64/memmove.c
@@ -0,0 +1,4 @@
+/* Compile the generic ../memmove.c under the name __memmove_scalar: the macro renames its definition */
+#define memmove __memmove_scalar
+#include "../memmove.c"
+#undef memmove /* stop the rename at the end of the included file */
diff --git a/src/string/riscv64/memmove_vector.S b/src/string/riscv64/memmove_vector.S
new file mode 100644
index 00000000..aec87a52
--- /dev/null
+++ b/src/string/riscv64/memmove_vector.S
@@ -0,0 +1,51 @@
+ .text
+ .global __memmove_vect
+ .option push /* save assembler options; restored by .option pop at the end */
+ .option arch, +v /* allow V-extension mnemonics in this file */
+/* void *__memmove_vect(void *dest, const void *src, size_t n)
+ * a0 = dest, a1 = src, a2 = n (counted down to zero as bytes are copied)
+ * Copies backward only when dest lies inside [src, src+n); otherwise forward.
+ * a0 is never written, so the function returns dest. Scratch: t0-t3, v0 group. */
+__memmove_vect:
+ beqz a2, .Ldone_move /* n == 0 */
+ beq a0, a1, .Ldone_move /* dst == src */
+
+ /* overlap check: pick a direction that never clobbers unread source bytes */
+ bgeu a1, a0, .Lforward_move /* src >= dst then forward move */
+
+ sub t2, a0, a1 /* t2 = dst - src (positive here, since src < dst) */
+ bgeu t2, a2, .Lforward_move /* dst - src >= n: no overlap then forward move */
+
+ /* backward move: start one past the end of both buffers and walk down */
+ add t0, a0, a2 /* running dst_end */
+ add t1, a1, a2 /* running src_end */
+
+.Lbackward_loop:
+ vsetvli t3, a2, e8, m8, ta, ma /* t3 = vl (bytes) */
+ sub t0, t0, t3 /* step dst down to the start of this chunk */
+ sub t1, t1, t3 /* step src down to the start of this chunk */
+ vle8.v v0, (t1) /* whole chunk is loaded before any of it is stored */
+ vse8.v v0, (t0)
+ sub a2, a2, t3 /* remaining -= vl */
+ bnez a2, .Lbackward_loop
+ j .Ldone_move
+
+ /* forward move, same loop as __memcpy_vect */
+.Lforward_move:
+ mv t0, a0 /* running dst */
+ mv t1, a1 /* running src */
+
+.Lforward_loop:
+ vsetvli t3, a2, e8, m8, ta, ma /* t3 = vl (bytes) */
+ vle8.v v0, (t1) /* load vl bytes from src */
+ vse8.v v0, (t0) /* store them to dst */
+ add t0, t0, t3 /* advance dst */
+ add t1, t1, t3 /* advance src */
+ sub a2, a2, t3 /* remaining -= vl */
+ bnez a2, .Lforward_loop
+ /* fallthrough */
+
+.Ldone_move:
+ ret
+.size __memmove_vect, .-__memmove_vect
+.option pop
diff --git a/src/string/riscv64/memset.c b/src/string/riscv64/memset.c
new file mode 100644
index 00000000..11fa3032
--- /dev/null
+++ b/src/string/riscv64/memset.c
@@ -0,0 +1,4 @@
+/* Compile the generic ../memset.c under the name __memset_scalar: the macro renames its definition */
+#define memset __memset_scalar
+#include "../memset.c"
+#undef memset /* stop the rename at the end of the included file */
diff --git a/src/string/riscv64/memset_dispatch.c b/src/string/riscv64/memset_dispatch.c
new file mode 100644
index 00000000..8e36d0f9
--- /dev/null
+++ b/src/string/riscv64/memset_dispatch.c
@@ -0,0 +1,36 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include "libc.h"
+
+void *__memset_scalar(void *s, int c, size_t n); /* generic C memset, renamed in memset.c */
+void *__memset_vect(void *s, int c, size_t n); /* RVV version from memset_vector.S */
+
+/* memset function pointer, runtime-dispatched on RVV support. NOTE(review): string_dispatch.c in this patch defines __memset_ptr, memset() and __init_riscv_string_optimizations() too -- confirm only one of the two files gets built. */
+__attribute__((visibility("hidden")))
+#ifndef __riscv_vector
+void *(*__memset_ptr)(void *, int, size_t) = __memset_scalar;
+#else
+void *(*__memset_ptr)(void *, int, size_t) = __memset_vect;
+#endif
+
+void *memset(void *s, int c, size_t n)
+{
+	return (*__memset_ptr)(s, c, n); /* forward to the variant held in __memset_ptr */
+}
+
+static __inline int __has_rvv_via_hwcap(void)
+{
+	/* The V bit sits at the letter's offset from 'A' in the AT_HWCAP word. */
+	unsigned long v_mask = 1ul << ('V' - 'A');
+	return (__getauxval(AT_HWCAP) & v_mask) != 0;
+}
+
+/* Startup hook: use the vector memset iff AT_HWCAP reports the V extension. */
+__attribute__((visibility("hidden")))
+void __init_riscv_string_optimizations(void)
+{
+	int rvv = __has_rvv_via_hwcap();
+
+	__memset_ptr = rvv ? __memset_vect : __memset_scalar;
+}
diff --git a/src/string/riscv64/memset_vector.S b/src/string/riscv64/memset_vector.S
new file mode 100644
index 00000000..513645d3
--- /dev/null
+++ b/src/string/riscv64/memset_vector.S
@@ -0,0 +1,28 @@
+ .text
+ .global __memset_vect
+ .option push /* save assembler options; restored by .option pop at the end */
+ .option arch, +v /* allow V-extension mnemonics in this file */
+/* void *__memset_vect(void *s, int c, size_t n)
+ * a0 = s (dest), a1 = c (fill byte), a2 = n (size, counted down to zero)
+ * Fills the v0 register group with the byte once, then stores vl bytes per pass.
+ * a0 is never written, so the function returns s. Scratch: t0, t1, v0 group. */
+__memset_vect:
+ mv t0, a0 /* running dst; keep a0 as return */
+ beqz a2, .Ldone_vect /* n == 0 then return */
+
+ /* Broadcast fill byte once, over the maximum vector length. */
+ vsetvli t1, zero, e8, m8, ta, ma /* rs1 = zero requests VLMAX */
+ vmv.v.x v0, a1 /* every 8-bit element = low byte of c */
+
+.Lbulk_vect:
+ vsetvli t1, a2, e8, m8, ta, ma /* t1 = vl (bytes) */
+ vse8.v v0, (t0) /* store vl copies of the fill byte */
+ add t0, t0, t1 /* advance dst */
+ sub a2, a2, t1 /* remaining -= vl */
+ bnez a2, .Lbulk_vect /* repeat until nothing remains */
+ /* fallthrough */
+
+.Ldone_vect:
+ ret
+.size __memset_vect, .-__memset_vect
+.option pop
diff --git a/src/string/riscv64/string_dispatch.c b/src/string/riscv64/string_dispatch.c
new file mode 100644
index 00000000..2844d906
--- /dev/null
+++ b/src/string/riscv64/string_dispatch.c
@@ -0,0 +1,55 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include "libc.h"
+
+/* Scalar (generic C) and RVV (assembly) variants; memmove takes no restrict. */
+void *__memset_scalar(void *s, int c, size_t n);
+void *__memset_vect(void *s, int c, size_t n);
+void *__memcpy_scalar(void *restrict dest, const void *restrict src, size_t n);
+void *__memcpy_vect(void *restrict dest, const void *restrict src, size_t n);
+void *__memmove_scalar(void *dest, const void *src, size_t n);
+void *__memmove_vect(void *dest, const void *src, size_t n);
+/* Dispatch pointers, retargeted by __init_riscv_string_optimizations().
+ * `hidden` is repeated: one attribute only covers a single declaration. */
+#ifndef __riscv_vector
+hidden void *(*__memset_ptr)(void *, int, size_t) = __memset_scalar;
+hidden void *(*__memcpy_ptr)(void *, const void *, size_t) = __memcpy_scalar;
+hidden void *(*__memmove_ptr)(void *, const void *, size_t) = __memmove_scalar;
+#else
+hidden void *(*__memset_ptr)(void *, int, size_t) = __memset_vect;
+hidden void *(*__memcpy_ptr)(void *, const void *, size_t) = __memcpy_vect;
+hidden void *(*__memmove_ptr)(void *, const void *, size_t) = __memmove_vect;
+#endif
+
+void *memset(void *s, int c, size_t n)
+{
+	return (*__memset_ptr)(s, c, n); /* forward to the variant held in __memset_ptr */
+}
+
+void *memcpy(void *restrict dest, const void *restrict src, size_t n)
+{
+	return (*__memcpy_ptr)(dest, src, n); /* forward to the variant held in __memcpy_ptr */
+}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	return (*__memmove_ptr)(dest, src, n); /* forward to the variant held in __memmove_ptr */
+}
+
+static __inline int __has_rvv_via_hwcap(void)
+{
+	/* Test the AT_HWCAP bit whose index is the offset of 'V' from 'A'. */
+	unsigned long v_mask = 1ul << ('V' - 'A');
+	return !!(__getauxval(AT_HWCAP) & v_mask);
+}
+
+/* Select RVV or scalar routines from AT_HWCAP; always assigns, because the
+ * static defaults are the vector ones when built with __riscv_vector. */
+hidden void __init_riscv_string_optimizations(void)
+{
+	int rvv = __has_rvv_via_hwcap();
+	__memset_ptr = rvv ? __memset_vect : __memset_scalar;
+	__memcpy_ptr = rvv ? __memcpy_vect : __memcpy_scalar;
+	__memmove_ptr = rvv ? __memmove_vect : __memmove_scalar;
+}
--
2.39.5
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.