![]() |
|
Message-Id: <20250925131557.8907-2-pincheng.plct@isrc.iscas.ac.cn>
Date: Thu, 25 Sep 2025 21:15:57 +0800
From: Pincheng Wang <pincheng.plct@...c.iscas.ac.cn>
To: musl@...ts.openwall.com
Cc: pincheng.plct@...c.iscas.ac.cn
Subject: [PATCH 1/1] riscv64: optimize memset implementation with vector extension

Use head-tail filling strategy for small sizes and dynamic vsetvli
approach for vector loops to reduce branch overhead. Add conditional
compilation to fall back to scalar implementation when __riscv_vector
is not available.

Signed-off-by: Pincheng Wang <pincheng.plct@...c.iscas.ac.cn>
---
 src/string/riscv64/memset.S | 131 ++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 src/string/riscv64/memset.S

diff --git a/src/string/riscv64/memset.S b/src/string/riscv64/memset.S
new file mode 100644
index 00000000..5fc6ee14
--- /dev/null
+++ b/src/string/riscv64/memset.S
@@ -0,0 +1,131 @@
+#ifdef __riscv_vector
+
+	.text
+	.global memset
+	.type   memset, %function
+/* void *memset(void *s, int c, size_t n)
+ * a0 = s (dest), a1 = c (fill byte), a2 = n (size)
+ * Returns a0.
+ */
+memset:
+	mv      t0, a0              /* running dst; keep a0 as return */
+	beqz    a2, .Ldone          /* n == 0 -> return */
+
+	li      t3, 8
+	bltu    a2, t3, .Lsmall     /* small-size fast path */
+
+	/* Broadcast fill byte once (e8 takes the low 8 bits of a1). */
+	vsetvli t1, zero, e8, m8, ta, ma
+	vmv.v.x v0, a1
+
+.Lbulk:
+	vsetvli t1, a2, e8, m8, ta, ma  /* t1 = vl (bytes) */
+	vse8.v  v0, (t0)
+	add     t0, t0, t1
+	sub     a2, a2, t1
+	bnez    a2, .Lbulk
+	j       .Ldone
+
+/* Small-size fast path (0 < n < 8).
+ * Head-tail fills to minimize branches and avoid vsetvli overhead.
+ * Stores issued from both ends may overlap; that is harmless.
+ */
+.Lsmall:
+	/* Fill s[0], s[n-1] */
+	sb      a1, 0(t0)
+	add     t2, t0, a2
+	sb      a1, -1(t2)
+	li      t3, 2
+	bleu    a2, t3, .Ldone
+
+	/* Fill s[1], s[2], s[n-2], s[n-3] */
+	sb      a1, 1(t0)
+	sb      a1, 2(t0)
+	sb      a1, -2(t2)
+	sb      a1, -3(t2)
+	li      t3, 6
+	bleu    a2, t3, .Ldone
+
+	/* Only n == 7 reaches here; s[3] is the single byte left. */
+	sb      a1, 3(t0)
+
+.Ldone:
+	ret
+.size memset, .-memset
+
+#else /* !__riscv_vector */
+
+	.text
+	.global memset
+	.type   memset, %function
+/* Scalar fallback, used when __riscv_vector is not defined.
+ * void *memset(void *s, int c, size_t n)
+ * a0 = s (dest), a1 = c (fill byte), a2 = n (size)
+ * Returns a0.
+ */
+memset:
+	mv      t0, a0              /* dst base; a0 stays the return value */
+	beqz    a2, .Ldone
+
+	andi    a1, a1, 0xff        /* use low 8 bits only */
+
+	/* Head-tail fill: finishes every n <= 8 without a loop. */
+	sb      a1, 0(t0)           /* s[0] */
+	add     t2, t0, a2
+	sb      a1, -1(t2)          /* s[n-1] */
+	li      t3, 2
+	bleu    a2, t3, .Ldone
+
+	sb      a1, 1(t0)
+	sb      a1, 2(t0)
+	sb      a1, -2(t2)
+	sb      a1, -3(t2)
+	li      t3, 6
+	bleu    a2, t3, .Ldone
+
+	sb      a1, 3(t0)
+	sb      a1, -4(t2)
+	li      t3, 8
+	bleu    a2, t3, .Ldone
+
+	/* n > 8: fill the middle region [4, n-4). */
+	addi    t4, t0, 4           /* cursor */
+	addi    t5, t2, -4          /* end (exclusive) */
+
+	/* Replicate the fill byte into all 8 bytes of a1. */
+	slli    t1, a1, 8
+	or      a1, a1, t1
+	slli    t1, a1, 16
+	or      a1, a1, t1
+	slli    t1, a1, 32
+	or      a1, a1, t1
+
+	/* Byte stores until the cursor is 8-byte aligned. */
+.Lalign:
+	bgeu    t4, t5, .Ldone
+	andi    t1, t4, 7
+	beqz    t1, .Lwords
+	sb      a1, 0(t4)
+	addi    t4, t4, 1
+	j       .Lalign
+
+	/* Aligned doubleword stores while >= 8 bytes remain (t3 is still 8). */
+.Lwords:
+	sub     t1, t5, t4
+	bltu    t1, t3, .Ltail
+	sd      a1, 0(t4)
+	addi    t4, t4, 8
+	j       .Lwords
+
+	/* Remaining 0..7 bytes. */
+.Ltail:
+	bgeu    t4, t5, .Ldone
+	sb      a1, 0(t4)
+	addi    t4, t4, 1
+	j       .Ltail
+
+.Ldone:
+	ret
+.size memset, .-memset
+
+#endif
-- 
2.39.5
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.