Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Tue, 3 Aug 2021 22:27:35 +0200
From: Szabolcs Nagy <nsz@...t70.net>
To: Stefan Kanthak <stefan.kanthak@...go.de>
Cc: musl@...ts.openwall.com
Subject: Re: [Patch] src/math/i386/remquo.s: remove conditional branch, shorter bit twiddling

* Stefan Kanthak <stefan.kanthak@...go.de> [2021-08-01 17:59:52 +0200]:
> Halve the number of instructions (from 12 to 6) to fetch the
> (3-bit partial) quotient from the FPU flags C0:C3:C1, and
> perform its negation without conditional branch.

I haven't tested it, but it looks good.

I think we should not tweak the x87 asm code too much, though:
such changes can introduce bugs, and the code does not have many users.
I think only the size saving can justify keeping any i386
math code at all.

But I'm not against committing this.
Thanks for the patch.

> --- -/math/i386/remquo.s
> +++ +/math/i386/remquo.s
> @@ -2,49 +2,44 @@
>  .type remquof,@function
>  remquof:
>         mov 12(%esp),%ecx
> +       mov 8(%esp),%eax
> +       xor 4(%esp),%eax
>         flds 8(%esp)
>         flds 4(%esp)
> -       mov 11(%esp),%dh
> -       xor 7(%esp),%dh
> -       jmp 1f
> +       jmp 0f
> 
>  .global remquol
>  .type remquol,@function
>  remquol:
>         mov 28(%esp),%ecx
> +       mov 24(%esp),%eax
> +       xor 12(%esp),%eax
> +       cwtl
>         fldt 16(%esp)
>         fldt 4(%esp)
> -       mov 25(%esp),%dh
> -       xor 13(%esp),%dh
> -       jmp 1f
> +       jmp 0f
> 
>  .global remquo
>  .type remquo,@function
>  remquo:
>         mov 20(%esp),%ecx
> +       mov 16(%esp),%eax
> +       xor 8(%esp),%eax
>         fldl 12(%esp)
>         fldl 4(%esp)
> -       mov 19(%esp),%dh
> -       xor 11(%esp),%dh
> +0:     cltd
>  1:     fprem1
>         fnstsw %ax
>         sahf
>         jp 1b
>         fstp %st(1)
> -       mov %ah,%dl
> -       shr %dl
> -       and $1,%dl
> -       mov %ah,%al
> -       shr $5,%al
> -       and $2,%al
> -       or %al,%dl
> -       mov %ah,%al
> -       shl $2,%al
> -       and $4,%al
> -       or %al,%dl
> -       test %dh,%dh
> -       jns 1f
> -       neg %dl
> -1:     movsbl %dl,%edx
> -       mov %edx,(%ecx)
> +       adc %al,%al
> +       shl $2,%ah
> +       adc %al,%al
> +       shl $5,%ah
> +       adc %al,%al
> +       and $7,%eax
> +       xor %edx,%eax
> +       sub %edx,%eax
> +       mov %eax,(%ecx)
>         ret


Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.