#include #include #include #include struct udiv { uint64_t add; uint32_t mul; int s1, s2, inc; }; static void precompute_udiv(uint32_t div, struct udiv *p) { int s2_adj = (sizeof(long)==8) ? 32 : 0; if (!(div&(div-1))) { if (sizeof(long) == 8) { p->s1 = 0; p->mul = 1; p->add = 0; for (p->s2=0; div>1; div>>=1, p->s2++); } else if (div == 1) { p->s1 = 0; p->s2 = 0; p->mul = 0xffffffff; p->add = 0xffffffff; } else { p->s1 = 0; p->mul = 0x80000000; p->add = 0; for (p->s2=0; div>2; div>>=1, p->s2++); } return; } int bits = 0; again: p->s1 = bits; uint32_t tmp = 1u<<31, quo = tmp/div, rem = tmp%div; int log=0; tmp=div; do log++; while (tmp>>=1); int exp, rdshf; uint32_t pow, rdmul=0; for (exp=0, pow=1u<= div - rem; quo *= 2; rem *= 2; if (ovf) quo++, rem -= div; if (exp >= log-bits || div-rem <= pow) break; if (!rdmul && rem <= pow) rdmul = quo, rdshf = exp; } if (exp < log) { p->mul = quo+1; p->s2 = exp + s2_adj; p->inc = 0; p->add = 0; } else if (div & 1) { p->mul = rdmul; p->s2 = rdshf + s2_adj; p->inc = 1; p->add = rdmul; } else { do bits++; while (!((div >>= 1) & 1)); goto again; } } static uint32_t umod(uint32_t x, uint32_t div, struct udiv *p) { #if 0 int s32 = (sizeof(long)==8) ? 0 : 32; return x - ((unsigned long)(1ull * (x>>p->s1) * p->mul + p->add >> s32) >> p->s2)*div; #elif 0 int s32 = (sizeof(long)==8) ? 0 : 32; uint32_t v = x; v >>= p->s1; if (v!=-1) v += p->inc; v = (unsigned long)((1ull * v * p->mul) >> s32) >> p->s2; return x-v*div; #else if (!(div&(div-1))) return x&(div-1); uint32_t v = x; v >>= p->s1; if (v + p->inc) v += p->inc; int s32=32, s2=p->s2; if (sizeof(long) == 8) s32=s2, s2=0; v = (1ull * v * p->mul) >> s32; v >>= s2; return x-v*div; #endif } #define N 1000000 uint32_t ext_umod(uint32_t x, uint32_t div, struct udiv *p) { return umod(x, div, p); } int main(int argc, char **argv) { size_t i; struct timespec t0, t; uint32_t *a = calloc(N, sizeof *a); for (i=0; i