#include #include #include #include struct udiv { uint32_t mul; unsigned char s1, s2, inc; }; static void precompute_udiv(uint32_t div, struct udiv *p) { int bits = 0; while (!(div & 1)) div >>= 1, bits++; p->s1 = bits; if (div == 1) { p->mul = (sizeof(long) == 8); p->s2 = 0; p->inc = 0; return; } uint32_t tmp = 1u<<31, quo = tmp/div, rem = tmp%div; int log=0; tmp=div; do log++; while (tmp>>=1); int exp, rdshf; uint32_t pow, rdmul=0; for (exp=0, pow=1u<= div - rem; quo *= 2; rem *= 2; if (ovf) quo++, rem -= div; if (exp >= log-bits || div-rem <= pow) break; if (!rdmul && rem <= pow) rdmul = quo, rdshf = exp; } int s2_adj = (sizeof(long)==8) ? 32 : 0; if (exp < log) { p->mul = quo+1; p->s2 = exp + s2_adj; p->inc = 0; } else { p->mul = rdmul; p->s2 = rdshf + s2_adj; p->inc = 1; } } static uint32_t umod(uint32_t x, uint32_t div, struct udiv *p) { uint32_t v = x >> p->s1; if (sizeof(long) == 8 || p->mul) { if (v + p->inc) v += p->inc; int s32=32, s2=p->s2; if (sizeof(long) == 8) s32=s2, s2=0; v = (1ull * v * p->mul) >> s32; v >>= s2; } return x-v*div; } #define N 1000000 uint32_t ext_umod(uint32_t x, uint32_t div, struct udiv *p) { return umod(x, div, p); } int main(int argc, char **argv) { size_t i; struct timespec t0, t; uint32_t *a = calloc(N, sizeof *a); for (i=0; i