/*
 * Micro-benchmark: measures how long memset() takes for a range of
 * buffer sizes and prints the minimum and the average cost per call.
 */
#define _XOPEN_SOURCE 700 /* must precede all includes; exposes clock_gettime() */

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/*
 * Number of timed samples taken per buffer size.
 * Millions of iterations are needed to get a stable "min" measurement.
 */
enum { REP = 1024 * 4096 };

/* Buffer that memset() is exercised on; the largest size tested is below 32768. */
char buf[32768 + 100];

/* Compiler-level barrier: stops the optimizer from moving or dropping memory accesses. */
static inline void compiler_barrier(void)
{
	__asm__ __volatile__ ("" : : : "memory");
}

/*
 * Returns a free-running 32-bit tick counter.
 *
 * On x86 this is the low 32 bits of the time-stamp counter; elsewhere it is
 * the process CPU time in nanoseconds, truncated to 32 bits. In both cases
 * the value wraps modulo 2^32, so callers must only look at the unsigned
 * difference of two readings taken less than 2^32 ticks apart.
 */
static inline unsigned rdtsc(void)
{
#if defined __i386__ || defined __x86_64__
	unsigned lo, hi;

	/*
	 * rdtsc writes EDX:EAX. Both halves are declared as outputs so the
	 * constraint is valid in 32-bit and 64-bit mode alike.
	 * rdtsc is not a serializing instruction; prefixing it with cpuid would
	 * serialize it, at the price of extra measurement overhead.
	 */
	__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
	(void)hi;
	return lo;
#else
	struct timespec ts = {0};

	if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) != 0)
		return 0;
	/*
	 * Fold the seconds in: returning tv_nsec alone makes the difference of
	 * two readings underflow whenever a second boundary is crossed.
	 */
	return (unsigned)ts.tv_sec * 1000000000u + (unsigned)ts.tv_nsec;
#endif
}

/*
 * For every tested size n, times REP batches of `repeat` memset() calls and
 * prints the per-call minimum and average tick counts.
 */
int main(void)
{
	/* Step by 2 below 64 bytes, by 32 below 512 bytes, then double. */
	for (unsigned n = 2; n < 32768; n += (n < 64 ? 2 : n < 512 ? 32 : n)) {
		/* Batch small sizes so that one sample covers roughly 1 KiB of work. */
		unsigned repeat = 1024 / (n | 1);
		if (repeat == 0)
			repeat = 1;

		/* Warm-up pass before any timing is taken. */
		memset(buf, 0, n);

		unsigned tmin = UINT_MAX;
		unsigned long long tsum = 0;

		for (unsigned i = 0; i < REP; i++) {
			unsigned j = repeat;
			unsigned t;

			compiler_barrier();
			t = rdtsc();
			do {
				memset(buf, 0, n);
				/* Keep every memset() call alive and in order. */
				compiler_barrier();
			} while (--j != 0);
			t = rdtsc() - t;
			compiler_barrier();

			if (t < tmin)
				tmin = t;
			tsum += t;
		}

		printf("size %u: min=%.2f, avg=%.2f\n",
			n,
			(double)tmin / repeat,
			/* Divisor computed in double: repeat * REP is close to INT_MAX. */
			(double)tsum / ((double)repeat * REP)
		);
	}
	return 0;
}