/*
 * memset() throughput micro-benchmark.
 *
 * For each buffer size in sz[], a batch of CALLS_PER_SAMPLE memset() calls is
 * timed SAMPLES_PER_SIZE times; the fastest batch is reported together with
 * the throughput derived from it.  Within a batch the destination offset and
 * the length are perturbed by the al[] / off[] patterns so that a mix of
 * alignments and slightly different lengths is exercised.
 */

/* Ask for the POSIX.1-2008 API so clock_gettime() and CLOCK_MONOTONIC are
 * declared even when compiling in a strict ISO C mode. Must precede every
 * #include. */
#define _POSIX_C_SOURCE 200809L

#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

enum {
    PATTERN_LEN      = 16,      /* entries in the off[] / al[] patterns   */
    CALLS_PER_SAMPLE = 256,     /* memset() calls per timed batch         */
    SAMPLES_PER_SIZE = 20,      /* timed batches per size; minimum wins   */
    MAX_SIZE         = 1 << 16, /* largest entry in sz[]                  */
    BUF_SLACK        = 32       /* head-room for al[i] + off[i] (<= 16)   */
};

/* Nominal sizes to benchmark, terminated by 0. */
static const int sz[] = {16, 32, 48, 64, 96, 200, 300, 400, 600,
                         1 << 10, 1 << 11, 1 << 12, 1 << 13,
                         1 << 14, 1 << 15, 1 << 16, 0};

/* Per-call length adjustment.  The entries sum to 0, so a batch writes
 * CALLS_PER_SAMPLE * size bytes in total.  The smallest entry (-16) never
 * exceeds the smallest size (16), so the length is never negative. */
static const int off[PATTERN_LEN] = {0, 0, 0, -8, 8, 16, 0, 0,
                                     -16, -12, 0, 4, -4, 0, 0, 12};

/* Per-call destination offset in bytes from the start of the buffer. */
static const int al[PATTERN_LEN] = {0, 0, 8, 4, 8, 0, 8, 16,
                                    8, 16, 4, 2, 1, 8, 16, 1};

/* Calling through a volatile pointer forces a real call to the library
 * memset() on every iteration; otherwise the compiler is free to inline,
 * merge, or drop stores to a buffer that is never read back. */
static void *(*volatile memset_call)(void *, int, size_t) = memset;

/* Issue one batch of CALLS_PER_SAMPLE memset() calls of nominal length size. */
static void memset_batch(char *buf, int size)
{
    for (int i = 0; i < CALLS_PER_SAMPLE; i++) {
        int slot = i % PATTERN_LEN;

        memset_call(buf + al[slot], 0, (size_t)(size + off[slot]));
    }
}

/* Read the monotonic clock; terminate the program if it cannot be read. */
static struct timespec monotonic_now(void)
{
    struct timespec ts;

    /* CLOCK_MONOTONIC is not affected by wall-clock adjustments, so the
     * difference between two readings is always a valid duration. */
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        perror("clock_gettime");
        exit(EXIT_FAILURE);
    }
    return ts;
}

/* Return end - start in seconds. */
static double elapsed_seconds(struct timespec start, struct timespec end)
{
    end.tv_sec -= start.tv_sec;
    end.tv_nsec -= start.tv_nsec;
    if (end.tv_nsec < 0) {
        /* Borrow one second so the nanosecond part stays non-negative. */
        end.tv_nsec += 1000000000;
        end.tv_sec--;
    }
    return (double)end.tv_sec + (double)end.tv_nsec / 1e9;
}

/*
 * Run the benchmark and print one table row per size.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the work buffer cannot be
 * allocated.  A clock read failure terminates the program from
 * monotonic_now().
 */
int main(void)
{
    char *buf = malloc(MAX_SIZE + BUF_SLACK);

    if (buf == NULL) {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }

    /* warm up */
    for (int j = 0; sz[j]; j++) {
        for (int k = 0; k < SAMPLES_PER_SIZE; k++) {
            memset_batch(buf, sz[j]);
        }
    }

    printf("%-15s %-20s %-20s\n", "size(bytes)", "min_time(s)", "speed(GB/s)");

    for (int j = 0; sz[j]; j++) {
        double min_time = DBL_MAX;

        for (int k = 0; k < SAMPLES_PER_SIZE; k++) {
            /* large loop count is important for small sizes */
            struct timespec t0 = monotonic_now();
            memset_batch(buf, sz[j]);
            struct timespec t1 = monotonic_now();

            double sample = elapsed_seconds(t0, t1);

            if (sample < min_time) {
                min_time = sample;
            }
        }

        printf("%-15d %-20.9lf %-20.2lf\n", sz[j], min_time,
               (double)CALLS_PER_SAMPLE * sz[j] / (min_time * 1e9));
    }

    free(buf);
    return EXIT_SUCCESS;
}