/*
 * Reproducer: stack variables declared with 32-byte alignment end up
 * misaligned inside an OpenMP parallel region.
 *
 * Tested with:
 * gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1)
 *
 * Without OMP, this works just fine with no special options:
 * $ gcc test.c -o test && ./test alpha bravo charlie
 * 0x7ffc32236700 ./test
 * 0x7ffc32236700 alpha
 * 0x7ffc32236700 bravo
 * 0x7ffc32236700 charlie
 *
 * Broken with OMP:
 * $ gcc -fopenmp test.c -o test && ./test alpha bravo charlie
 * 0x7f1c78b1ea40 charlie
 * 0x7f1c79b20a40 alpha
 * 0x7ffc450c4d30 ./test unaligned!
 * 0x7f1c7931fa40 bravo
 *
 * The rest assumes -fopenmp:
 *
 * Using -mavx2 and -DUSE_VTYPE (declare as vtype) seems to help.
 * Just -mavx2 (and declaring char with aligned attribute) does not help.
 * Just -mincoming-stack-boundary=4 doesn't help. I wasn't allowed to try lower.
 * Just -mpreferred-stack-boundary=5 obviously doesn't help.
 * Using both incoming & preferred doesn't help either (it got worse, now all
 * threads got misaligned).
 * -mstackrealign makes no difference (even with incoming & preferred).
 *
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _OPENMP
#include <omp.h>
#endif

#if USE_VTYPE && __AVX2__
#include <immintrin.h>
#endif

enum {
	ALIGNMENT = 32,		/* required buffer alignment in bytes (AVX2 width) */
	BUF_SIZE = 1024		/* size in bytes of each per-iteration buffer */
};

/*
 * Print the string at pt, appending " unaligned!" when the address of pt
 * is not a multiple of ALIGNMENT.
 *
 * I had to place this in a separate function to avoid having the unaligned
 * branch optimized away (even using -O0), since gcc is totally convinced it
 * simply can't be unaligned!
 */
void out(char *pt)
{
	/* uintptr_t is the integer type meant for pointer round-trips. */
	if ((uintptr_t)pt & (ALIGNMENT - 1)) {
		printf("%s unaligned!\n", pt);
	} else {
		puts(pt);
	}
}

/*
 * For every command-line argument (including argv[0]), format
 * "<buffer address> <argument>" into a stack buffer that is requested to be
 * ALIGNMENT-byte aligned, then report whether the buffer really is aligned.
 * With -fopenmp the iterations are distributed with "omp parallel for".
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
	int i;

#ifdef _OPENMP
#pragma omp parallel for
#endif
	for (i = 0; i < argc; i++) {
#if USE_VTYPE && __AVX2__
		/* Alignment comes from the vector element type itself. */
		__m256i vbuf[BUF_SIZE / sizeof(__m256i)];
		char *pt = (void*)vbuf;
#else
		/* Alignment is requested through the GCC variable attribute. */
		__attribute__ ((aligned(ALIGNMENT))) char pt[BUF_SIZE];
#endif

		/*
		 * One bounded call instead of sprintf() + strcat(): an
		 * argument longer than the buffer is truncated rather than
		 * overrunning the stack. %p requires a void * argument.
		 */
		snprintf(pt, BUF_SIZE, "%p %s", (void*)pt, argv[i]);
		out(pt);
	}

	return 0;
}