/*
 * Cache-associativity micro-benchmark.
 *
 * Times a row-by-row pass over a 2-D byte array in which every element is
 * multiplied by the element directly above it, and prints the elapsed
 * processor time in clock() ticks.
 *
 * Build-time knob: -DOFF=<n> pads every row by n*4 bytes, which changes the
 * distance between vertically adjacent elements.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// Author's sizing notes:
// 256 bytes = 4 * 64 = 4 cache lines.
// With a 4-way associative cache one row traverses each set.
// With an 8-way associative cache the column count should be doubled.
#define COLS 256
#define ROWS 1024

// Size of the scratch buffer used by the (disabled) cache "flush": 6 MiB.
#define VFL 6291456

// Extra per-row padding, in units of 4 bytes; override with -DOFF=<n>.
#ifndef OFF
#define OFF 0
#endif

// Benchmark data. unsigned char keeps the 0..254 initial values and the
// products well-defined (results wrap modulo 256).
// The alignment attribute is a GCC/Clang extension.
unsigned char arr[ROWS][COLS + OFF * 4] __attribute__((aligned(4096)));

// Scratch buffer referenced only by the disabled flush loops below.
char arr2[VFL];

int main(void)
{
    // Accumulator the timed loop folds every result into. It is volatile so
    // each update is an observable access; it must start from a defined value.
    volatile int sink = 0;

    // Fixed seed so every run works on the same data.
    srand(0x42069);

    // Init array with pseudo-random values in 0..254 (padding columns are
    // left zero-initialized).
    for (int row = 0; row < ROWS; row++) {
        for (int col = 0; col < COLS; col++) {
            arr[row][col] = (unsigned char)(rand() % 255);
        }
    }

    // "flush" 6MiB cache (disabled).
    // NOTE: if re-enabled, i*i overflows int once i exceeds 46340; compute the
    // value in an unsigned type instead.
    // for(i = 0; i < VFL; i++)arr2[i] = i*i;
    // for(i = 0; i < VFL; i++)arr2[i] += 1;

    // Full memory barrier (GCC/Clang builtin) before starting the clock.
    __sync_synchronize();

    clock_t start = clock();
    for (int row = 1; row < ROWS; row++) {
        for (int col = 0; col < COLS; col++) {
            // Each element depends on the one directly above it.
            arr[row][col] = (unsigned char)(arr[row][col] * arr[row - 1][col]);
            sink ^= arr[row][col];
        }
    }
    clock_t elapsed = clock() - start;

    // Elapsed processor time in clock() ticks (divide by CLOCKS_PER_SEC for
    // seconds). The cast keeps the argument matched to %ld on every platform.
    printf("%ld", (long)elapsed);
    return 0;
}