/* * Scenario 4: Cache Misses - Memory Access Patterns * ================================================== * This program demonstrates the performance impact of memory access patterns. * Row-major vs column-major traversal of a 2D array. * * Compile: gcc -O2 -o cache_demo cache_demo.c * * EXERCISES: * 1. Run: ./cache_demo * 2. Profile: perf stat -e cache-misses,cache-references ./cache_demo * 3. Why is one so much faster? */ #include #include #include #include #define ROWS 8192 #define COLS 8192 /* * Global array to ensure it's not optimized away. * This is a 64MB array (8192 * 8192 * sizeof(int) = 256MB if int is 4 bytes) * Wait, that's too big. Let's use smaller dimensions or chars. */ /* Using static to avoid stack overflow */ static int matrix[ROWS][COLS]; double get_time(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec + ts.tv_nsec / 1e9; } long sum_row_major(void) { /* * Row-major traversal: access sequential memory addresses * Memory layout: [0][0], [0][1], [0][2], ... [0][COLS-1], [1][0], ... * This matches how C stores 2D arrays - CACHE FRIENDLY */ long sum = 0; for (int i = 0; i < ROWS; i++) { for (int j = 0; j < COLS; j++) { sum += matrix[i][j]; } } return sum; } long sum_col_major(void) { /* * Column-major traversal: jump around in memory * Access pattern: [0][0], [1][0], [2][0], ... [ROWS-1][0], [0][1], ... * Each access is COLS * sizeof(int) bytes apart - CACHE HOSTILE */ long sum = 0; for (int j = 0; j < COLS; j++) { for (int i = 0; i < ROWS; i++) { sum += matrix[i][j]; } } return sum; } void init_matrix(void) { /* Initialize with some values */ for (int i = 0; i < ROWS; i++) { for (int j = 0; j < COLS; j++) { matrix[i][j] = (i + j) % 100; } } } int main(void) { printf("Matrix size: %d x %d = %zu bytes\n", ROWS, COLS, sizeof(matrix)); printf("Cache line size (typical): 64 bytes\n"); printf("Stride in column-major: %zu bytes\n\n", COLS * sizeof(int)); init_matrix(); double start, elapsed; long result; /* Warm up */ result = sum_row_major(); result = sum_col_major(); /* Row-major benchmark */ start = get_time(); result = sum_row_major(); elapsed = get_time() - start; printf("Row-major sum: %ld in %.3f seconds\n", result, elapsed); /* Column-major benchmark */ start = get_time(); result = sum_col_major(); elapsed = get_time() - start; printf("Column-major sum: %ld in %.3f seconds\n", result, elapsed); printf("\n"); printf("To see cache misses, run:\n"); printf(" perf stat -e cache-misses,cache-references,L1-dcache-load-misses ./cache_demo\n"); return 0; }