110 lines
2.8 KiB
C
110 lines
2.8 KiB
C
/*
 * Scenario 4: Cache Misses - Memory Access Patterns
 * ==================================================
 * This program demonstrates the performance impact of memory access patterns:
 * row-major vs. column-major traversal of a 2D array.
 *
 * Compile: gcc -O2 -o cache_demo cache_demo.c
 *
 * EXERCISES:
 * 1. Run: ./cache_demo
 * 2. Profile: perf stat -e cache-misses,cache-references ./cache_demo
 * 3. Why is one traversal so much faster than the other?
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <string.h>
|
|
|
|
/* Number of rows in the benchmark matrix (first array dimension). */
#define ROWS 8192
|
|
/*
 * Number of columns in the benchmark matrix (second array dimension).
 * One row therefore occupies COLS * sizeof(int) bytes.
 */
#define COLS 8192
|
|
|
|
/*
 * Global array to ensure it's not optimized away.
 * With ROWS = COLS = 8192 this is 8192 * 8192 * sizeof(int) bytes,
 * i.e. 256 MiB when int is 4 bytes (not 64 MB).
 * If that is too big for the target machine, use smaller dimensions
 * or a smaller element type such as char.
 */
|
|
|
|
/* Using static to avoid stack overflow */
|
|
/*
 * The matrix that every function in this file reads or writes.
 * It has static storage duration, so it does not live on the stack and
 * starts out zero-filled until init_matrix() overwrites it.
 */
static int matrix[ROWS][COLS];
|
|
|
|
double get_time(void) {
|
|
struct timespec ts;
|
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
return ts.tv_sec + ts.tv_nsec / 1e9;
|
|
}
|
|
|
|
long sum_row_major(void) {
|
|
/*
|
|
* Row-major traversal: access sequential memory addresses
|
|
* Memory layout: [0][0], [0][1], [0][2], ... [0][COLS-1], [1][0], ...
|
|
* This matches how C stores 2D arrays - CACHE FRIENDLY
|
|
*/
|
|
long sum = 0;
|
|
for (int i = 0; i < ROWS; i++) {
|
|
for (int j = 0; j < COLS; j++) {
|
|
sum += matrix[i][j];
|
|
}
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
long sum_col_major(void) {
|
|
/*
|
|
* Column-major traversal: jump around in memory
|
|
* Access pattern: [0][0], [1][0], [2][0], ... [ROWS-1][0], [0][1], ...
|
|
* Each access is COLS * sizeof(int) bytes apart - CACHE HOSTILE
|
|
*/
|
|
long sum = 0;
|
|
for (int j = 0; j < COLS; j++) {
|
|
for (int i = 0; i < ROWS; i++) {
|
|
sum += matrix[i][j];
|
|
}
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
void init_matrix(void) {
|
|
/* Initialize with some values */
|
|
for (int i = 0; i < ROWS; i++) {
|
|
for (int j = 0; j < COLS; j++) {
|
|
matrix[i][j] = (i + j) % 100;
|
|
}
|
|
}
|
|
}
|
|
|
|
int main(void) {
|
|
printf("Matrix size: %d x %d = %zu bytes\n",
|
|
ROWS, COLS, sizeof(matrix));
|
|
printf("Cache line size (typical): 64 bytes\n");
|
|
printf("Stride in column-major: %zu bytes\n\n", COLS * sizeof(int));
|
|
|
|
init_matrix();
|
|
|
|
double start, elapsed;
|
|
long result;
|
|
|
|
/* Warm up */
|
|
result = sum_row_major();
|
|
result = sum_col_major();
|
|
|
|
/* Row-major benchmark */
|
|
start = get_time();
|
|
result = sum_row_major();
|
|
elapsed = get_time() - start;
|
|
printf("Row-major sum: %ld in %.3f seconds\n", result, elapsed);
|
|
|
|
/* Column-major benchmark */
|
|
start = get_time();
|
|
result = sum_col_major();
|
|
elapsed = get_time() - start;
|
|
printf("Column-major sum: %ld in %.3f seconds\n", result, elapsed);
|
|
|
|
printf("\n");
|
|
printf("To see cache misses, run:\n");
|
|
printf(" perf stat -e cache-misses,cache-references,L1-dcache-load-misses ./cache_demo\n");
|
|
|
|
return 0;
|
|
}
|