/*
 * GOOD: Contiguous Array Traversal
 * =================================
 * This program uses a contiguous array for excellent cache locality.
 * The CPU prefetcher can predict sequential access patterns.
 *
 * Compile: make array_sum
 * Run:     ./array_sum
 * Profile: perf stat -e cache-misses,cache-references ./array_sum
 */

/* clock_gettime() and CLOCK_MONOTONIC are POSIX, not ISO C; request them
 * explicitly so the file also builds with a strict -std=c11. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define N 10000000 /* 10 million elements */

/*
 * Return the current time in seconds as a double.
 *
 * Uses CLOCK_MONOTONIC when <time.h> exposes it; otherwise falls back to the
 * C11 timespec_get() calendar clock. Prints a diagnostic and exits if the
 * clock cannot be read.
 */
double get_time(void) {
    struct timespec ts;

#ifdef CLOCK_MONOTONIC
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        perror("clock_gettime");
        exit(EXIT_FAILURE);
    }
#else
    if (timespec_get(&ts, TIME_UTC) == 0) {
        fprintf(stderr, "timespec_get failed\n");
        exit(EXIT_FAILURE);
    }
#endif

    return (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}

/*
 * Sum the first n elements of arr, visiting them in index order.
 *
 * arr: array of at least n ints; it is only read.
 * n:   number of elements to add; a value <= 0 yields 0.
 *
 * Returns the total as a long.
 */
long sum_array(const int *arr, int n) {
    long sum = 0;

    for (int i = 0; i < n; i++) {
        sum += arr[i];
    }
    return sum;
}

/*
 * Allocate an array of n ints and fill element i with i % 100.
 *
 * n: requested element count; values <= 0 produce an array with no
 *    initialized elements.
 *
 * Returns a non-NULL pointer that the caller must release with free().
 * Prints a diagnostic and exits if the size cannot be represented or the
 * allocation fails.
 */
int *create_array(int n) {
    size_t count = n > 0 ? (size_t)n : 0;
    int *arr;

    /* Guard the size multiplication: on targets with a narrow size_t it
     * could wrap and silently allocate a buffer that is too small. */
    if (count > SIZE_MAX / sizeof *arr) {
        fprintf(stderr, "create_array: %d elements is too large\n", n);
        exit(EXIT_FAILURE);
    }

    /* malloc(0) is allowed to return NULL, so always request at least one
     * element; that way NULL below always means a real failure. */
    arr = malloc((count ? count : 1) * sizeof *arr);
    if (!arr) {
        perror("malloc array");
        exit(EXIT_FAILURE);
    }

    for (size_t i = 0; i < count; i++) {
        arr[i] = (int)(i % 100);
    }
    return arr;
}

/*
 * Build the array, run one untimed warm-up pass, then time a second pass
 * and print the sum together with the elapsed time.
 */
int main(void) {
    printf("Contiguous Array Traversal (%d elements)\n", N);
    printf("Sequential memory access - CPU prefetcher works perfectly.\n\n");

    printf("Creating array...\n");
    /* The pointer object is volatile so each use below is a fresh read:
     * the optimizer must treat the warm-up and the timed pass as calls on
     * possibly different arrays and cannot fold them into one. */
    int *volatile arr = create_array(N);

    /* Warm up. Storing the result in a volatile keeps the pass from being
     * discarded as dead code. */
    volatile long warmup = sum_array(arr, N);
    (void)warmup;

    double start = get_time();
    long result = sum_array(arr, N);
    double elapsed = get_time() - start;

    printf("Array sum: %ld in %.4f seconds\n\n", result, elapsed);

    printf("To see cache behavior, run:\n");
    printf(" perf stat -e cache-misses,cache-references ./array_sum\n");

    free(arr);
    return 0;
}