66 lines
1.5 KiB
C
66 lines
1.5 KiB
C
/*
 * GOOD: Contiguous Array Traversal
 * =================================
 * This program uses a contiguous array for excellent cache locality.
 * The CPU prefetcher can predict sequential access patterns.
 *
 * Compile: make array_sum
 * Run: ./array_sum
 * Profile: perf stat -e cache-misses,cache-references ./array_sum
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
#define N 10000000 /* 10 million elements */
|
|
|
|
double get_time(void) {
|
|
struct timespec ts;
|
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
return ts.tv_sec + ts.tv_nsec / 1e9;
|
|
}
|
|
|
|
/*
 * sum_array - add up the first n elements of arr.
 *
 * arr: elements to sum; only read, never written (hence const, which also
 *      lets callers pass read-only data without a cast).
 * n:   number of elements to read; when n <= 0 nothing is read.
 *
 * Returns the total as a long (0 when n <= 0). Elements are visited in
 * increasing index order.
 */
long sum_array(const int *arr, int n) {
    long total = 0;

    for (int i = 0; i < n; i++) {
        total += arr[i];
    }
    return total;
}
|
|
|
|
/*
 * create_array - allocate an int array of n elements and fill it.
 *
 * n: number of elements; must not be negative.
 *
 * Element i is set to i % 100. The caller owns the returned buffer and
 * releases it with free(). The function never returns NULL: a negative n,
 * a byte count that does not fit in size_t, or an allocation failure is
 * reported on stderr and the process exits with status 1.
 */
int *create_array(int n) {
    if (n < 0) {
        fprintf(stderr, "create_array: negative element count %d\n", n);
        exit(1);
    }

    size_t count = (size_t)n;

    /* Reject sizes whose byte count would wrap around in size_t. */
    if (count > (size_t)-1 / sizeof(int)) {
        fprintf(stderr, "create_array: %d elements overflow size_t\n", n);
        exit(1);
    }

    /*
     * malloc(0) may legitimately return NULL; request at least one element
     * so an empty array is not mistaken for an allocation failure.
     */
    int *arr = malloc((count ? count : 1) * sizeof *arr);
    if (!arr) {
        perror("malloc array");
        exit(1);
    }

    for (size_t i = 0; i < count; i++) {
        arr[i] = (int)(i % 100);
    }
    return arr;
}
|
|
|
|
int main(void) {
|
|
printf("Contiguous Array Traversal (%d elements)\n", N);
|
|
printf("Sequential memory access - CPU prefetcher works perfectly.\n\n");
|
|
|
|
printf("Creating array...\n");
|
|
int *arr = create_array(N);
|
|
|
|
/* Warm up */
|
|
sum_array(arr, N);
|
|
|
|
double start = get_time();
|
|
long result = sum_array(arr, N);
|
|
double elapsed = get_time() - start;
|
|
|
|
printf("Array sum: %ld in %.4f seconds\n\n", result, elapsed);
|
|
|
|
printf("To see cache behavior, run:\n");
|
|
printf(" perf stat -e cache-misses,cache-references ./array_sum\n");
|
|
|
|
free(arr);
|
|
return 0;
|
|
}
|