#!/usr/bin/env python3
"""
Scenario 3: Syscall Storm in Python
====================================

This demonstrates buffered vs unbuffered I/O in Python.

EXERCISES:
1. Create test file: dd if=/dev/urandom of=testfile bs=1M count=1
2. Run: python3 read_python.py testfile
3. Profile: strace -c python3 read_python.py testfile
"""

import sys
import os
import time


def read_unbuffered(filename: str):
    """Read a file one byte at a time with ``os.read`` on a raw descriptor.

    Every loop iteration calls ``os.read(fd, 1)`` directly, bypassing
    Python's buffered file objects. This is the deliberately slow case
    the scenario is built to expose.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(byte_count, checksum)`` tuple, where ``checksum`` is the
        plain integer sum of every byte value in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    fd = os.open(filename, os.O_RDONLY)
    byte_count = 0
    checksum = 0
    try:
        while True:
            data = os.read(fd, 1)  # Read 1 byte at a time!
            if not data:
                break
            byte_count += 1
            checksum += data[0]
    finally:
        # Close the descriptor even when os.read fails part-way through,
        # otherwise the raw fd leaks (there is no file object to clean up).
        os.close(fd)
    return byte_count, checksum


def read_buffered(filename: str):
    """Read a file in 64KB chunks through Python's buffered file object.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(byte_count, checksum)`` tuple, where ``checksum`` is the
        plain integer sum of every byte value in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    byte_count = 0
    checksum = 0
    with open(filename, 'rb') as f:
        while True:
            # Python's file object is buffered internally
            data = f.read(65536)  # Read 64KB chunks
            if not data:
                break
            byte_count += len(data)
            checksum += sum(data)
    return byte_count, checksum


def read_byte_by_byte_buffered(filename: str):
    """Read a file byte-by-byte through Python's buffered file object.

    This is slow in Python but not due to syscalls: each ``f.read(1)``
    is served from the file object's internal buffer, so the cost is the
    per-byte Python-level loop rather than one kernel call per byte.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(byte_count, checksum)`` tuple, where ``checksum`` is the
        plain integer sum of every byte value in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    byte_count = 0
    checksum = 0
    with open(filename, 'rb') as f:
        while True:
            data = f.read(1)  # Looks like 1 byte, but file is buffered
            if not data:
                break
            byte_count += 1
            checksum += data[0]
    return byte_count, checksum


def benchmark(name: str, func, filename: str) -> float:
    """Run ``func(filename)`` once, print a one-line report, return the time.

    Args:
        name: Label printed at the start of the report line.
        func: Callable taking a filename and returning
            ``(byte_count, checksum)``.
        filename: Path passed through to ``func``.

    Returns:
        Elapsed wall-clock time in seconds, measured with
        ``time.perf_counter``.
    """
    start = time.perf_counter()
    byte_count, checksum = func(filename)
    elapsed = time.perf_counter() - start
    print(f"{name:30s}: {elapsed:.3f}s ({byte_count} bytes, checksum={checksum})")
    return elapsed


def main() -> None:
    """Benchmark each read strategy on the file named in ``sys.argv[1]``.

    Prints usage and exits with status 1 unless exactly one argument is
    given. The raw-syscall benchmark only runs for files smaller than
    2,000,000 bytes, because it performs one ``os.read`` call per byte.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <filename>")
        print("\nCreate a test file with:")
        print(" dd if=/dev/urandom of=testfile bs=1M count=1")
        sys.exit(1)

    filename = sys.argv[1]

    print("Reading file with different strategies...\n")

    t_buf = benchmark("Buffered (64KB chunks)", read_buffered, filename)
    benchmark("Byte-by-byte (buffered file)", read_byte_by_byte_buffered, filename)

    # Only run unbuffered test if file is small (< 2M)
    file_size = os.path.getsize(filename)
    if file_size < 2_000_000:
        t_unbuf = benchmark("Unbuffered (raw syscalls)", read_unbuffered, filename)
        if t_buf > 0:
            print(f"\nSpeedup (buffered vs unbuffered): {t_unbuf/t_buf:.1f}x")
        else:
            # Guard the ratio: a zero timing would otherwise raise
            # ZeroDivisionError after all the benchmarks already ran.
            print("\nSpeedup (buffered vs unbuffered): n/a (buffered read too fast to time)")
    else:
        print(f"\nSkipping unbuffered test (file too large: {file_size} bytes)")
        print("For unbuffered test, create smaller file: dd if=/dev/urandom of=smallfile bs=10K count=1")
        print("Then run: strace -c python3 read_python.py smallfile")


if __name__ == "__main__":
    main()