2026-01-11 05:31:06 +05:30

107 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""
Scenario 3: Syscall Storm in Python
====================================
This demonstrates buffered vs unbuffered I/O in Python.
EXERCISES:
1. Create test file: dd if=/dev/urandom of=testfile bs=1M count=1
2. Run: python3 read_python.py testfile
3. Profile: strace -c python3 read_python.py testfile
"""
import sys
import os
import time
def read_unbuffered(filename):
    """Read a file one byte at a time using raw ``os.read`` calls.

    Every ``os.read(fd, 1)`` bypasses Python's buffered I/O layer, so the
    loop issues roughly one read call to the operating system per byte.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(byte_count, checksum)`` tuple, where ``checksum`` is the sum of
        every byte value in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    fd = os.open(filename, os.O_RDONLY)
    byte_count = 0
    checksum = 0
    try:
        while True:
            data = os.read(fd, 1)  # Read 1 byte at a time!
            if not data:
                # An empty bytes object signals end of file.
                break
            byte_count += 1
            checksum += data[0]
    finally:
        # Release the descriptor even when os.read() raises.
        os.close(fd)
    return byte_count, checksum
def read_buffered(filename):
    """Read a file in 64 KiB chunks through Python's buffered file object.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(byte_count, checksum)`` tuple, where ``checksum`` is the sum of
        every byte value in the file.
    """
    chunk_size = 65536  # 64KB per read() call
    total_bytes = 0
    byte_sum = 0
    with open(filename, 'rb') as stream:
        # iter() with a b"" sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            total_bytes += len(chunk)
            byte_sum += sum(chunk)
    return total_bytes, byte_sum
def read_byte_by_byte_buffered(filename):
    """
    Read a file one byte per call, but via Python's buffered file object.

    Each ``read(1)`` goes through the file object returned by ``open`` in
    binary mode rather than through ``os.read``, so this is slow in Python
    but not due to syscalls!

    Returns a ``(byte_count, checksum)`` tuple, where ``checksum`` is the
    sum of every byte value in the file.
    """
    count = 0
    total = 0
    with open(filename, 'rb') as stream:
        # Looks like 1 byte per read, but the file object is buffered.
        single = stream.read(1)
        while single:
            count += 1
            total += single[0]
            single = stream.read(1)
    return count, total
def benchmark(name, func, filename):
    """Time one call of ``func(filename)`` and print a one-line report.

    Args:
        name: Label shown in the report (left-aligned, padded to 30 columns).
        func: Callable that takes a filename and returns a
            ``(byte_count, checksum)`` pair.
        filename: Path passed through to ``func``.

    Returns:
        The elapsed wall-clock time in seconds, measured with
        ``time.perf_counter``.
    """
    t0 = time.perf_counter()
    n_bytes, digest = func(filename)
    duration = time.perf_counter() - t0
    report = f"{name:30s}: {duration:.3f}s ({n_bytes} bytes, checksum={digest})"
    print(report)
    return duration
def main():
    """Benchmark each read strategy on the file named on the command line.

    Expects exactly one argument (the file to read). Prints a usage message
    and exits with status 1 otherwise. The raw-syscall strategy is only run
    for files smaller than 2,000,000 bytes, because it performs one
    ``os.read`` call per byte.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <filename>")
        print("\nCreate a test file with:")
        print(" dd if=/dev/urandom of=testfile bs=1M count=1")
        sys.exit(1)

    filename = sys.argv[1]
    print("Reading file with different strategies...\n")
    t_buf = benchmark("Buffered (64KB chunks)", read_buffered, filename)
    # This timing is only reported by benchmark(); it is not used below.
    benchmark("Byte-by-byte (buffered file)", read_byte_by_byte_buffered, filename)

    # Only run unbuffered test if file is small (< 2M)
    file_size = os.path.getsize(filename)
    if file_size < 2_000_000:
        t_unbuf = benchmark("Unbuffered (raw syscalls)", read_unbuffered, filename)
        if t_buf > 0:
            print(f"\nSpeedup (buffered vs unbuffered): {t_unbuf/t_buf:.1f}x")
        else:
            # A zero elapsed time would otherwise raise ZeroDivisionError.
            print("\nSpeedup (buffered vs unbuffered): n/a (buffered read too fast to time)")
    else:
        print(f"\nSkipping unbuffered test (file too large: {file_size} bytes)")
        print("For unbuffered test, create smaller file: dd if=/dev/urandom of=smallfile bs=10K count=1")
        print("Then run: strace -c python3 read_python.py smallfile")
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()