scenario1: expand with multiple FFI calls vs one
This commit is contained in:
parent
ac27eead57
commit
7e8b1191fa
@ -5,24 +5,26 @@
|
||||
- Profile Python code with `cProfile`
|
||||
- Generate flamegraphs with `py-spy`
|
||||
- Use `ctypes` to call C code from Python
|
||||
- Understand ctypes call overhead and when to move loops to C
|
||||
|
||||
## Files
|
||||
- `prime_slow.py` - Pure Python implementation (slow)
|
||||
- `prime.c` - C implementation of the hot function
|
||||
- `prime_fast.py` - Python calling C via ctypes
|
||||
- `prime_fastish.py` - Python loop calling C `is_prime` via ctypes
|
||||
- `prime_fast.py` - Entire counting loop in C via ctypes
|
||||
|
||||
## Exercises
|
||||
|
||||
### Step 1: Measure the baseline
|
||||
```bash
|
||||
time python3 prime_slow.py 100000
|
||||
time python3 prime_slow.py
|
||||
```
|
||||
|
||||
Note the `real` time (wall clock) and `user` time (CPU time in user space).
|
||||
|
||||
### Step 2: Profile with cProfile
|
||||
```bash
|
||||
python3 -m cProfile -s cumtime prime_slow.py 100000
|
||||
python3 -m cProfile -s cumtime prime_slow.py
|
||||
```
|
||||
|
||||
Look for:
|
||||
@ -36,20 +38,31 @@ py-spy record -o prime_slow.svg -- python3 prime_slow.py
|
||||
|
||||
Open `prime_slow.svg` in a browser. The widest bar at the top shows where time is spent.
|
||||
|
||||
### Step 4: Compile and run the optimized version
|
||||
### Step 4: Compile and run the partially optimized version
|
||||
```bash
|
||||
# Compile the C library
|
||||
gcc -O2 -fPIC -shared -o libprime.so prime.c
|
||||
|
||||
# Run the fast version
|
||||
time python3 prime_fast.py 100000
|
||||
# Run the "fastish" version (C is_prime, Python loop)
|
||||
time python3 prime_fastish.py
|
||||
```
|
||||
|
||||
### Step 5: Compare
|
||||
- How much faster is the C version?
|
||||
- Generate a flamegraph for `prime_fast.py` - what's different?
|
||||
This version calls the C `is_prime` function, but the counting loop is still in Python.
|
||||
|
||||
### Step 5: Run the fully optimized version
|
||||
```bash
|
||||
time python3 prime_fast.py
|
||||
```
|
||||
|
||||
This version calls `count_primes` entirely in C - no Python loop.
|
||||
|
||||
### Step 6: Compare
|
||||
- How much faster is `prime_fastish.py` vs `prime_slow.py`?
|
||||
- How much faster is `prime_fast.py` vs `prime_fastish.py`?
|
||||
- Generate flamegraphs for both - what's different?
|
||||
|
||||
## Discussion Questions
|
||||
1. Why is the C version faster? (Hint: interpreter overhead, type checking)
|
||||
2. When is it worth rewriting in C vs. finding a library?
|
||||
3. What's the trade-off of using `ctypes` vs native Python?
|
||||
2. Why is `prime_fast.py` faster than `prime_fastish.py`? (Hint: ctypes call overhead)
|
||||
3. When is it worth rewriting in C vs. finding a library?
|
||||
4. What's the trade-off of using `ctypes` vs native Python?
|
||||
|
||||
63
scenario1-python-to-c/prime_fastish.py
Normal file
63
scenario1-python-to-c/prime_fastish.py
Normal file
@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scenario 1: The Partially Optimized Version
|
||||
===========================================
|
||||
This version calls the C `is_prime` via ctypes once per candidate; the counting loop stays in Python.
|
||||
|
||||
SETUP:
|
||||
gcc -O2 -fPIC -shared -o libprime.so prime.c
|
||||
|
||||
EXERCISES:
|
||||
1. Compare: time python3 prime_fastish.py
|
||||
2. Profile: py-spy record -o prime_fastish.svg -- python3 prime_fastish.py
|
||||
3. Compare the flamegraphs - what changed?
|
||||
"""
|
||||
|
||||
import ctypes
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Load the shared library from the directory containing this script, so the
# lookup does not depend on the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(script_dir, "libprime.so")

try:
    libprime = ctypes.CDLL(lib_path)
except OSError as e:
    # Diagnostics go to stderr, and the loader's own message is included so a
    # missing file can be told apart from other load failures.
    print(f"Error: Could not load {lib_path}: {e}", file=sys.stderr)
    print(
        "Please compile first: gcc -O2 -fPIC -shared -o libprime.so prime.c",
        file=sys.stderr,
    )
    sys.exit(1)

# Declare the C signatures so ctypes converts arguments and return values
# explicitly instead of relying on its defaults.
libprime.is_prime.argtypes = [ctypes.c_int64]
libprime.is_prime.restype = ctypes.c_int

libprime.count_primes.argtypes = [ctypes.c_int64]
libprime.count_primes.restype = ctypes.c_int64
|
||||
|
||||
|
||||
def is_prime(n):
    """Ask the C library whether ``n`` is prime and return a Python bool.

    Every call goes through ``libprime.is_prime`` via ctypes; ``n`` is passed
    as a ``c_int64`` according to the signature declared at module level.
    """
    c_result = libprime.is_prime(n)
    # The C function returns an int; any nonzero value is treated as True.
    return c_result != 0
|
||||
|
||||
|
||||
def count_primes(limit):
    """Return how many integers in ``[2, limit]`` are prime.

    The loop deliberately stays in Python and calls ``is_prime`` once per
    candidate, so each iteration crosses into C separately.
    """
    primes_found = 0
    for candidate in range(2, limit + 1):
        if not is_prime(candidate):
            continue
        primes_found += 1
    return primes_found
|
||||
|
||||
def main():
    """Count primes up to a limit and print the result.

    The limit is taken from the first command-line argument when one is
    given; otherwise it defaults to 1,000,000.
    """
    cli_args = sys.argv[1:]
    limit = int(cli_args[0]) if cli_args else 1_000_000

    print(f"Counting primes up to {limit} (using C library)...")
    result = count_primes(limit)
    print(f"Found {result} primes")
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user