scenario1: expand with multiple FFI calls vs one
This commit is contained in:
parent
ac27eead57
commit
7e8b1191fa
@ -5,24 +5,26 @@
|
|||||||
- Profile Python code with `cProfile`
|
- Profile Python code with `cProfile`
|
||||||
- Generate flamegraphs with `py-spy`
|
- Generate flamegraphs with `py-spy`
|
||||||
- Use `ctypes` to call C code from Python
|
- Use `ctypes` to call C code from Python
|
||||||
|
- Understand ctypes call overhead and when to move loops to C
|
||||||
|
|
||||||
## Files
|
## Files
|
||||||
- `prime_slow.py` - Pure Python implementation (slow)
|
- `prime_slow.py` - Pure Python implementation (slow)
|
||||||
- `prime.c` - C implementation of the hot function
|
- `prime.c` - C implementation of the hot function
|
||||||
- `prime_fast.py` - Python calling C via ctypes
|
- `prime_fastish.py` - Python loop calling C `is_prime` via ctypes
|
||||||
|
- `prime_fast.py` - Entire counting loop in C via ctypes
|
||||||
|
|
||||||
## Exercises
|
## Exercises
|
||||||
|
|
||||||
### Step 1: Measure the baseline
|
### Step 1: Measure the baseline
|
||||||
```bash
|
```bash
|
||||||
time python3 prime_slow.py 100000
|
time python3 prime_slow.py
|
||||||
```
|
```
|
||||||
|
|
||||||
Note the `real` time (wall clock) and `user` time (CPU time in user space).
|
Note the `real` time (wall clock) and `user` time (CPU time in user space).
|
||||||
|
|
||||||
### Step 2: Profile with cProfile
|
### Step 2: Profile with cProfile
|
||||||
```bash
|
```bash
|
||||||
python3 -m cProfile -s cumtime prime_slow.py 100000
|
python3 -m cProfile -s cumtime prime_slow.py
|
||||||
```
|
```
|
||||||
|
|
||||||
Look for:
|
Look for:
|
||||||
@ -36,20 +38,31 @@ py-spy record -o prime_slow.svg -- python3 prime_slow.py
|
|||||||
|
|
||||||
Open `prime_slow.svg` in a browser. The widest bar at the top shows where time is spent.
|
Open `prime_slow.svg` in a browser. The widest bar at the top shows where time is spent.
|
||||||
|
|
||||||
### Step 4: Compile and run the optimized version
|
### Step 4: Compile and run the partially optimized version
|
||||||
```bash
|
```bash
|
||||||
# Compile the C library
|
# Compile the C library
|
||||||
gcc -O2 -fPIC -shared -o libprime.so prime.c
|
gcc -O2 -fPIC -shared -o libprime.so prime.c
|
||||||
|
|
||||||
# Run the fast version
|
# Run the "fastish" version (C is_prime, Python loop)
|
||||||
time python3 prime_fast.py 100000
|
time python3 prime_fastish.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 5: Compare
|
This version calls the C `is_prime` function, but the counting loop is still in Python.
|
||||||
- How much faster is the C version?
|
|
||||||
- Generate a flamegraph for `prime_fast.py` - what's different?
|
### Step 5: Run the fully optimized version
|
||||||
|
```bash
|
||||||
|
time python3 prime_fast.py
|
||||||
|
```
|
||||||
|
|
||||||
|
This version calls `count_primes` entirely in C - no Python loop.
|
||||||
|
|
||||||
|
### Step 6: Compare
|
||||||
|
- How much faster is `prime_fastish.py` vs `prime_slow.py`?
|
||||||
|
- How much faster is `prime_fast.py` vs `prime_fastish.py`?
|
||||||
|
- Generate flamegraphs for both - what's different?
|
||||||
|
|
||||||
## Discussion Questions
|
## Discussion Questions
|
||||||
1. Why is the C version faster? (Hint: interpreter overhead, type checking)
|
1. Why is the C version faster? (Hint: interpreter overhead, type checking)
|
||||||
2. When is it worth rewriting in C vs. finding a library?
|
2. Why is `prime_fast.py` faster than `prime_fastish.py`? (Hint: ctypes call overhead)
|
||||||
3. What's the trade-off of using `ctypes` vs native Python?
|
3. When is it worth rewriting in C vs. finding a library?
|
||||||
|
4. What's the trade-off of using `ctypes` vs native Python?
|
||||||
|
|||||||
63
scenario1-python-to-c/prime_fastish.py
Normal file
63
scenario1-python-to-c/prime_fastish.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Scenario 1: The Partially Optimized Version
|
||||||
|
===========================================
|
||||||
|
This version calls the C `is_prime` via ctypes, but the counting loop stays in Python.
|
||||||
|
|
||||||
|
SETUP:
|
||||||
|
gcc -O2 -fPIC -shared -o libprime.so prime.c
|
||||||
|
|
||||||
|
EXERCISES:
|
||||||
|
1. Compare: time python3 prime_fastish.py
|
||||||
|
2. Profile: py-spy record -o prime_fastish.svg -- python3 prime_fastish.py
|
||||||
|
3. Compare the flamegraphs - what changed?
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ctypes
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Load the shared library.
# The path is built from this script's own location, so loading does not
# depend on the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(script_dir, "libprime.so")

try:
    libprime = ctypes.CDLL(lib_path)
except OSError as e:
    # Include the loader's own message so the actual cause is visible,
    # and send diagnostics to stderr rather than stdout.
    print(f"Error: Could not load {lib_path}: {e}", file=sys.stderr)
    print(
        "Please compile first: gcc -O2 -fPIC -shared -o libprime.so prime.c",
        file=sys.stderr,
    )
    sys.exit(1)

# Define function signatures so ctypes converts arguments and return values
# with the declared C types.
libprime.is_prime.argtypes = [ctypes.c_int64]
libprime.is_prime.restype = ctypes.c_int

libprime.count_primes.argtypes = [ctypes.c_int64]
libprime.count_primes.restype = ctypes.c_int64
|
||||||
|
|
||||||
|
|
||||||
|
def is_prime(n):
    """Call the C ``is_prime`` through ctypes and return its result as a bool."""
    c_result = libprime.is_prime(n)
    return bool(c_result)
|
||||||
|
|
||||||
|
|
||||||
|
def count_primes(limit):
    """Return how many integers in ``2..limit`` (inclusive) ``is_prime`` accepts.

    The loop runs in Python and calls ``is_prime`` once per candidate.
    """
    total = 0
    for candidate in range(2, limit + 1):
        if not is_prime(candidate):
            continue
        total += 1
    return total
|
||||||
|
|
||||||
|
def main():
    """Count primes up to a limit and print the result.

    The limit is read from the first command-line argument when one is
    given; otherwise it defaults to 1,000,000.
    """
    args = sys.argv[1:]
    limit = int(args[0]) if args else 1_000_000

    print(f"Counting primes up to {limit} (using C library)...")
    found = count_primes(limit)
    print(f"Found {found} primes")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
x
Reference in New Issue
Block a user