153 lines
4.3 KiB
Python
153 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Scenario 2b: The Precomputation Insight
|
|
=======================================
|
|
This simulates a config validator that checks rules against events.
|
|
The "expensive" validation function is called repeatedly with the same inputs.
|
|
|
|
This example shows three stages of optimization:
|
|
1. Naive: call the function every time
|
|
2. Memoized: cache results with @lru_cache
|
|
3. Precomputed: since inputs are known ahead of time, build a lookup table
|
|
|
|
EXERCISES:
|
|
1. Run each version and compare times
|
|
2. Profile each version - observe ncalls and cumtime
|
|
3. Think about: when is precomputation better than memoization?
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
from functools import lru_cache
|
|
|
|
|
|
# Simulated "expensive" validation function
|
|
def validate_rule_slow(rule_id, event_type):
    """
    Stand-in for an expensive validation check on one (rule, event type) pair.

    In real life, this might query a database, parse XML, etc. Here the cost
    is simulated with a fixed amount of arithmetic. The answer depends only
    on the two arguments, so the same pair always yields the same result.

    Args:
        rule_id: Rule identifier (an integer everywhere in this file).
        event_type: Event type code (an integer everywhere in this file).

    Returns:
        True if the accumulated checksum is even, False otherwise.
    """
    # Artificial delay: fold 10,000 modular products into one checksum.
    checksum = sum(
        (rule_id * event_type * step) % 997 for step in range(10000)
    )
    return checksum % 2 == 0  # Returns True or False
|
|
|
|
|
|
# The rule IDs and event types we'll encounter; every event's
# (rule_id, event_type) pair is drawn from RULES x EVENT_TYPES
|
|
# Rule IDs used by the demo: test events pick their rule from this list, and
# the precomputed lookup table is built over it.
RULES = [1, 2, 3, 4, 5]
|
|
# Event type codes used by the demo: test events pick their type from this
# list, and the precomputed lookup table is built over it.
EVENT_TYPES = [10, 20, 30, 40, 50]
|
|
|
|
|
|
def process_events_naive(events):
    """Count the events that pass validation, re-validating every time.

    Each event triggers a fresh call to validate_rule_slow(), even when the
    same (rule_id, event_type) pair has already been checked.

    Args:
        events: Iterable of (rule_id, event_type, data) triples. The data
            element is unpacked but not used.

    Returns:
        Number of events for which validation returned a truthy value.
    """
    return sum(
        1
        for rule_id, event_type, _payload in events
        if validate_rule_slow(rule_id, event_type)
    )
|
|
|
|
|
|
# Memoized version
|
|
@lru_cache(maxsize=None)
def validate_rule_cached(rule_id, event_type):
    """Same validation as validate_rule_slow, but with caching.

    lru_cache stores the result for each distinct (rule_id, event_type)
    pair, so the arithmetic below runs only on the first call for a pair.
    maxsize=None means the cache never evicts entries.

    Args:
        rule_id: Rule identifier (an integer everywhere in this file).
        event_type: Event type code (an integer everywhere in this file).

    Returns:
        True if the accumulated checksum is even, False otherwise.
    """
    checksum = sum(
        (rule_id * event_type * step) % 997 for step in range(10000)
    )
    return checksum % 2 == 0
|
|
|
|
|
|
def process_events_memoized(events):
    """Count the events that pass validation, using the memoized validator.

    Args:
        events: Iterable of (rule_id, event_type, data) triples. The data
            element is unpacked but not used.

    Returns:
        Number of events for which validate_rule_cached() returned a truthy
        value.
    """
    return sum(
        1
        for rule_id, event_type, _payload in events
        if validate_rule_cached(rule_id, event_type)
    )
|
|
|
|
|
|
# Precomputed version
|
|
def build_validation_table():
    """
    Precompute the validation result for every (rule_id, event_type) pair.

    Calls validate_rule_slow() once for each combination of RULES and
    EVENT_TYPES. This is O(n*m) upfront but O(1) per lookup thereafter.

    Returns:
        Dict mapping (rule_id, event_type) tuples to the validation result.
    """
    return {
        (rule_id, event_type): validate_rule_slow(rule_id, event_type)
        for rule_id in RULES
        for event_type in EVENT_TYPES
    }
|
|
|
|
|
|
# Module-level slot for the precomputed lookup table. It starts as None and is
# filled in by process_events_precomputed() the first time that function runs.
VALIDATION_TABLE = None  # Lazy initialization
|
|
|
|
|
|
def process_events_precomputed(events):
    """Count the events that pass validation via the precomputed table.

    The module-level VALIDATION_TABLE is built on the first call (whenever it
    is None) and reused afterwards, so each event costs one dict lookup.

    Args:
        events: Iterable of (rule_id, event_type, data) triples. The data
            element is unpacked but not used.

    Returns:
        Number of events whose table entry is truthy.

    Raises:
        KeyError: If an event's (rule_id, event_type) pair is not in the
            table.
    """
    global VALIDATION_TABLE
    if VALIDATION_TABLE is None:
        VALIDATION_TABLE = build_validation_table()

    lookup = VALIDATION_TABLE
    return sum(
        1
        for rule_id, event_type, _payload in events
        if lookup[(rule_id, event_type)]
    )
|
|
|
|
|
|
def generate_test_events(n):
    """Generate n reproducible pseudo-random test events.

    A private random.Random(42) generator is used instead of reseeding the
    module-level one. The events are identical on every call, and the
    interpreter-wide RNG state is left untouched for any other code that
    relies on it.

    Args:
        n: Number of events to generate; range(n) yields nothing for n <= 0,
            so the result is then an empty list.

    Returns:
        List of (rule_id, event_type, data) tuples, where rule_id comes from
        RULES, event_type from EVENT_TYPES, and data is "event_<index>".
    """
    import random

    rng = random.Random(42)  # Reproducible, without touching global RNG state
    # Tuple elements are evaluated left to right, so the rule is always drawn
    # before the event type; this keeps the sequence stable.
    return [
        (rng.choice(RULES), rng.choice(EVENT_TYPES), f"event_{i}")
        for i in range(n)
    ]
|
|
|
|
|
|
def benchmark(name, func, events):
    """Time one call of func(events) and print a one-line report.

    Args:
        name: Label for the report line (left-aligned, padded to 20 chars).
        func: Callable taking the events and returning the valid count.
        events: Value passed to func unchanged.

    Returns:
        Elapsed wall-clock time of the call in seconds, as measured with
        time.perf_counter().
    """
    clock = time.perf_counter
    started_at = clock()
    result = func(events)
    elapsed = clock() - started_at
    print(f"{name:20s}: {elapsed:.3f}s (valid: {result})")
    return elapsed
|
|
|
|
|
|
def _format_speedup(baseline, candidate):
    """Return baseline/candidate formatted like '12.3x', or 'n/a'.

    A run can be short enough that the timer reports 0.0 seconds. Dividing by
    that would raise ZeroDivisionError, so no ratio is reported in that case.
    """
    if candidate <= 0:
        return "n/a"
    return f"{baseline / candidate:.1f}x"


def main():
    """Benchmark the naive, memoized and precomputed strategies.

    The event count defaults to 5000 and can be overridden with the first
    command-line argument, which is converted with int() (a non-integer
    value raises ValueError). Prints one timing line per strategy, then the
    pairwise speedups.
    """
    global VALIDATION_TABLE

    n_events = 5000
    if len(sys.argv) > 1:
        n_events = int(sys.argv[1])

    print(f"Processing {n_events} events...")
    print(f"Unique (rule, event_type) combinations: {len(RULES) * len(EVENT_TYPES)}")
    print()

    events = generate_test_events(n_events)

    # Reset cached function for fair comparison
    validate_rule_cached.cache_clear()
    VALIDATION_TABLE = None

    t_naive = benchmark("Naive", process_events_naive, events)

    # Start the memoized run with a cold cache so its misses are timed too.
    validate_rule_cached.cache_clear()
    t_memo = benchmark("Memoized", process_events_memoized, events)

    # Drop any existing table so the precomputed run pays its build cost.
    VALIDATION_TABLE = None
    t_pre = benchmark("Precomputed", process_events_precomputed, events)

    print()
    print(f"Speedup (memo vs naive): {_format_speedup(t_naive, t_memo)}")
    print(f"Speedup (precomp vs naive): {_format_speedup(t_naive, t_pre)}")
    print(f"Speedup (precomp vs memo): {_format_speedup(t_memo, t_pre)}")


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|