#!/usr/bin/env python3
"""
Scenario 2b: The Precomputation Insight
=======================================

This simulates a config validator that checks rules against events.
The "expensive" validation function is called repeatedly with the same
inputs.

This example shows three stages of optimization:

1. Naive: call the function every time
2. Memoized: cache results with @lru_cache
3. Precomputed: since inputs are known ahead of time, build a lookup table

EXERCISES:
1. Run each version and compare times
2. Profile each version - observe ncalls and cumtime
3. Think about: when is precomputation better than memoization?
"""

import random
import sys
import time
from functools import lru_cache


# Simulated "expensive" validation function
def validate_rule_slow(rule_id, event_type):
    """
    Simulate an expensive validation check.

    In real life, this might query a database, parse XML, etc.
    The result is deterministic for a given (rule_id, event_type) pair.

    Returns:
        True when the accumulated checksum is even, else False.
    """
    # Artificial delay to simulate expensive computation.
    total = 0
    for i in range(10000):
        total += (rule_id * event_type * i) % 997
    return total % 2 == 0  # Returns True or False


# The set of all valid (rule_id, event_type) pairs we'll encounter.
RULES = [1, 2, 3, 4, 5]
EVENT_TYPES = [10, 20, 30, 40, 50]


def process_events_naive(events):
    """Process events using naive repeated validation.

    Args:
        events: iterable of (rule_id, event_type, data) tuples.

    Returns:
        Number of events whose (rule_id, event_type) pair validates.
    """
    valid_count = 0
    for rule_id, event_type, _data in events:
        if validate_rule_slow(rule_id, event_type):
            valid_count += 1
    return valid_count


# Memoized version.  Delegating to validate_rule_slow (instead of
# duplicating its body, as before) keeps the two implementations in
# sync: a change to the validation logic can no longer silently drift
# between copies.  maxsize=None is safe here because the key space is
# bounded (len(RULES) * len(EVENT_TYPES) pairs).
@lru_cache(maxsize=None)
def validate_rule_cached(rule_id, event_type):
    """Same validation as validate_rule_slow, but memoized per input pair."""
    return validate_rule_slow(rule_id, event_type)


def process_events_memoized(events):
    """Process events using memoized validation (see process_events_naive)."""
    valid_count = 0
    for rule_id, event_type, _data in events:
        if validate_rule_cached(rule_id, event_type):
            valid_count += 1
    return valid_count


# Precomputed version
def build_validation_table():
    """
    Build a lookup table for all possible (rule_id, event_type) combinations.

    This is O(n*m) upfront but O(1) per lookup thereafter.
    """
    return {
        (rule_id, event_type): validate_rule_slow(rule_id, event_type)
        for rule_id in RULES
        for event_type in EVENT_TYPES
    }


VALIDATION_TABLE = None  # Lazy initialization


def process_events_precomputed(events):
    """Process events using a precomputed lookup table (built on first use)."""
    global VALIDATION_TABLE
    if VALIDATION_TABLE is None:
        VALIDATION_TABLE = build_validation_table()
    valid_count = 0
    for rule_id, event_type, _data in events:
        if VALIDATION_TABLE[(rule_id, event_type)]:
            valid_count += 1
    return valid_count


def generate_test_events(n):
    """Generate n reproducible (rule_id, event_type, data) test events."""
    random.seed(42)  # Reproducible
    # Tuple elements evaluate left-to-right, matching the original
    # rule-then-event draw order, so the event stream is unchanged.
    return [
        (random.choice(RULES), random.choice(EVENT_TYPES), f"event_{i}")
        for i in range(n)
    ]


def benchmark(name, func, events):
    """Run func(events), print the wall-clock time, and return it."""
    start = time.perf_counter()
    result = func(events)
    elapsed = time.perf_counter() - start
    print(f"{name:20s}: {elapsed:.3f}s (valid: {result})")
    return elapsed


def main():
    """Benchmark all three strategies on the same event stream."""
    n_events = 5000
    if len(sys.argv) > 1:
        n_events = int(sys.argv[1])

    print(f"Processing {n_events} events...")
    print(f"Unique (rule, event_type) combinations: {len(RULES) * len(EVENT_TYPES)}")
    print()

    events = generate_test_events(n_events)

    # Reset cached state for a fair comparison.
    validate_rule_cached.cache_clear()
    global VALIDATION_TABLE
    VALIDATION_TABLE = None

    t_naive = benchmark("Naive", process_events_naive, events)

    validate_rule_cached.cache_clear()
    t_memo = benchmark("Memoized", process_events_memoized, events)

    VALIDATION_TABLE = None
    t_pre = benchmark("Precomputed", process_events_precomputed, events)

    print()
    print(f"Speedup (memo vs naive): {t_naive/t_memo:.1f}x")
    print(f"Speedup (precomp vs naive): {t_naive/t_pre:.1f}x")
    print(f"Speedup (precomp vs memo): {t_memo/t_pre:.1f}x")


if __name__ == "__main__":
    main()