#!/usr/bin/env python3
"""
Scenario 2b: The Precomputation Insight
=======================================
This simulates a config validator that checks rules against events.
The "expensive" validation function is called repeatedly with the same inputs.
This example shows three stages of optimization:
1. Naive: call the function every time
2. Memoized: cache results with @lru_cache
3. Precomputed: since inputs are known ahead of time, build a lookup table
EXERCISES:
1. Run each version and compare times
2. Profile each version - observe ncalls and cumtime
3. Think about: when is precomputation better than memoization?
"""
import random
import sys
import time
from functools import lru_cache


# Simulated "expensive" validation function
def validate_rule_slow(rule_id, event_type):
    """
    Simulate an expensive validation check.
    In real life, this might query a database, parse XML, etc.
    """
    # Artificial delay to simulate expensive computation
    total = 0
    for i in range(10000):
        total += (rule_id * event_type * i) % 997
    return total % 2 == 0  # Returns True or False


# The set of all valid (rule_id, event_type) pairs we'll encounter
RULES = [1, 2, 3, 4, 5]
EVENT_TYPES = [10, 20, 30, 40, 50]
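
# Note the scale mismatch: only 5 x 5 = 25 unique (rule_id, event_type)
# combinations, while main() below processes thousands of events. That gap is
# exactly what memoization and precomputation exploit.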


def process_events_naive(events):
    """Process events using naive repeated validation."""
    valid_count = 0
    for rule_id, event_type, data in events:
        if validate_rule_slow(rule_id, event_type):
            valid_count += 1
    return valid_count


# Memoized version
@lru_cache(maxsize=None)
def validate_rule_cached(rule_id, event_type):
    """Same validation but with caching."""
    total = 0
    for i in range(10000):
        total += (rule_id * event_type * i) % 997
    return total % 2 == 0
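
# A couple of stdlib details worth knowing: lru_cache requires hashable
# arguments (the ints here qualify), and validate_rule_cached.cache_info()
# reports hits/misses if you want to verify the cache is working.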


def process_events_memoized(events):
    """Process events using memoized validation."""
    valid_count = 0
    for rule_id, event_type, data in events:
        if validate_rule_cached(rule_id, event_type):
            valid_count += 1
    return valid_count


# Precomputed version
def build_validation_table():
    """
    Build a lookup table for all possible (rule_id, event_type) combinations.
    This is O(n*m) upfront but O(1) per lookup thereafter.
    """
    table = {}
    for rule_id in RULES:
        for event_type in EVENT_TYPES:
            table[(rule_id, event_type)] = validate_rule_slow(rule_id, event_type)
    return table
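
# Trade-off to notice for exercise 3: this builds an entry for *every*
# combination up front, whether or not it ever appears in the event stream,
# while memoization only pays for the combinations actually encountered.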


VALIDATION_TABLE = None  # Lazy initialization


def process_events_precomputed(events):
    """Process events using precomputed lookup table."""
    global VALIDATION_TABLE
    if VALIDATION_TABLE is None:
        VALIDATION_TABLE = build_validation_table()
    valid_count = 0
    for rule_id, event_type, data in events:
        if VALIDATION_TABLE[(rule_id, event_type)]:
            valid_count += 1
    return valid_count
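
# The lazy global (instead of building the table at import time) is what lets
# main() reset it with VALIDATION_TABLE = None and re-measure the build cost.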


def generate_test_events(n):
    """Generate n random test events."""
    random.seed(42)  # Reproducible
    events = []
    for i in range(n):
        rule_id = random.choice(RULES)
        event_type = random.choice(EVENT_TYPES)
        data = f"event_{i}"
        events.append((rule_id, event_type, data))
    return events


def benchmark(name, func, events):
    """Run a function and report timing."""
    start = time.perf_counter()
    result = func(events)
    elapsed = time.perf_counter() - start
    print(f"{name:20s}: {elapsed:.3f}s (valid: {result})")
    return elapsed
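
# time.perf_counter() is the right clock for this: it is monotonic and
# high-resolution, unlike time.time(), which can jump if the wall clock changes.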


def main():
    n_events = 5000
    if len(sys.argv) > 1:
        n_events = int(sys.argv[1])
    print(f"Processing {n_events} events...")
    print(f"Unique (rule, event_type) combinations: {len(RULES) * len(EVENT_TYPES)}")
    print()

    events = generate_test_events(n_events)

    # Reset all cached state (memo cache and lookup table) for a fair comparison
    global VALIDATION_TABLE
    validate_rule_cached.cache_clear()
    VALIDATION_TABLE = None

    t_naive = benchmark("Naive", process_events_naive, events)
    validate_rule_cached.cache_clear()
    t_memo = benchmark("Memoized", process_events_memoized, events)
    VALIDATION_TABLE = None
    t_pre = benchmark("Precomputed", process_events_precomputed, events)

    print()
    print(f"Speedup (memo vs naive): {t_naive/t_memo:.1f}x")
    print(f"Speedup (precomp vs naive): {t_naive/t_pre:.1f}x")
    print(f"Speedup (precomp vs memo): {t_memo/t_pre:.1f}x")


if __name__ == "__main__":
    main()