This commit is contained in:
illustris
2026-01-08 18:11:30 +05:30
commit 4fb1bd90db
32 changed files with 3058 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
# Scenario 2: Memoization and Precomputation
## Learning Objectives
- Read cProfile output to identify redundant function calls
- Use `@functools.lru_cache` for automatic memoization
- Recognize when precomputation beats memoization
- Understand space-time trade-offs
## Files
- `fib_slow.py` - Naive recursive Fibonacci (exponential time)
- `fib_cached.py` - Memoized Fibonacci (linear time)
- `config_validator.py` - Comparison of naive, memoized, and precomputed approaches
## Exercise 1: Fibonacci
### Step 1: Experience the slowness
```bash
time python3 fib_slow.py 35
```
This should take several seconds. Don't try n=50!
### Step 2: Profile to understand why
```bash
python3 -m cProfile -s ncalls fib_slow.py 35 2>&1 | head -20
```
Key insight: Look at `ncalls` for the `fib` function. For fib(35), it's called
millions of times because we recompute the same values repeatedly.
The call tree looks like:
```
fib(5)
├── fib(4)
│   ├── fib(3)
│   │   ├── fib(2)
│   │   └── fib(1)
│   └── fib(2)
└── fib(3)   <-- Same subtree as above! Redundant!
    ├── fib(2)
    └── fib(1)
```
### Step 3: Apply memoization
```bash
time python3 fib_cached.py 35
```
Now try a much larger value:
```bash
time python3 fib_cached.py 100
```
### Step 4: Verify the improvement
```bash
python3 -m cProfile -s ncalls fib_cached.py 35 2>&1 | head -20
```
The `ncalls` should now be O(n) instead of O(2^n).
## Exercise 2: Config Validator
This example shows when precomputation is better than memoization.
### Run all three strategies
```bash
python3 config_validator.py 5000
```
### Profile to understand the differences
```bash
python3 -m cProfile -s cumtime config_validator.py 5000
```
### Discussion Questions
1. Why is precomputation faster than memoization here?
- Hint: How many unique inputs are there?
- Hint: What's the overhead of cache lookup vs dict lookup?
2. When would memoization be better than precomputation?
- Hint: What if there were 10,000 rules and 10,000 event types?
- Hint: What if we didn't know the inputs ahead of time?
3. What's the memory trade-off?
## Key Takeaways
| Approach | When to Use |
|----------|-------------|
| No caching | Function is cheap OR called once per input |
| Memoization | Unknown/large input space, function is expensive |
| Precomputation | Known/small input space, amortize cost over many lookups |
## Further Reading
- `functools.lru_cache` documentation
- `functools.cache` (Python 3.9+) - unbounded cache, simpler API

View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""
Scenario 2b: The Precomputation Insight
=======================================
This simulates a config validator that checks rules against events.
The "expensive" validation function is called repeatedly with the same inputs.
This example shows three stages of optimization:
1. Naive: call the function every time
2. Memoized: cache results with @lru_cache
3. Precomputed: since inputs are known ahead of time, build a lookup table
EXERCISES:
1. Run each version and compare times
2. Profile each version - observe ncalls and cumtime
3. Think about: when is precomputation better than memoization?
"""
import sys
import time
from functools import lru_cache
# Simulated "expensive" validation function
def validate_rule_slow(rule_id, event_type):
    """
    Simulate an expensive validation check.

    In real life this might query a database or parse XML; here we just
    burn CPU deterministically so profiler output has something to show.
    Returns a bool that depends only on (rule_id, event_type), which is
    exactly the property that makes caching/precomputation worthwhile.
    """
    product = rule_id * event_type
    # Deterministic busy-work: 10,000 modular multiply-adds.
    checksum = sum((product * step) % 997 for step in range(10000))
    return checksum % 2 == 0
# The universe of all (rule_id, event_type) pairs the event stream can
# produce: 5 x 5 = 25 unique combinations. Keeping this space small and
# known up front is what makes full precomputation practical.
RULES = [1, 2, 3, 4, 5]
EVENT_TYPES = [10, 20, 30, 40, 50]
def process_events_naive(events):
    """Count valid events, re-running the slow validation for every event."""
    # No caching at all: validate_rule_slow executes once per event even
    # though only a handful of (rule_id, event_type) pairs ever occur.
    return sum(
        1
        for rule_id, event_type, _data in events
        if validate_rule_slow(rule_id, event_type)
    )
# Memoized version
@lru_cache(maxsize=None)
def validate_rule_cached(rule_id, event_type):
    """Same validation as validate_rule_slow, but with result caching."""
    # The first call for a given (rule_id, event_type) pays the full
    # cost; every repeat is an O(1) lookup inside lru_cache.
    checksum = sum((rule_id * event_type * step) % 997 for step in range(10000))
    return checksum % 2 == 0
def process_events_memoized(events):
    """Count valid events using the lru_cache-backed validator."""
    valid = 0
    for rule_id, event_type, _data in events:
        # bool is an int subclass, so True adds 1 and False adds 0.
        valid += validate_rule_cached(rule_id, event_type)
    return valid
# Precomputed version
def build_validation_table():
    """
    Build a lookup table for every (rule_id, event_type) combination.

    Pays the full validation cost O(len(RULES) * len(EVENT_TYPES)) once
    up front; every later lookup is a plain O(1) dict access.
    """
    return {
        (rule, event): validate_rule_slow(rule, event)
        for rule in RULES
        for event in EVENT_TYPES
    }
# Module-level cache for build_validation_table(); left as None until
# process_events_precomputed() first needs it (lazy initialization).
VALIDATION_TABLE = None  # Lazy initialization
def process_events_precomputed(events):
    """Count valid events via the precomputed lookup table."""
    global VALIDATION_TABLE
    # Build the table on first use so importing the module stays cheap.
    if VALIDATION_TABLE is None:
        VALIDATION_TABLE = build_validation_table()
    table = VALIDATION_TABLE
    return sum(
        1
        for rule_id, event_type, _data in events
        if table[(rule_id, event_type)]
    )
def generate_test_events(n):
    """Generate n pseudo-random (rule_id, event_type, data) tuples."""
    import random

    random.seed(42)  # Fixed seed keeps every run reproducible
    # RNG calls happen in the same order as before (rule first, then
    # event type), so the generated stream is unchanged.
    return [
        (random.choice(RULES), random.choice(EVENT_TYPES), f"event_{idx}")
        for idx in range(n)
    ]
def benchmark(name, func, events):
    """Time one call of func(events), print a summary line, return seconds."""
    began = time.perf_counter()
    outcome = func(events)
    duration = time.perf_counter() - began
    print(f"{name:20s}: {duration:.3f}s (valid: {outcome})")
    return duration
def main():
    """Run all three strategies over the same event stream and compare timings."""
    # Event count comes from argv[1] if given, else defaults to 5000.
    n_events = int(sys.argv[1]) if len(sys.argv) > 1 else 5000
    print(f"Processing {n_events} events...")
    print(f"Unique (rule, event_type) combinations: {len(RULES) * len(EVENT_TYPES)}")
    print()
    events = generate_test_events(n_events)

    # Start every strategy from a cold cache for a fair comparison.
    validate_rule_cached.cache_clear()
    global VALIDATION_TABLE
    VALIDATION_TABLE = None

    t_naive = benchmark("Naive", process_events_naive, events)
    validate_rule_cached.cache_clear()
    t_memo = benchmark("Memoized", process_events_memoized, events)
    VALIDATION_TABLE = None
    t_pre = benchmark("Precomputed", process_events_precomputed, events)

    print()
    print(f"Speedup (memo vs naive): {t_naive/t_memo:.1f}x")
    print(f"Speedup (precomp vs naive): {t_naive/t_pre:.1f}x")
    print(f"Speedup (precomp vs memo): {t_memo/t_pre:.1f}x")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""
Scenario 2: Memoization with functools.lru_cache
================================================
Adding @lru_cache transforms O(2^n) into O(n) by caching results.
EXERCISES:
1. Run: time python3 fib_cached.py 35
2. Compare to fib_slow.py - how much faster?
3. Profile: python3 -m cProfile -s ncalls fib_cached.py 35
4. Notice the dramatic reduction in call count
5. Try a much larger number: python3 fib_cached.py 100
"""
import sys
from functools import lru_cache
@lru_cache(maxsize=None)  # Unbounded cache: every fib(k) result is kept
def fib(n):
    """Return the nth Fibonacci number, memoizing every intermediate value."""
    # Evaluation order (n-1 before n-2) is preserved so the hit/miss
    # counts reported by fib.cache_info() stay the same as before.
    return n if n <= 1 else fib(n - 1) + fib(n - 2)
def main():
    """CLI entry point: compute fib(n) for n taken from argv (default 35)."""
    n = int(sys.argv[1]) if len(sys.argv) > 1 else 35
    print(f"Computing fib({n}) with memoization...")
    result = fib(n)
    print(f"fib({n}) = {result}")
    # Show cache statistics: the hit/miss counts demonstrate the win.
    print(f"\nCache info: {fib.cache_info()}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""
Scenario 2: Hidden Redundancy - The Memoization Problem
========================================================
This program computes Fibonacci numbers recursively.
The naive implementation has exponential time complexity due to redundant calls.
EXERCISES:
1. Run: time python3 fib_slow.py 35
2. Profile: python3 -m cProfile -s ncalls fib_slow.py 35
3. Notice the HUGE number of calls to fib()
4. See fib_cached.py for the memoized version
"""
import sys
def fib(n):
    """Return the nth Fibonacci number via naive double recursion."""
    # Intentionally unmemoized: fib(k) is recomputed exponentially many
    # times, which is the redundancy the profiling exercise exposes.
    return n if n <= 1 else fib(n - 1) + fib(n - 2)
def main():
    """CLI entry point: compute fib(n) for n taken from argv (default 35)."""
    # Default of 35 already takes seconds; much larger n hangs the demo.
    n = int(sys.argv[1]) if len(sys.argv) > 1 else 35
    print(f"Computing fib({n})...")
    result = fib(n)
    print(f"fib({n}) = {result}")


if __name__ == "__main__":
    main()