"""
Test Real ML Training Pipeline
Tests the complete pipeline:
1. Multiple solvers train on MNIST subsets
2. Weight deltas are computed
3. Aggregator combines deltas using FedAvg
4. Resulting model can be tested
"""
import sys
import logging

from nodes.common.ml import (
    train_on_task,
    aggregate_weight_deltas,
    apply_weight_delta,
    SimpleNet,
    save_weights,
    load_weights,
    evaluate_model,
)
from nodes.common.blob_store import BlobStore

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
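
# Usage note (assumption, not stated in this file): run from the repository root
# so that the `nodes` package is importable, e.g. `python test_ml_pipeline.py`.
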
def main():
    print("\n" + "=" * 60)
    print("TESTING REAL ML TRAINING PIPELINE")
    print("=" * 60)

    # Initialize
    store = BlobStore()

    # Step 1: Create initial model
    print("\n[1/5] Creating initial model...")
    model = SimpleNet()
    initial_weights = model.state_dict()
    initial_cid = save_weights(initial_weights, store)
    print(f" Initial model CID: {initial_cid}")
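
    # Note (assumption): the CID is a content-addressed identifier returned by
    # BlobStore for the serialized state_dict; its exact format depends on the
    # blob store implementation.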

    # Step 2: Simulate multiple solvers training
    print("\n[2/5] Simulating 3 solvers training on MNIST...")
    num_solvers = 3
    updates = []

    for i in range(num_solvers):
        print(f"\n Solver {i+1} training...")
        task_spec = {"task_id": i + 1, "solver": f"solver-{i}"}

        weight_delta, metrics = train_on_task(
            task_spec=task_spec,
            global_model_cid=initial_cid,
            store=store,
            epochs=1,
            num_samples=200,  # Small subset for fast training
            batch_size=32,
            learning_rate=0.01,
        )

        update = {
            "weight_delta": weight_delta,
            "metrics": metrics,
            "solver": f"solver-{i}",
        }
        updates.append(update)

        print(f" Solver {i+1} completed:")
        print(f" Loss: {metrics['loss']:.4f}")
        print(f" Accuracy: {metrics['accuracy']:.4f}")
        print(f" Training time: {metrics['training_time']:.2f}s")
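
    # Assumption about train_on_task's contract (inferred from the module
    # docstring): each weight_delta holds per-parameter differences between the
    # locally trained weights and the global weights it started from
    # (delta = local - global), so applying the aggregated delta to the initial
    # model reproduces one FedAvg round.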

    # Step 3: Aggregate weight deltas
    print("\n[3/5] Aggregating weight deltas using FedAvg...")
    deltas = [u["weight_delta"] for u in updates]
    weights = [u["metrics"]["num_samples"] for u in updates]

    aggregated_delta = aggregate_weight_deltas(deltas, weights)
    print(f" Aggregated {len(deltas)} weight deltas")
    print(f" Sample weights: {weights}")

    # Calculate aggregated metrics
    avg_loss = sum(u["metrics"]["loss"] for u in updates) / len(updates)
    avg_accuracy = sum(u["metrics"]["accuracy"] for u in updates) / len(updates)
    total_samples = sum(weights)
    print(f" Average loss: {avg_loss:.4f}")
    print(f" Average accuracy: {avg_accuracy:.4f}")
    print(f" Total samples: {total_samples}")
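
    # Sketch of the sample-weighted (FedAvg) average that aggregate_weight_deltas
    # is assumed to compute, where n_i is solver i's sample count:
    #
    #   aggregated_delta[k] = sum_i(n_i * delta_i[k]) / sum_i(n_i)   for each tensor k
    #
    # A hypothetical standalone equivalent (not part of nodes.common.ml):
    #
    #   def fedavg(deltas, counts):
    #       total = float(sum(counts))
    #       return {k: sum(n * d[k] for n, d in zip(counts, deltas)) / total
    #               for k in deltas[0]}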

    # Step 4: Apply aggregated delta to initial model
    print("\n[4/5] Applying aggregated delta to initial model...")
    updated_weights = apply_weight_delta(initial_weights, aggregated_delta)
    updated_cid = save_weights(updated_weights, store)
    print(f" Updated model CID: {updated_cid}")
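
    # apply_weight_delta is assumed to add the aggregated delta to the initial
    # parameters (w_new = w_init + delta), completing one FedAvg round.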

    # Step 5: Test updated model
    print("\n[5/5] Testing updated model on test set...")
    try:
        test_metrics = evaluate_model(updated_cid, store, num_samples=500)
        print(f" Test loss: {test_metrics['test_loss']:.4f}")
        print(f" Test accuracy: {test_metrics['test_accuracy']:.4f}")
        print(f" Test samples: {test_metrics['num_samples']}")
    except Exception as e:
        print(f" Test failed (expected for minimal training): {e}")

    # Summary
    print("\n" + "=" * 60)
    print("PIPELINE TEST COMPLETE")
    print("=" * 60)
    print("\nSummary:")
    print(f" - {num_solvers} solvers trained on {total_samples} total MNIST samples")
    print(f" - Average training accuracy: {avg_accuracy:.2%}")
    print(" - Weight deltas aggregated using FedAvg")
    print(" - Global model updated and stored in blob store")
    print("\nNOTE: Low accuracy is expected with:")
    print(" - Very small training sets (200 samples per solver)")
    print(" - Single epoch training")
    print(" - Random initial weights")
    print("\nFor production:")
    print(" - Use larger datasets (1000+ samples)")
    print(" - Train for multiple epochs")
    print(" - Use multiple aggregation rounds")
    print()


if __name__ == "__main__":
    main()