@@ -49,7 +49,8 @@ func TestPrefixPluginCompletion(t *testing.T) {
4949
5050 pod1 := & types.PodMetrics {Pod : & backend.Pod {NamespacedName : k8stypes.NamespacedName {Name : "pod1" }}, MetricsState : backendmetrics .NewMetricsState ()}
5151 pod2 := & types.PodMetrics {Pod : & backend.Pod {NamespacedName : k8stypes.NamespacedName {Name : "pod2" }}, MetricsState : backendmetrics .NewMetricsState ()}
52- pods := []types.Pod {pod1 , pod2 }
52+ pod3 := & types.PodMetrics {Pod : & backend.Pod {NamespacedName : k8stypes.NamespacedName {Name : "pod3" }}, MetricsState : backendmetrics .NewMetricsState ()}
53+ pods := []types.Pod {pod1 , pod2 , pod3 }
5354
5455 // First request.
5556 req1 := & types.LLMRequest {
@@ -72,11 +73,12 @@ func TestPrefixPluginCompletion(t *testing.T) {
7273 assert .Equal (t , float64 (0 ), scores [pod1 ], "score for pod1" )
7374 assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
7475
75- // Simulate pod1 was picked.
76+ // Simulate pod1 was picked and pod3 was picked as a prefill node .
7677 schedulingResult := & types.SchedulingResult {
7778 PrimaryProfileName : "default" ,
7879 ProfileResults : map [string ]* types.ProfileRunResult {
79- "default" : {TargetPods : []types.Pod {pod1 }},
80+ "default" : {TargetPods : []types.Pod {pod1 }},
81+ Experimental_DefaultPrefillProfile : {TargetPods : []types.Pod {pod3 }},
8082 },
8183 }
8284 plugin .PreRequest (context .Background (), req1 , schedulingResult )
@@ -131,8 +133,9 @@ func TestPrefixPluginCompletion(t *testing.T) {
131133 // Input size is 8, hash block size is 4, so 2 hashes will be calculated.
132134 // Total hashes = 2 (the first one is for the prefix with model)
133135 assert .Equal (t , 2 , len (state .PrefixHashes ), "number of hashes is incorrect" )
134- assert .Equal (t , 1 , len (state .PrefixCacheServers ), "pod1 should have cached the aaaa prefix" )
136+ assert .Equal (t , 2 , len (state .PrefixCacheServers ), "pod1 and pod3 should have cached the aaaa prefix" )
135137 assert .Equal (t , 0.5 , scores [pod1 ], "score should be 0.5 - the model and the first prefix block match" )
138+ assert .Equal (t , 0.5 , scores [pod3 ], "score should be 0.5 - the model and the first prefix block match on the prefill node" )
136139 assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
137140
138141 schedulingResult = & types.SchedulingResult {
@@ -191,7 +194,7 @@ func TestPrefixPluginCompletion(t *testing.T) {
191194 // Input size is 12, hash block size is 4, so 3 hashes will be calculated.
192195 // Total hashes = 3 (the first one is for the prefix with model)
193196 assert .Equal (t , 3 , len (state .PrefixHashes ), "number of hashes is incorrect" )
194- assert .Equal (t , 1 , len (state .PrefixCacheServers ), "pod1 should have cached the aaaa prefix" )
197+ assert .Equal (t , 2 , len (state .PrefixCacheServers ), "pod1 and pod3 should have cached the aaaa prefix" )
195198 assert .Equal (t , 2. / 3 , scores [pod1 ], "score should be 2./3 - the model and the first 2 prefix blocks match" )
196199 assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
197200
0 commit comments