@@ -27,7 +27,7 @@ def server():
2727 "bfloat16" ,
2828 "--enforce-eager" ,
2929 "--max-model-len" ,
30- "8192 " ,
30+ "512 " ,
3131 "--chat-template" ,
3232 DUMMY_CHAT_TEMPLATE ,
3333 ]
@@ -60,10 +60,10 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):
6060
6161 assert embeddings .id is not None
6262 assert len (embeddings .data ) == 1
63- assert len (embeddings .data [0 ].embedding ) == 4096
63+ assert len (embeddings .data [0 ].embedding ) == 384
6464 assert embeddings .usage .completion_tokens == 0
65- assert embeddings .usage .prompt_tokens == 9
66- assert embeddings .usage .total_tokens == 9
65+ assert embeddings .usage .prompt_tokens == 11
66+ assert embeddings .usage .total_tokens == 11
6767
6868 # test using token IDs
6969 input_tokens = [1 , 1 , 1 , 1 , 1 ]
@@ -77,7 +77,7 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):
7777
7878 assert embeddings .id is not None
7979 assert len (embeddings .data ) == 1
80- assert len (embeddings .data [0 ].embedding ) == 4096
80+ assert len (embeddings .data [0 ].embedding ) == 384
8181 assert embeddings .usage .completion_tokens == 0
8282 assert embeddings .usage .prompt_tokens == 5
8383 assert embeddings .usage .total_tokens == 5
@@ -101,10 +101,10 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
101101
102102 assert embeddings .id is not None
103103 assert len (embeddings .data ) == 3
104- assert len (embeddings .data [0 ].embedding ) == 4096
104+ assert len (embeddings .data [0 ].embedding ) == 384
105105 assert embeddings .usage .completion_tokens == 0
106- assert embeddings .usage .prompt_tokens == 32
107- assert embeddings .usage .total_tokens == 32
106+ assert embeddings .usage .prompt_tokens == 33
107+ assert embeddings .usage .total_tokens == 33
108108
109109 # test List[List[int]]
110110 input_tokens = [[4 , 5 , 7 , 9 , 20 ], [15 , 29 , 499 ], [24 , 24 , 24 , 24 , 24 ],
@@ -119,7 +119,7 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
119119
120120 assert embeddings .id is not None
121121 assert len (embeddings .data ) == 4
122- assert len (embeddings .data [0 ].embedding ) == 4096
122+ assert len (embeddings .data [0 ].embedding ) == 384
123123 assert embeddings .usage .completion_tokens == 0
124124 assert embeddings .usage .prompt_tokens == 17
125125 assert embeddings .usage .total_tokens == 17
@@ -234,7 +234,7 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,
234234
235235 assert embeddings .id is not None
236236 assert len (embeddings .data ) == 1
237- assert len (embeddings .data [0 ].embedding ) == 4096
237+ assert len (embeddings .data [0 ].embedding ) == 384
238238 assert embeddings .usage .completion_tokens == 0
239239 assert embeddings .usage .prompt_tokens == 10
240240 assert embeddings .usage .total_tokens == 10
@@ -252,7 +252,7 @@ async def test_single_embedding_truncation(client: openai.AsyncOpenAI,
252252
253253 assert embeddings .id is not None
254254 assert len (embeddings .data ) == 1
255- assert len (embeddings .data [0 ].embedding ) == 4096
255+ assert len (embeddings .data [0 ].embedding ) == 384
256256 assert embeddings .usage .completion_tokens == 0
257257 assert embeddings .usage .prompt_tokens == 10
258258 assert embeddings .usage .total_tokens == 10
0 commit comments