1 parent 9872744 commit c4d1c1b
tests/spec_decode/e2e/test_mlp_correctness.py
@@ -24,14 +24,14 @@
 from .conftest import run_greedy_equality_correctness_test
 
 # main model
-MAIN_MODEL = "ibm-granite/granite-3b-code-instruct"
+MAIN_MODEL = "JackFram/llama-160m"
 
 # speculative model
-SPEC_MODEL = "ibm-granite/granite-3b-code-instruct-accelerator"
+SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
 
 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.
-MAX_SPEC_TOKENS = 5
+MAX_SPEC_TOKENS = 3
 
 # precision
 PRECISION = "float32"