From 54c792e42b39fd74b3ae2b3e236b2564eae63c62 Mon Sep 17 00:00:00 2001 From: Aleksandr Suslov Date: Fri, 31 Oct 2025 09:27:33 -0500 Subject: [PATCH] Add --max-decode-steps and --add-start-token in paged_llm_v1.py example in the cookbook --- docs/model_cookbook.md | 2 ++ sharktank/sharktank/examples/paged_llm_v1.py | 2 +- sharktank/sharktank/utils/cli.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/model_cookbook.md b/docs/model_cookbook.md index e2eb245fd11..89d342be1c9 100644 --- a/docs/model_cookbook.md +++ b/docs/model_cookbook.md @@ -122,6 +122,8 @@ python -m sharktank.examples.paged_llm_v1 \ --gguf-file=/tmp/mistral-7b-v0.1-f32.gguf \ --tokenizer-config-json=/tmp/mistral-7b/tokenizer_config.json \ --prompt "Write a story about llamas" \ + --max-decode-steps=128 \ + --add-start-token \ --device='cuda:0' # Export as MLIR diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py index c26933a93f5..8299f5a3bcd 100644 --- a/sharktank/sharktank/examples/paged_llm_v1.py +++ b/sharktank/sharktank/examples/paged_llm_v1.py @@ -125,7 +125,7 @@ def main(cli_args: list[str] | None = None): device=model._model.device, ).tolist() else: - token_ids = tokenizer.encode(texts=args.prompt, add_start_token=False)[0] + token_ids = tokenizer.encode(texts=args.prompt, add_start_token=args.add_start_token)[0] results = decoder.greedy_decode(token_ids, args.max_decode_steps) print(f":: Result tokens: {results}") diff --git a/sharktank/sharktank/utils/cli.py b/sharktank/sharktank/utils/cli.py index d97761e42c9..bd2c67bdccd 100644 --- a/sharktank/sharktank/utils/cli.py +++ b/sharktank/sharktank/utils/cli.py @@ -337,6 +337,11 @@ def add_tokenizer_options(parser: argparse.ArgumentParser): help="Direct path to a tokenizer_config.json file", type=Path, ) + parser.add_argument( + "--add-start-token", + help="Adds start token to the prompt", + action="store_true", + ) def add_log_options(parser: 
argparse.ArgumentParser):