From 54c792e42b39fd74b3ae2b3e236b2564eae63c62 Mon Sep 17 00:00:00 2001 From: Aleksandr Suslov Date: Fri, 31 Oct 2025 09:27:33 -0500 Subject: [PATCH] Add --max-decode-steps and --add-start-token in paged_llm_v1.py example in the cookbook --- docs/model_cookbook.md | 2 ++ sharktank/sharktank/examples/paged_llm_v1.py | 2 +- sharktank/sharktank/utils/cli.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/model_cookbook.md b/docs/model_cookbook.md index e2eb245fd11..89d342be1c9 100644 --- a/docs/model_cookbook.md +++ b/docs/model_cookbook.md @@ -122,6 +122,8 @@ python -m sharktank.examples.paged_llm_v1 \ --gguf-file=/tmp/mistral-7b-v0.1-f32.gguf \ --tokenizer-config-json=/tmp/mistral-7b/tokenizer_config.json \ --prompt "Write a story about llamas" \ + --max-decode-steps=128 \ + --add-start-token \ --device='cuda:0' # Export as MLIR diff --git a/sharktank/sharktank/examples/paged_llm_v1.py b/sharktank/sharktank/examples/paged_llm_v1.py index c26933a93f5..8299f5a3bcd 100644 --- a/sharktank/sharktank/examples/paged_llm_v1.py +++ b/sharktank/sharktank/examples/paged_llm_v1.py @@ -125,7 +125,7 @@ def main(cli_args: list[str] | None = None): device=model._model.device, ).tolist() else: - token_ids = tokenizer.encode(texts=args.prompt, add_start_token=False)[0] + token_ids = tokenizer.encode(texts=args.prompt, add_start_token=args.add_start_token)[0] results = decoder.greedy_decode(token_ids, args.max_decode_steps) print(f":: Result tokens: {results}") diff --git a/sharktank/sharktank/utils/cli.py b/sharktank/sharktank/utils/cli.py index d97761e42c9..bd2c67bdccd 100644 --- a/sharktank/sharktank/utils/cli.py +++ b/sharktank/sharktank/utils/cli.py @@ -337,6 +337,11 @@ def add_tokenizer_options(parser: argparse.ArgumentParser): help="Direct path to a tokenizer_config.json file", type=Path, ) + parser.add_argument( + "--add-start-token", + help="Adds start token to the prompt", + action="store_true", + ) def add_log_options(parser: 
argparse.ArgumentParser):