Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions inference-spec/engines/llama.cpp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ commands:
- name: "--jinja"
description: "Flag indicating if the chat template uses Jinja"
if: "{{ not model.mmproj_path }}"
- name: "--no-jinja"
description: "Disable Jinja chat template engine for multimodal models"
if: "{{ model.mmproj_path }}"
- name: "--no-warmup"
description: "Flag to disable empty run for warm up"
- name: "--reasoning-budget"
Expand Down
3 changes: 3 additions & 0 deletions test/unit/command/data/engines/llama.cpp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ commands:
- name: "--jinja"
description: "Flag indicating if the chat template uses Jinja"
if: "{{ not model.mmproj_path }}"
- name: "--no-jinja"
description: "Disable Jinja chat template engine for multimodal models"
if: "{{ model.mmproj_path }}"
- name: "--no-warmup"
description: "Flag to disable empty run for warm up"
- name: "--reasoning-budget"
Expand Down
2 changes: 1 addition & 1 deletion test/unit/command/test_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class FactoryInput:
),
(
FactoryInput(has_mmproj=True),
"llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --mmproj /path/to/mmproj --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args", # noqa: E501
"llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --mmproj /path/to/mmproj --no-jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args", # noqa: E501
),
(
FactoryInput(has_chat_template=False),
Expand Down
Loading