Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
93ab6b9
Add Llama4 support
CatherineSue Apr 3, 2025
73c5d6d
complete pipeline
ch-wan Apr 6, 2025
ca9870e
fix
ch-wan Apr 6, 2025
fdb0dd6
add local_attn
CatherineSue Apr 6, 2025
2cd80c2
load weight
ch-wan Apr 6, 2025
5a56108
Merge branch 'main-upstream' into llama4
fzyzcjy Apr 6, 2025
ac4cca3
rm mllama4
fzyzcjy Apr 6, 2025
6cfb3a7
load experts
ch-wan Apr 6, 2025
9fd5188
load weight
ch-wan Apr 6, 2025
6afdfdf
Revert "rm mllama4"
fzyzcjy Apr 6, 2025
a8d4bff
Merge commit '9fd5188965867d0335d8dde357ec81b1a6880982' into pr/Cathe…
ch-wan Apr 6, 2025
b0703ec
polish code
ch-wan Apr 6, 2025
6b21ef5
cleanup
ispobock Apr 6, 2025
114a366
format
ispobock Apr 6, 2025
1378fe0
fix norm
ch-wan Apr 6, 2025
1f18b0c
add conversation template
ispobock Apr 6, 2025
5c434d7
apply_router_weight_on_input
ch-wan Apr 6, 2025
3dc59e1
add chat template
ispobock Apr 6, 2025
9266d96
format
ispobock Apr 6, 2025
a204c21
fix load
ispobock Apr 6, 2025
cedb65c
Merge branch 'main' into llama4
zhyncs Apr 6, 2025
cc7e862
support k > 1
ch-wan Apr 6, 2025
d8c4432
lint
ispobock Apr 6, 2025
f5d4cf7
fix
ch-wan Apr 6, 2025
95de87d
fix mlp
fzyzcjy Apr 6, 2025
7d45b7d
fix local_attn support
CatherineSue Apr 6, 2025
2e123a3
minor
ch-wan Apr 7, 2025
be1a383
fix
ch-wan Apr 7, 2025
f06da8b
format
ch-wan Apr 7, 2025
12ca1f9
cherry pick tuning outputs
fzyzcjy Apr 7, 2025
e724acd
cherry pick torch compile
fzyzcjy Apr 7, 2025
5861462
fix template
ispobock Apr 7, 2025
2429342
lint
ispobock Apr 7, 2025
a41fcad
clean up documentation and imports
CatherineSue Apr 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/references/supported_models.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Supported Models

## Generative Models
- Llama / Llama 2 / Llama 3 / Llama 3.1 / Llama 3.2 / Llama 3.3
- Llama / Llama 2 / Llama 3 / Llama 3.1 / Llama 3.2 / Llama 3.3 / Llama 4
- Mistral / Mixtral / Mistral NeMo / Mistral Small 3
- Gemma / Gemma 2 / Gemma3
- Qwen / Qwen 2 / Qwen 2 MoE / Qwen 2 VL / Qwen 2.5 VL / Olympic Coder
Expand Down
24 changes: 24 additions & 0 deletions python/sglang/lang/chat_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,30 @@ def get_chat_template_by_model_path(model_path):
)
)

# Reference: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/chat_template.json
register_chat_template(
    ChatTemplate(
        name="llama-4",
        default_system_prompt=None,
        # All three roles share the same wrapper pattern: the role name is
        # embedded between <|header_start|>/<|header_end|> markers and each
        # turn is terminated by <|eot|>.
        role_prefix_and_suffix={
            role: (
                f"<|header_start|>{role}<|header_end|>\n\n",
                "<|eot|>",
            )
            for role in ("system", "user", "assistant")
        },
        stop_str=("<|eot|>",),
        image_token="<|image|>",
    )
)

# Reference: https://modelscope.cn/models/01ai/Yi-1.5-34B-Chat/file/view/master?fileName=tokenizer_config.json&status=1
register_chat_template(
ChatTemplate(
Expand Down
4 changes: 4 additions & 0 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def __init__(
**kwargs,
)
self.hf_text_config = get_hf_text_config(self.hf_config)
self.attention_chunk_size = getattr(
self.hf_text_config, "attention_chunk_size", None
)

# Check model type
self.is_generation = is_generation_model(
Expand Down Expand Up @@ -467,6 +470,7 @@ def is_generation_model(model_architectures: List[str], is_embedding: bool = Fal
"Gemma3ForConditionalGeneration",
"Grok1VForCausalLM",
"Grok1AForCausalLM",
# TODO: add multimodal support for "Llama4ForConditionalGeneration",
"LlavaLlamaForCausalLM",
"LlavaMistralForCausalLM",
"LlavaQwenForCausalLM",
Expand Down
28 changes: 27 additions & 1 deletion python/sglang/srt/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class SeparatorStyle(IntEnum):
ADD_NEW_LINE_SINGLE = auto()
LLAMA2 = auto()
LLAMA3 = auto()
LLAMA4 = auto()
CHATGLM = auto()
CHATML = auto()
CHATINTERN = auto()
Expand Down Expand Up @@ -156,6 +157,19 @@ def get_prompt(self) -> str:
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.LLAMA4:
ret = "<|begin_of_text|>"
if self.system_message:
ret += system_prompt
else:
ret += ""
for i, (role, message) in enumerate(self.messages):
if message:
ret += f"<|header_start|>{role}<|header_end|>\n\n"
ret += f"{message.strip()}<|eot|>"
else:
ret += f"<|header_start|>{role}<|header_end|>\n\n"
return ret
elif self.sep_style == SeparatorStyle.LLAMA3:
ret = "<|begin_of_text|>"
if self.system_message:
Expand All @@ -168,7 +182,6 @@ def get_prompt(self) -> str:
ret += f"{message.strip()}<|eot_id|>"
else:
ret += f"<|start_header_id|>{role}<|end_header_id|>\n\n"
# print(ret)
return ret
elif self.sep_style == SeparatorStyle.LLAMA2:
seps = [self.sep, self.sep2]
Expand Down Expand Up @@ -561,6 +574,19 @@ def generate_chat_conv(
)
)

# reference: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/chat_template.json
# Llama 4 instruct conversation template: turns are wrapped in
# <|header_start|>role<|header_end|> headers and closed with <|eot|>.
register_conv_template(
    Conversation(
        name="llama-4",
        sep_style=SeparatorStyle.LLAMA4,
        sep="",
        roles=("user", "assistant"),
        system_template=(
            "<|header_start|>system<|header_end|>\n\n"
            "{system_message}<|eot|>"
        ),
        # Stop on end-of-text, end-of-turn, or end-of-message markers.
        stop_str=["<|end_of_text|>", "<|eot|>", "<|eom|>"],
        image_token="<|image|>",
    )
)

register_conv_template(
Conversation(
name="chatml",
Expand Down
Loading
Loading