File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1928,6 +1928,12 @@ def check_server_args(self):
19281928 if "Llama4" in model_arch :
19291929 assert self .attention_backend == "fa3" , "fa3 is required for Llama4 model"
19301930
1931+ if "Gemma2ForCausalLM" in model_arch :
1932+ # FIXME: https://github.com/sgl-project/sglang/pull/7367 is not compatible with the Gemma2 model, so hybrid SWA memory is disabled for it below.
1933+ # The failing test run: https://github.com/sgl-project/sglang/actions/runs/16255155597/job/45890331952#step:4:736
1934+ logger .warning ("Disable hybrid SWA memory for Gemma2ForCausalLM." )
1935+ self .disable_hybrid_swa_memory = True
1936+
19311937 # Check LoRA
19321938 self .check_lora_server_args ()
19331939
Original file line number Diff line number Diff line change 2727
2828from sglang .bench_serving import run_benchmark
2929from sglang .global_config import global_config
30- from sglang .lang .backend .openai import OpenAI
31- from sglang .lang .backend .runtime_endpoint import RuntimeEndpoint
32- from sglang .lang .interpreter import ProgramState
3330from sglang .srt .utils import (
3431 get_bool_env_var ,
3532 get_device ,
@@ -358,6 +355,9 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
358355
359356
360357def select_sglang_backend (args : argparse .Namespace ):
358+ from sglang .lang .backend .openai import OpenAI
359+ from sglang .lang .backend .runtime_endpoint import RuntimeEndpoint
360+
361361 if args .backend .startswith ("srt" ):
362362 if args .backend == "srt-no-parallel" :
363363 global_config .enable_parallel_encoding = False
You can’t perform that action at this time.
0 commit comments