Skip to content

Commit 6898863

Browse files
add fleet fallback
1 parent 78b5462 commit 6898863

7 files changed

Lines changed: 599 additions & 6 deletions

File tree

fastdeploy/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
_ResolvedTask = Literal["generate", "encode", "embed"]
6161

6262
# Model implementation backend options
63-
ModelImpl = Literal["auto", "fastdeploy", "paddleformers"]
63+
ModelImpl = Literal["auto", "fastdeploy", "paddleformers", "paddlefleet"]
6464

6565
_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
6666
"generate": [],

fastdeploy/engine/args_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ class EngineArgs:
139139
'auto': Use native FastDeploy implementation when available, fallback to PaddleFormers.
140140
'fastdeploy': Use only native FastDeploy implementations.
141141
'paddleformers': Use PaddleFormers backend with FastDeploy optimizations.
142+
'paddlefleet': Use PaddleFleet backend.
142143
"""
143144
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
144145
"""
@@ -636,7 +637,7 @@ def __post_init__(self):
636637
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
637638
)
638639

639-
valid_model_impls = ["auto", "fastdeploy", "paddleformers"]
640+
valid_model_impls = ["auto", "fastdeploy", "paddleformers", "paddlefleet"]
640641
if self.model_impl not in valid_model_impls:
641642
raise NotImplementedError(
642643
f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}"
@@ -974,13 +975,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
974975
model_group.add_argument(
975976
"--model-impl",
976977
type=str,
977-
choices=["auto", "fastdeploy", "paddleformers"],
978+
choices=["auto", "fastdeploy", "paddleformers", "paddlefleet"],
978979
default=EngineArgs.model_impl,
979980
help=(
980981
"Model implementation backend. "
981982
"'auto': Use native FastDeploy when available, fallback to PaddleFormers. "
982983
"'fastdeploy': Use only native FastDeploy implementations. "
983984
"'paddleformers': Use PaddleFormers backend with FastDeploy optimizations."
985+
" 'paddlefleet': Use PaddleFleet backend."
984986
),
985987
)
986988

fastdeploy/model_executor/models/model_base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ def _try_resolve_paddleformers(
194194
elif model_impl == "auto" and is_fallback:
195195
# Auto mode fallback when no native implementation exists
196196
backend_arch = "PaddleFormersForCausalLM"
197+
elif model_impl == "paddlefleet":
198+
backend_arch = "PaddleFleetForCausalLM"
197199
elif model_impl == "fastdeploy":
198200
return None
199201
else:

fastdeploy/model_executor/models/paddleformers/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
)
2222

2323
from .base import PaddleFormersModelBase
24+
from .base_fleet import PaddleFleetModelBase
2425
from .causallm import CausalLMMixin
2526

2627
__all__ = [
2728
"PaddleFormersForCausalLM",
29+
"PaddleFleetForCausalLM",
2830
]
2931

3032

@@ -38,3 +40,14 @@ class PaddleFormersForCausalLM(CausalLMMixin, PaddleFormersModelBase, ModelForCa
3840
@classmethod
3941
def name(cls):
4042
return "PaddleFormersForCausalLM"
43+
44+
45+
@ModelRegistry.register_model_class(
46+
architecture="PaddleFleetForCausalLM",
47+
module_name="paddleformers",
48+
category=ModelCategory.TEXT_GENERATION,
49+
)
50+
class PaddleFleetForCausalLM(PaddleFleetModelBase, ModelForCasualLM):
51+
@classmethod
52+
def name(cls):
53+
return "PaddleFleetForCausalLM"

0 commit comments

Comments
 (0)