Skip to content

Commit 89dd977

Browse files
committed
Revert sampler.py and mtp.py to upstream version
These changes (XPU guard removal, num_cpu_blocks condition) were debugging artifacts unrelated to MiniMax-M2.5 SM80 FP8 MoE support.
1 parent bc8c3b0 commit 89dd977

2 files changed

Lines changed: 6 additions & 4 deletions

File tree

fastdeploy/model_executor/layers/sample/sampler.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,10 @@ def padding_sampling_params(top_p, top_k, infer_seed, seq_lens_this_time, seq_le
 77  77       top_k_padding = paddle.repeat_interleave(top_k[:real_bsz], repeats).unsqueeze(1)
 78  78       topp_seed = paddle.repeat_interleave(infer_seed[:real_bsz], repeats).unsqueeze(1)
 79  79
 80     -     MAX_INFER_SEED = 9223372036854775806
     80 +     if current_platform.is_xpu():
     81 +         MAX_INFER_SEED = 2147483646
     82 +     else:
     83 +         MAX_INFER_SEED = 9223372036854775806
 81  84
 82  85       token_lens = paddle.where(
 83  86           seq_lens_encoder[:real_bsz] == 0,
@@ -97,7 +100,7 @@ def padding_sampling_params(top_p, top_k, infer_seed, seq_lens_this_time, seq_le
 97 100
 98 101       offsets = paddle.where(
 99 102           is_decoder,
100     -         local_pos * 4,
    103 +         local_pos * (32 if current_platform.is_xpu() else 4),
101 104           paddle.zeros_like(local_pos),
102 105       )
103 106

fastdeploy/spec_decode/mtp.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -428,8 +428,7 @@ def clear_mtp_cache(self, profile=False):
428 428           Clear allocated cacheKV
429 429           """
430 430           create_cache_tensor = profile or not (
431     -             self.fd_config.cache_config.num_cpu_blocks > 0
432     -             or self.fd_config.cache_config.kvcache_storage_backend
    431 +             self.fd_config.cache_config.kvcache_storage_backend
433 432               or self.fd_config.scheduler_config.splitwise_role != "mixed"
434 433           )
435 434           if not create_cache_tensor:

0 commit comments

Comments
 (0)