1 parent 2205c07 commit f3da192
vllm/model_executor/layers/quantization/fp8.py
@@ -252,7 +252,7 @@ def create_weights(
     def add_padding_to_weight(self, weight: torch.Tensor) -> torch.Tensor:
         # Pad the weight tensor. This is an optimization on ROCm platform, which
         # can benefit from tensors located far enough from one another in memory
-        if (current_platform.is_rocm() and envs.VLLM_ROCM_FP8_PADDING
+        if (envs.VLLM_ROCM_FP8_PADDING and current_platform.is_rocm()
                 and weight.stride(-1) == 1
                 and (weight.stride(-2) * weight.element_size()) % 512 == 0):
             num_pad = 256 // weight.element_size()
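
For context, here is a minimal, self-contained sketch of the method this hunk touches. The reorder puts the cheap `envs.VLLM_ROCM_FP8_PADDING` flag ahead of the `current_platform.is_rocm()` probe, so the condition can short-circuit without touching the platform check when padding is disabled. The pad-then-slice body and the flag/`is_rocm()` stubs below are assumptions for illustration based on the surrounding code, not code quoted from this commit.

```python
import os

import torch
import torch.nn.functional as F

# Stand-ins for vllm's envs.VLLM_ROCM_FP8_PADDING and
# current_platform.is_rocm(), stubbed here so the sketch runs standalone.
VLLM_ROCM_FP8_PADDING = os.environ.get("VLLM_ROCM_FP8_PADDING", "1") == "1"


def is_rocm() -> bool:
    # torch.version.hip is a version string on ROCm builds, None elsewhere.
    return torch.version.hip is not None


def add_padding_to_weight(weight: torch.Tensor) -> torch.Tensor:
    # The cheap environment flag is checked first, so the platform probe
    # is skipped entirely when padding is turned off.
    if (VLLM_ROCM_FP8_PADDING and is_rocm()
            and weight.stride(-1) == 1
            and (weight.stride(-2) * weight.element_size()) % 512 == 0):
        num_pad = 256 // weight.element_size()
        # Assumed padding body: pad the last dim, then slice the padding
        # back off. The result keeps the original shape, but each row now
        # sits 256 bytes further from its neighbor in a fresh allocation.
        weight = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad]
        torch.cuda.empty_cache()
    return weight
```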