Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/sglang/srt/layers/quantization/modelopt_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w13_blockscale_swizzled = Parameter(
w13_blockscale_swizzled, requires_grad=False
)
del layer.w13_weight_scale

# This is for quantization, so we need to invert it.
layer.w13_input_scale_quant = Parameter(
Expand Down Expand Up @@ -920,6 +921,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
layer.w2_blockscale_swizzled = Parameter(
w2_blockscale_swizzled, requires_grad=False
)
del layer.w2_weight_scale
layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)

device = layer.w13_weight.device
Expand Down
Loading