Commit 2dbf6c6
Add fp4 quantize before all-gather for Flashinfer cutlass MoE DP (max throughput) (sgl-project#7667)
1 parent 4b8c4d9 commit 2dbf6c6
File tree
16 files changed
+360
-52
lines changed- python/sglang/srt
- distributed
- device_communicators
- layers
- moe
- fused_moe_triton
- quantization
- managers
- model_executor
- models
16 files changed
+360
-52
lines changedLines changed: 68 additions & 18 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
148 | 148 | | |
149 | 149 | | |
150 | 150 | | |
151 | | - | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
152 | 156 | | |
153 | 157 | | |
154 | 158 | | |
| |||
161 | 165 | | |
162 | 166 | | |
163 | 167 | | |
164 | | - | |
165 | | - | |
166 | | - | |
167 | | - | |
168 | | - | |
169 | | - | |
170 | | - | |
171 | | - | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
| 179 | + | |
| 180 | + | |
| 181 | + | |
| 182 | + | |
| 183 | + | |
| 184 | + | |
| 185 | + | |
| 186 | + | |
| 187 | + | |
| 188 | + | |
| 189 | + | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
172 | 195 | | |
173 | 196 | | |
174 | 197 | | |
175 | 198 | | |
176 | 199 | | |
177 | 200 | | |
178 | 201 | | |
| 202 | + | |
179 | 203 | | |
180 | 204 | | |
181 | 205 | | |
| |||
188 | 212 | | |
189 | 213 | | |
190 | 214 | | |
191 | | - | |
192 | | - | |
193 | | - | |
194 | | - | |
195 | | - | |
196 | | - | |
197 | | - | |
198 | | - | |
199 | | - | |
| 215 | + | |
| 216 | + | |
| 217 | + | |
| 218 | + | |
| 219 | + | |
| 220 | + | |
| 221 | + | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
| 229 | + | |
| 230 | + | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
| 239 | + | |
| 240 | + | |
| 241 | + | |
| 242 | + | |
| 243 | + | |
200 | 244 | | |
201 | 245 | | |
202 | 246 | | |
| |||
266 | 310 | | |
267 | 311 | | |
268 | 312 | | |
| 313 | + | |
| 314 | + | |
| 315 | + | |
| 316 | + | |
| 317 | + | |
| 318 | + | |
269 | 319 | | |
270 | 320 | | |
271 | 321 | | |
| |||
Lines changed: 52 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
206 | 206 | | |
207 | 207 | | |
208 | 208 | | |
| 209 | + | |
| 210 | + | |
| 211 | + | |
| 212 | + | |
| 213 | + | |
| 214 | + | |
| 215 | + | |
| 216 | + | |
| 217 | + | |
| 218 | + | |
| 219 | + | |
| 220 | + | |
| 221 | + | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
209 | 229 | | |
210 | 230 | | |
211 | 231 | | |
| |||
278 | 298 | | |
279 | 299 | | |
280 | 300 | | |
| 301 | + | |
| 302 | + | |
| 303 | + | |
| 304 | + | |
281 | 305 | | |
282 | 306 | | |
283 | 307 | | |
| |||
400 | 424 | | |
401 | 425 | | |
402 | 426 | | |
| 427 | + | |
| 428 | + | |
| 429 | + | |
| 430 | + | |
| 431 | + | |
| 432 | + | |
| 433 | + | |
| 434 | + | |
| 435 | + | |
| 436 | + | |
| 437 | + | |
| 438 | + | |
| 439 | + | |
| 440 | + | |
| 441 | + | |
| 442 | + | |
| 443 | + | |
| 444 | + | |
| 445 | + | |
| 446 | + | |
| 447 | + | |
| 448 | + | |
403 | 449 | | |
404 | 450 | | |
405 | 451 | | |
| |||
499 | 545 | | |
500 | 546 | | |
501 | 547 | | |
| 548 | + | |
| 549 | + | |
| 550 | + | |
| 551 | + | |
| 552 | + | |
| 553 | + | |
502 | 554 | | |
503 | 555 | | |
504 | 556 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
583 | 583 | | |
584 | 584 | | |
585 | 585 | | |
| 586 | + | |
| 587 | + | |
| 588 | + | |
| 589 | + | |
| 590 | + | |
| 591 | + | |
| 592 | + | |
| 593 | + | |
| 594 | + | |
| 595 | + | |
| 596 | + | |
| 597 | + | |
| 598 | + | |
| 599 | + | |
| 600 | + | |
| 601 | + | |
| 602 | + | |
| 603 | + | |
| 604 | + | |
| 605 | + | |
| 606 | + | |
| 607 | + | |
| 608 | + | |
| 609 | + | |
| 610 | + | |
| 611 | + | |
| 612 | + | |
| 613 | + | |
| 614 | + | |
| 615 | + | |
| 616 | + | |
| 617 | + | |
| 618 | + | |
586 | 619 | | |
587 | 620 | | |
588 | 621 | | |
| |||
673 | 706 | | |
674 | 707 | | |
675 | 708 | | |
| 709 | + | |
| 710 | + | |
| 711 | + | |
| 712 | + | |
| 713 | + | |
| 714 | + | |
| 715 | + | |
| 716 | + | |
| 717 | + | |
| 718 | + | |
| 719 | + | |
| 720 | + | |
| 721 | + | |
| 722 | + | |
| 723 | + | |
| 724 | + | |
| 725 | + | |
| 726 | + | |
| 727 | + | |
| 728 | + | |
| 729 | + | |
| 730 | + | |
| 731 | + | |
| 732 | + | |
| 733 | + | |
| 734 | + | |
| 735 | + | |
| 736 | + | |
| 737 | + | |
| 738 | + | |
| 739 | + | |
| 740 | + | |
| 741 | + | |
| 742 | + | |
| 743 | + | |
| 744 | + | |
| 745 | + | |
| 746 | + | |
| 747 | + | |
| 748 | + | |
| 749 | + | |
| 750 | + | |
| 751 | + | |
| 752 | + | |
| 753 | + | |
| 754 | + | |
| 755 | + | |
| 756 | + | |
676 | 757 | | |
677 | 758 | | |
678 | 759 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
38 | | - | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
39 | 42 | | |
40 | 43 | | |
41 | 44 | | |
| |||
112 | 115 | | |
113 | 116 | | |
114 | 117 | | |
115 | | - | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
116 | 123 | | |
117 | 124 | | |
118 | 125 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
72 | 72 | | |
73 | 73 | | |
74 | 74 | | |
| 75 | + | |
75 | 76 | | |
76 | 77 | | |
77 | 78 | | |
| |||
80 | 81 | | |
81 | 82 | | |
82 | 83 | | |
83 | | - | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
84 | 90 | | |
85 | 91 | | |
| 92 | + | |
86 | 93 | | |
87 | 94 | | |
88 | 95 | | |
| |||
108 | 115 | | |
109 | 116 | | |
110 | 117 | | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
111 | 122 | | |
112 | | - | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
113 | 128 | | |
114 | | - | |
| 129 | + | |
115 | 130 | | |
116 | 131 | | |
117 | 132 | | |
| |||
131 | 146 | | |
132 | 147 | | |
133 | 148 | | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
134 | 153 | | |
135 | 154 | | |
136 | 155 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
191 | 191 | | |
192 | 192 | | |
193 | 193 | | |
194 | | - | |
| 194 | + | |
| 195 | + | |
| 196 | + | |
| 197 | + | |
| 198 | + | |
195 | 199 | | |
196 | 200 | | |
197 | 201 | | |
| |||
0 commit comments