
Commit 67da17a

[LoRA][Kernel] Remove the unused libentry module (vllm-project#10214)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent b65a229 commit 67da17a

7 files changed (+49 additions, -276 deletions)

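The pattern is the same in every kernel module touched below: the @libentry() wrapper is dropped so each Triton kernel is decorated with @triton.jit alone, and the tests call the LoRA ops directly instead of patching the kernels with LibEntry. A minimal sketch of what the decoration looks like after this change (the kernel below is a hypothetical example for illustration, not one of the vLLM kernels):

    import triton
    import triton.language as tl

    # Before this commit the vLLM kernels carried an extra wrapper:
    #     @libentry()   <- removed by this commit
    #     @triton.jit
    # After this commit only the plain Triton JIT decoration remains:
    @triton.jit
    def _example_copy_kernel(src_ptr, dst_ptr, n_elements, BLOCK: tl.constexpr):
        # Hypothetical kernel used only to illustrate the decoration change.
        offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
        mask = offs < n_elements
        tl.store(dst_ptr + offs, tl.load(src_ptr + offs, mask=mask), mask=mask)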

tests/lora/test_punica_sizes.py

Lines changed: 24 additions & 49 deletions
@@ -4,8 +4,6 @@
 whether the corresponding Triton kernel can run normally when tensor parallelism
 is set to [1, 2, 4, 8, 16, 32, 64].
 """
-from unittest.mock import patch
-
 import pytest
 import torch
 
@@ -16,7 +14,6 @@
 from vllm.lora.ops.sgmv_expand_slice import sgmv_expand_slice
 from vllm.lora.ops.sgmv_shrink import sgmv_shrink
 from vllm.platforms import current_platform
-from vllm.triton_utils.libentry import LibEntry
 
 from .utils import (generate_data, generate_data_for_expand_nslices,
                     ref_torch_groupgemm)
@@ -173,9 +170,6 @@ def test_punica_bgmv(
    seed: int,
    device: str,
 ):
-    from vllm.lora.ops.bgmv_expand import _bgmv_expand_kernel
-    from vllm.lora.ops.bgmv_shrink import _bgmv_shrink_kernel
-
    torch.set_default_device(device)
    current_platform.seed_everything(seed)
 
@@ -200,33 +194,21 @@ def test_punica_bgmv(
        device,
    )
    if op_type == "shrink":
-        # The current _bgmv_shrink_kernel does not require the libentry
-        # decoration. The purpose of adding this patch is to test the
-        # correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_shrink._bgmv_shrink_kernel",
-                LibEntry(_bgmv_shrink_kernel),
-        ):
-            bgmv_shrink(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                scaling,
-            )
+        bgmv_shrink(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            scaling,
+        )
    else:
-        # ditto
-        with patch(
-                "vllm.lora.ops.bgmv_expand._bgmv_expand_kernel",
-                LibEntry(_bgmv_expand_kernel),
-        ):
-            bgmv_expand(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                add_inputs=True,
-            )
+        bgmv_expand(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            add_inputs=True,
+        )
    ref_torch_groupgemm(
        ref_out_tensor,
        inputs_tensor,
@@ -262,7 +244,6 @@ def test_punica_expand_nslices(
    seed: int,
    device: str,
 ):
-    from vllm.lora.ops.bgmv_expand_slice import _bgmv_expand_slice_kernel
 
    torch.set_default_device(device)
    current_platform.seed_everything(seed)
@@ -312,22 +293,16 @@ def test_punica_expand_nslices(
                add_inputs=True,
            )
        else:
-            # The current _bgmv_expand_slice_kernel does not require the
-            # libentry decoration. The purpose of adding this patch is to test
-            # the correctness of libentry.
-            with patch(
-                    "vllm.lora.ops.bgmv_expand_slice._bgmv_expand_slice_kernel",
-                    LibEntry(_bgmv_expand_slice_kernel),
-            ):
-                bgmv_expand_slice(
-                    inputs_tensor,
-                    lora_weights,
-                    our_outputs,
-                    indices,
-                    slice_offset,
-                    slice_size=hidden_size,
-                    add_inputs=True,
-                )
+
+            bgmv_expand_slice(
+                inputs_tensor,
+                lora_weights,
+                our_outputs,
+                indices,
+                slice_offset,
+                slice_size=hidden_size,
+                add_inputs=True,
+            )
        ref_torch_groupgemm(
            ref_outputs[:, slice_offset:slice_offset + hidden_size],
            inputs_tensor,

tests/lora/test_punica_variation.py

Lines changed: 24 additions & 49 deletions
@@ -3,8 +3,6 @@
 under different conditions, including various batches, numbers of LoRA , and
 maximum ranks.
 """
-from unittest.mock import patch
-
 import pytest
 import torch
 
@@ -15,7 +13,6 @@
 from vllm.lora.ops.sgmv_expand_slice import sgmv_expand_slice
 from vllm.lora.ops.sgmv_shrink import sgmv_shrink
 from vllm.platforms import current_platform
-from vllm.triton_utils.libentry import LibEntry
 
 from .utils import (generate_data, generate_data_for_expand_nslices,
                     ref_torch_groupgemm)
@@ -150,8 +147,6 @@ def test_punica_bgmv(
    seed: int,
    device: str,
 ):
-    from vllm.lora.ops.bgmv_expand import _bgmv_expand_kernel
-    from vllm.lora.ops.bgmv_shrink import _bgmv_shrink_kernel
 
    torch.set_default_device(device)
    current_platform.seed_everything(seed)
@@ -177,33 +172,22 @@ def test_punica_bgmv(
        device,
    )
    if op_type == "shrink":
-        # The current _bgmv_shrink_kernel does not require the libentry
-        # decoration. The purpose of adding this patch is to test the
-        # correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_shrink._bgmv_shrink_kernel",
-                LibEntry(_bgmv_shrink_kernel),
-        ):
-            bgmv_shrink(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                scaling,
-            )
+        bgmv_shrink(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            scaling,
+        )
    else:
-        # ditto
-        with patch(
-                "vllm.lora.ops.bgmv_expand._bgmv_expand_kernel",
-                LibEntry(_bgmv_expand_kernel),
-        ):
-            bgmv_expand(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                add_inputs=True,
-            )
+
+        bgmv_expand(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            add_inputs=True,
+        )
    ref_torch_groupgemm(
        ref_out_tensor,
        inputs_tensor,
@@ -239,8 +223,6 @@ def test_punica_expand_nslices(
    seed: int,
    device: str,
 ):
-    from vllm.lora.ops.bgmv_expand_slice import _bgmv_expand_slice_kernel
-
    torch.set_default_device(device)
    current_platform.seed_everything(seed)
 
@@ -289,22 +271,15 @@ def test_punica_expand_nslices(
                add_inputs=True,
            )
        else:
-            # The current _bgmv_expand_slice_kernel does not require the
-            # libentry decoration. The purpose of adding this patch is to test
-            # the correctness of libentry.
-            with patch(
-                    "vllm.lora.ops.bgmv_expand_slice._bgmv_expand_slice_kernel",
-                    LibEntry(_bgmv_expand_slice_kernel),
-            ):
-                bgmv_expand_slice(
-                    inputs_tensor,
-                    lora_weights,
-                    our_outputs,
-                    indices,
-                    slice_offset,
-                    slice_size=hidden_size,
-                    add_inputs=True,
-                )
+            bgmv_expand_slice(
+                inputs_tensor,
+                lora_weights,
+                our_outputs,
+                indices,
+                slice_offset,
+                slice_size=hidden_size,
+                add_inputs=True,
+            )
        ref_torch_groupgemm(
            ref_outputs[:, slice_offset:slice_offset + hidden_size],
            inputs_tensor,

vllm/lora/ops/sgmv_expand.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_expand_kernel(
    input_ptr,

vllm/lora/ops/sgmv_expand_slice.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_expand_slice_kernel(
    input_ptr,

vllm/lora/ops/sgmv_shrink.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_shrink_kernel(
    input_ptr,

vllm/triton_utils/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -6,6 +6,5 @@
 
 from vllm.triton_utils.custom_cache_manager import (
    maybe_set_triton_cache_manager)
-from vllm.triton_utils.libentry import libentry
 
-__all__ += ["maybe_set_triton_cache_manager", "libentry"]
+__all__ += ["maybe_set_triton_cache_manager"]
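After this change, vllm.triton_utils no longer re-exports libentry. A quick sketch of the import surface implied by the diff above (assuming no other re-export paths exist):

    # Still exported by vllm/triton_utils/__init__.py after this commit:
    from vllm.triton_utils import maybe_set_triton_cache_manager

    # Removed along with the module; these imports are expected to fail now:
    # from vllm.triton_utils import libentry
    # from vllm.triton_utils.libentry import LibEntry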
