
Commit c048f22

Delete unused libentry

Signed-off-by: Jee Jee Li <[email protected]>
1 parent f0f2e56 commit c048f22
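This commit deletes the LibEntry wrapper from vllm/triton_utils and strips its uses: the @libentry() decoration on the sgmv Triton kernels, and the test-only unittest.mock patches that swapped LibEntry-wrapped bgmv kernels in to exercise it. For context, LibEntry was a launch-dispatch cache layered on top of Triton JIT kernels. Below is a heavily simplified, illustrative sketch of that idea, written for this note; it is a toy stand-in, not the deleted vllm/triton_utils/libentry.py.

class ToyLibEntry:
    """Toy stand-in for a LibEntry-style wrapper (illustrative only).

    The real LibEntry cached compiled kernels keyed by argument
    specialization; this toy merely memoizes the grid-bound launcher.
    """

    def __init__(self, jit_fn):
        self.jit_fn = jit_fn  # a @triton.jit function
        self._launchers = {}

    def __getitem__(self, grid):
        # Triton kernels launch as kernel[grid](*args); memoizing per grid
        # skips re-resolving the dispatch path on repeated launches.
        if grid not in self._launchers:
            self._launchers[grid] = self.jit_fn[grid]
        return self._launchers[grid]

The commit removes this indirection entirely: the kernels run as plain @triton.jit functions, and the tests call the public ops directly instead of patching the private kernels.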

File tree

7 files changed: +49, −276 lines


tests/lora/test_punica_sizes.py

Lines changed: 24 additions & 49 deletions
@@ -4,8 +4,6 @@
 whether the corresponding Triton kernel can run normally when tensor parallelism
 is set to [1, 2, 4, 8, 16, 32, 64].
 """
-from unittest.mock import patch
-
 import pytest
 import torch
 
@@ -16,7 +14,6 @@
 from vllm.lora.ops.sgmv_expand_slice import sgmv_expand_slice
 from vllm.lora.ops.sgmv_shrink import sgmv_shrink
 from vllm.platforms import current_platform
-from vllm.triton_utils.libentry import LibEntry
 
 from .utils import (generate_data, generate_data_for_expand_nslices,
                     ref_torch_groupgemm)
@@ -235,9 +232,6 @@ def test_punica_bgmv(
     seed: int,
     device: str,
 ):
-    from vllm.lora.ops.bgmv_expand import _bgmv_expand_kernel
-    from vllm.lora.ops.bgmv_shrink import _bgmv_shrink_kernel
-
     torch.set_default_device(device)
     current_platform.seed_everything(seed)
 
@@ -262,33 +256,21 @@ def test_punica_bgmv(
         device,
     )
     if op_type == "shrink":
-        # The current _bgmv_shrink_kernel does not require the libentry
-        # decoration. The purpose of adding this patch is to test the
-        # correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_shrink._bgmv_shrink_kernel",
-                LibEntry(_bgmv_shrink_kernel),
-        ):
-            bgmv_shrink(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                scaling,
-            )
+        bgmv_shrink(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            scaling,
+        )
     else:
-        # ditto
-        with patch(
-                "vllm.lora.ops.bgmv_expand._bgmv_expand_kernel",
-                LibEntry(_bgmv_expand_kernel),
-        ):
-            bgmv_expand(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                add_inputs=True,
-            )
+        bgmv_expand(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            add_inputs=True,
+        )
     ref_torch_groupgemm(
         ref_out_tensor,
         inputs_tensor,
@@ -324,7 +306,6 @@ def test_punica_expand_nslices(
     seed: int,
     device: str,
 ):
-    from vllm.lora.ops.bgmv_expand_slice import _bgmv_expand_slice_kernel
 
     torch.set_default_device(device)
     current_platform.seed_everything(seed)
@@ -374,22 +355,16 @@ def test_punica_expand_nslices(
             add_inputs=True,
         )
     else:
-        # The current _bgmv_expand_slice_kernel does not require the
-        # libentry decoration. The purpose of adding this patch is to test
-        # the correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_expand_slice._bgmv_expand_slice_kernel",
-                LibEntry(_bgmv_expand_slice_kernel),
-        ):
-            bgmv_expand_slice(
-                inputs_tensor,
-                lora_weights,
-                our_outputs,
-                indices,
-                slice_offset,
-                slice_size=hidden_size,
-                add_inputs=True,
-            )
+
+        bgmv_expand_slice(
+            inputs_tensor,
+            lora_weights,
+            our_outputs,
+            indices,
+            slice_offset,
+            slice_size=hidden_size,
+            add_inputs=True,
+        )
     ref_torch_groupgemm(
         ref_outputs[:, slice_offset:slice_offset + hidden_size],
         inputs_tensor,
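With the patches gone, the test exercises bgmv_shrink through a direct call. A standalone sketch of that call pattern follows; the shapes, dtypes, and scaling value are illustrative assumptions, not the test's parametrization.

import torch
from vllm.lora.ops.bgmv_shrink import bgmv_shrink

# Assumed shapes: 4 tokens, hidden size 128, LoRA rank 16, 2 LoRA adapters.
inputs_tensor = torch.randn(4, 128, dtype=torch.float16, device="cuda")
lora_weights = torch.randn(2, 16, 128, dtype=torch.float16, device="cuda")
our_out_tensor = torch.zeros(4, 16, dtype=torch.float32, device="cuda")
indices = torch.zeros(4, dtype=torch.long, device="cuda")  # every token -> LoRA 0

# Direct call; previously this ran under unittest.mock.patch with
# LibEntry(_bgmv_shrink_kernel) swapped in for the private kernel.
bgmv_shrink(inputs_tensor, lora_weights, our_out_tensor, indices, 0.5)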

tests/lora/test_punica_variation.py

Lines changed: 24 additions & 49 deletions
@@ -3,8 +3,6 @@
 under different conditions, including various batches, numbers of LoRA , and
 maximum ranks.
 """
-from unittest.mock import patch
-
 import pytest
 import torch
 
@@ -15,7 +13,6 @@
 from vllm.lora.ops.sgmv_expand_slice import sgmv_expand_slice
 from vllm.lora.ops.sgmv_shrink import sgmv_shrink
 from vllm.platforms import current_platform
-from vllm.triton_utils.libentry import LibEntry
 
 from .utils import (generate_data, generate_data_for_expand_nslices,
                     ref_torch_groupgemm)
@@ -150,8 +147,6 @@ def test_punica_bgmv(
     seed: int,
     device: str,
 ):
-    from vllm.lora.ops.bgmv_expand import _bgmv_expand_kernel
-    from vllm.lora.ops.bgmv_shrink import _bgmv_shrink_kernel
 
     torch.set_default_device(device)
     current_platform.seed_everything(seed)
@@ -177,33 +172,22 @@ def test_punica_bgmv(
         device,
     )
     if op_type == "shrink":
-        # The current _bgmv_shrink_kernel does not require the libentry
-        # decoration. The purpose of adding this patch is to test the
-        # correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_shrink._bgmv_shrink_kernel",
-                LibEntry(_bgmv_shrink_kernel),
-        ):
-            bgmv_shrink(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                scaling,
-            )
+        bgmv_shrink(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            scaling,
+        )
     else:
-        # ditto
-        with patch(
-                "vllm.lora.ops.bgmv_expand._bgmv_expand_kernel",
-                LibEntry(_bgmv_expand_kernel),
-        ):
-            bgmv_expand(
-                inputs_tensor,
-                lora_weights,
-                our_out_tensor,
-                indices,
-                add_inputs=True,
-            )
+
+        bgmv_expand(
+            inputs_tensor,
+            lora_weights,
+            our_out_tensor,
+            indices,
+            add_inputs=True,
+        )
     ref_torch_groupgemm(
         ref_out_tensor,
         inputs_tensor,
@@ -239,8 +223,6 @@ def test_punica_expand_nslices(
     seed: int,
     device: str,
 ):
-    from vllm.lora.ops.bgmv_expand_slice import _bgmv_expand_slice_kernel
-
     torch.set_default_device(device)
     current_platform.seed_everything(seed)
 
@@ -289,22 +271,15 @@ def test_punica_expand_nslices(
             add_inputs=True,
         )
     else:
-        # The current _bgmv_expand_slice_kernel does not require the
-        # libentry decoration. The purpose of adding this patch is to test
-        # the correctness of libentry.
-        with patch(
-                "vllm.lora.ops.bgmv_expand_slice._bgmv_expand_slice_kernel",
-                LibEntry(_bgmv_expand_slice_kernel),
-        ):
-            bgmv_expand_slice(
-                inputs_tensor,
-                lora_weights,
-                our_outputs,
-                indices,
-                slice_offset,
-                slice_size=hidden_size,
-                add_inputs=True,
-            )
+        bgmv_expand_slice(
+            inputs_tensor,
+            lora_weights,
+            our_outputs,
+            indices,
+            slice_offset,
+            slice_size=hidden_size,
+            add_inputs=True,
+        )
     ref_torch_groupgemm(
         ref_outputs[:, slice_offset:slice_offset + hidden_size],
         inputs_tensor,
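Likewise for the nslices path: bgmv_expand_slice is now called directly, once per slice. A standalone sketch with assumed shapes (two slices of hidden size 64 accumulated into one output; not the test's parametrization):

import torch
from vllm.lora.ops.bgmv_expand_slice import bgmv_expand_slice

hidden_size = 64  # per-slice width, assumed for illustration
inputs_tensor = torch.randn(4, 16, dtype=torch.float16, device="cuda")
lora_weights = torch.randn(2, hidden_size, 16, dtype=torch.float16, device="cuda")
our_outputs = torch.zeros(4, 2 * hidden_size, dtype=torch.float16, device="cuda")
indices = torch.zeros(4, dtype=torch.long, device="cuda")

# Each call writes its slice of the output at slice_offset, mirroring the
# argument order the test uses above.
for slice_offset in (0, hidden_size):
    bgmv_expand_slice(
        inputs_tensor,
        lora_weights,
        our_outputs,
        indices,
        slice_offset,
        slice_size=hidden_size,
        add_inputs=True,
    )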

vllm/lora/ops/sgmv_expand.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_expand_kernel(
     input_ptr,

vllm/lora/ops/sgmv_expand_slice.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_expand_slice_kernel(
     input_ptr,

vllm/lora/ops/sgmv_shrink.py

Lines changed: 0 additions & 3 deletions
@@ -9,10 +9,7 @@
 import triton
 import triton.language as tl
 
-from vllm.triton_utils import libentry
 
-
-@libentry()
 @triton.jit
 def _sgmv_shrink_kernel(
     input_ptr,
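The same three-line deletion is applied to all three sgmv kernel files above: drop the libentry import and the @libentry() decoration, leaving a plain @triton.jit kernel. A minimal runnable sketch of the resulting decoration pattern (the toy kernel below is illustrative, not one of the real sgmv kernels):

import torch
import triton
import triton.language as tl


# Before this commit the kernel carried @libentry() stacked on @triton.jit;
# after it, dispatch is left entirely to Triton.
@triton.jit
def _toy_copy_kernel(x_ptr, y_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    offsets = tl.program_id(0) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    tl.store(y_ptr + offsets, tl.load(x_ptr + offsets, mask=mask), mask=mask)


x = torch.randn(1024, device="cuda")
y = torch.empty_like(x)
_toy_copy_kernel[(triton.cdiv(x.numel(), 256), )](x, y, x.numel(), BLOCK_SIZE=256)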

vllm/triton_utils/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -6,6 +6,5 @@
 
 from vllm.triton_utils.custom_cache_manager import (
     maybe_set_triton_cache_manager)
-from vllm.triton_utils.libentry import libentry
 
-__all__ += ["maybe_set_triton_cache_manager", "libentry"]
+__all__ += ["maybe_set_triton_cache_manager"]
