Skip to content

Commit 9fdabb7

Browse files
[feature] Add Custom Op grouped_matmul_swiglu_quant (#4431)
This PR introduces the `EXEC_NPU_CMD` macro, serving as an adapter layer to simplify the invocation of `aclnn` operators on Ascend NPUs. **Key Changes:** * **Adapter Layer:** Added `EXEC_NPU_CMD` macro and related dependencies to standardize `aclnn` calls. * **Operator Support:** Integrated `grouped_matmul_swiglu_quant` as a reference implementation to demonstrate the usage of the new macro. --- - vLLM version: v0.11.2 --------- Signed-off-by: SlightwindSec <[email protected]>
1 parent 89a1a65 commit 9fdabb7

File tree

10 files changed

+1007
-3
lines changed

10 files changed

+1007
-3
lines changed

.github/workflows/release_whl.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ jobs:
9898
--exclude libc_sec.so \
9999
--exclude "libascend*.so" \
100100
--exclude "libtorch*.so" \
101+
--exclude "libopapi.so" \
101102
--exclude "liberror_manager.so"
102103
done
103104
rm -f dist/*.whl

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ ascendc_library(vllm_ascend_kernels SHARED
6363
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
6464

6565
file(GLOB VLLM_ASCEND_SRC
66-
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp)
66+
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
67+
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
6768

6869
include_directories(
6970
${pybind11_INCLUDE_DIRS}
@@ -88,6 +89,7 @@ pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})
8889
target_link_directories(
8990
vllm_ascend_C
9091
PRIVATE
92+
${TORCH_LIBRARY_DIRS}
9193
${TORCH_NPU_PATH}/lib/
9294
${ASCEND_HOME_PATH}/lib64
9395
)
@@ -96,14 +98,15 @@ target_link_libraries(
9698
vllm_ascend_C
9799
PUBLIC
98100
${TORCH_LIBRARIES}
99-
libtorch_npu.so
101+
torch_npu
100102
vllm_ascend_kernels
101103
ascendcl
102104
tiling_api
103105
register
104106
platform
105107
ascendalog
106108
dl
109+
opapi
107110
)
108111

109112
target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
// Copyright (c) 2020, Huawei Technologies Co., Ltd
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "NPUBridge.h"

namespace vllm_ascend
{
// Reinterpret a raw c10::StorageImpl pointer as the NPU-specific subclass.
// NOTE(review): the downcast is unchecked — it assumes every storage reaching
// this path was created as an NPUStorageImpl; confirm against the allocator.
NPUStorageImpl *NPUBridge::GetNpuStorageImpl(c10::StorageImpl *storageImpl)
{
    return static_cast<NPUStorageImpl *>(storageImpl);
}

// c10::Storage -> NPUStorageImpl, routed through the pointer overload.
NPUStorageImpl *NPUBridge::GetNpuStorageImpl(c10::Storage &&storage)
{
    return GetNpuStorageImpl(storage.unsafeGetStorageImpl());
}

// at::Tensor -> NPUStorageImpl, routed through the pointer overload.
NPUStorageImpl *NPUBridge::GetNpuStorageImpl(const at::Tensor &tensor)
{
    return GetNpuStorageImpl(tensor.storage().unsafeGetStorageImpl());
}

// Convenience accessor for the NPU-side storage descriptor of a tensor.
NPUStorageDesc &NPUBridge::GetNpuStorageImplDesc(const at::Tensor &tensor)
{
    return GetNpuStorageImpl(tensor)->npu_desc_;
}
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
// Copyright (c) 2020, Huawei Technologies Co., Ltd
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once
#include <c10/core/StorageImpl.h>
#include "NPUStorageImpl.h"

namespace vllm_ascend
{

// Static helper that downcasts PyTorch storage handles to the NPU-specific
// NPUStorageImpl subclass. All overloads perform an unchecked static_cast;
// NOTE(review): they assume the storage was created as an NPUStorageImpl —
// passing a CPU storage here would be undefined behavior. Confirm callers.
class NPUBridge
{
public:
    // at::Tensor to NPUStorageImpl (via the tensor's underlying storage)
    static NPUStorageImpl *GetNpuStorageImpl(const at::Tensor &tensor);

    // c10::StorageImpl to NPUStorageImpl (unchecked downcast of the raw pointer)
    static NPUStorageImpl *GetNpuStorageImpl(c10::StorageImpl *storageImpl);

    // c10::Storage to NPUStorageImpl (rvalue overload; only reads the impl
    // pointer, ownership of the Storage is not consumed)
    static NPUStorageImpl *GetNpuStorageImpl(c10::Storage &&storage);

    // at::Tensor to a mutable reference of its NPU storage descriptor
    static NPUStorageDesc &GetNpuStorageImplDesc(const at::Tensor &tensor);
};
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
// Copyright (c) 2020, Huawei Technologies Co., Ltd
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "NPUStorageImpl.h"

namespace vllm_ascend
{

// Forward everything to the base c10::StorageImpl; the NPU-specific
// npu_desc_ member is default-constructed and filled in later.
NPUStorageImpl::NPUStorageImpl(
    use_byte_size_t use_byte_size,
    size_t size_bytes,
    at::DataPtr data_ptr,
    at::Allocator *allocator,
    bool resizable)
    : c10::StorageImpl(use_byte_size, size_bytes, std::move(data_ptr), allocator, resizable)
{
}

// No NPU-specific resources beyond what the base class owns.
void NPUStorageImpl::release_resources()
{
    StorageImpl::release_resources();
}

// Factory used to register NPU storages with PyTorch. When no data pointer
// is supplied, the backing memory is obtained from the given allocator.
c10::intrusive_ptr<c10::StorageImpl> make_npu_storage_impl(
    c10::StorageImpl::use_byte_size_t,
    c10::SymInt size_bytes,
    c10::DataPtr data_ptr,
    c10::Allocator *allocator,
    bool resizable)
{
    const int64_t nbytes = size_bytes.as_int_unchecked();
    if (data_ptr == nullptr) {
        data_ptr = allocator->allocate(nbytes);
    }
    // The NPUStorageDesc information is intentionally not set here; it is
    // filled in by subsequent processing.
    return c10::make_intrusive<NPUStorageImpl>(
        c10::StorageImpl::use_byte_size_t(),
        nbytes,
        std::move(data_ptr),
        allocator,
        resizable);
}

}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
// Copyright (c) 2020, Huawei Technologies Co., Ltd
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <ATen/Tensor.h>
#include <c10/core/StorageImpl.h>
#include <c10/core/Allocator.h>
#include <c10/core/ScalarType.h>
#include <c10/util/typeid.h>
#include <c10/util/order_preserving_flat_hash_map.h>

#include "acl/acl_rt.h"
#include "acl/acl_base.h"

namespace vllm_ascend
{

// NPU-side metadata attached to a storage: the logical (base) geometry plus
// the physical ACL format actually used on the device.
struct NPUStorageDesc
{
public:
    struct use_byte_size_t
    {
    };

    // Logical sizes/strides of the tensor the storage was created for.
    c10::SmallVector<int64_t, 5> base_sizes_;
    c10::SmallVector<int64_t, 5> base_strides_;
    // Physical sizes after any NPU format transformation.
    c10::SmallVector<int64_t, 5> storage_sizes_;
    int64_t base_offset_ = 0; // NOTE(review): presumably an element offset, not bytes — confirm
    use_byte_size_t base_dtype_ = {};
    aclFormat origin_format_ = ACL_FORMAT_UNDEFINED; // format before NPU-specific conversion
    aclFormat npu_format_ = ACL_FORMAT_ND;           // format the device memory is laid out in
    // used to make CANN GE tensor from storageImpl
    caffe2::TypeMeta data_type_ = caffe2::TypeMeta::Make<uint8_t>();
};

// c10::StorageImpl subclass that carries the extra NPUStorageDesc needed by
// the CANN runtime. Created via make_npu_storage_impl below.
struct NPUStorageImpl : public c10::StorageImpl
{
    explicit NPUStorageImpl(
        use_byte_size_t use_byte_size,
        size_t size_bytes,
        at::DataPtr data_ptr,
        at::Allocator *allocator,
        bool resizable);
    ~NPUStorageImpl() override = default;

    void release_resources() override;

    NPUStorageDesc npu_desc_;

    // Returns a copy of the descriptor (SmallVectors are copied by value).
    NPUStorageDesc get_npu_desc() const
    {
        return npu_desc_;
    }
};

// Factory for NPU storages; allocates from `allocator` when `data_ptr` is
// null. The returned storage's npu_desc_ is left default-initialized.
c10::intrusive_ptr<c10::StorageImpl> make_npu_storage_impl(
    c10::StorageImpl::use_byte_size_t,
    c10::SymInt size_bytes,
    c10::DataPtr data_ptr,
    c10::Allocator *allocator,
    bool resizable);

}

0 commit comments

Comments (0)