Skip to content

Commit 7039c06

Browse files
authored
[NPU] Support npu save load (#31893)
* support save load for NPU
* add save load npu unittest
* support np.array transform in NPU
* fix errors
* delete dygraph in unittest
* add Wait
* fix unittest
* fix review comment
* fix unittest problem
* fix little problem
1 parent 853af66 commit 7039c06

9 files changed

Lines changed: 359 additions & 75 deletions

File tree

paddle/fluid/framework/tensor_util.cc

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,29 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
822822
#else
823823
PADDLE_THROW(platform::errors::Unimplemented(
824824
"XPUPlace is not supported when not compiled with XPU"));
825+
#endif
826+
} else if (platform::is_npu_place(tensor.place())) {
827+
#ifdef PADDLE_WITH_ASCEND_CL
828+
constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB
829+
std::unique_ptr<char[]> buf(new char[kBufSize]);
830+
auto& npu_dev_ctx =
831+
static_cast<const platform::NPUDeviceContext&>(dev_ctx);
832+
platform::CPUPlace cpu;
833+
uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
834+
while (size != 0) {
835+
size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
836+
memory::Copy(cpu, buf.get(),
837+
BOOST_GET_CONST(platform::NPUPlace, tensor.place()),
838+
reinterpret_cast<const void*>(data), size_to_write,
839+
npu_dev_ctx.stream());
840+
npu_dev_ctx.Wait();
841+
os.write(buf.get(), size_to_write);
842+
data += size_to_write;
843+
size -= size_to_write;
844+
}
845+
#else
846+
PADDLE_THROW(platform::errors::Unimplemented(
847+
"NPUPlace is not supported when not compiled with NPU"));
825848
#endif
826849
} else {
827850
os.write(static_cast<const char*>(data_ptr),
@@ -877,8 +900,10 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
877900
auto ctx = platform::CPUDeviceContext();
878901
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
879902
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
880-
platform::is_xpu_place(dev_ctx.GetPlace())) {
881-
#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU
903+
platform::is_xpu_place(dev_ctx.GetPlace()) ||
904+
platform::is_npu_place(dev_ctx.GetPlace())) {
905+
#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU || \
906+
defined PADDLE_WITH_ASCEND_CL
882907
Tensor cpu_tensor;
883908
cpu_tensor.Resize(framework::make_ddim(shape));
884909
framework::VisitDataType(
@@ -887,13 +912,19 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
887912
is.read(static_cast<char*>(buf), size);
888913
auto dst_place = dev_ctx.GetPlace();
889914
framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
915+
if (platform::is_npu_place(dev_ctx.GetPlace())) {
916+
dev_ctx.Wait();
917+
}
890918
#else
891919
if (platform::is_gpu_place(dev_ctx.GetPlace())) {
892920
PADDLE_THROW(platform::errors::Unimplemented(
893921
"CUDAPlace is not supported when not compiled with CUDA"));
894-
} else {
922+
} else if (platform::is_xpu_place(dev_ctx.GetPlace())) {
895923
PADDLE_THROW(platform::errors::Unimplemented(
896924
"XPUPlace is not supported when not compiled with XPU"));
925+
} else {
926+
PADDLE_THROW(platform::errors::Unimplemented(
927+
"NPUPlace is not supported when not compiled with NPU"));
897928
}
898929
#endif
899930
} else {
@@ -934,8 +965,10 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
934965
auto ctx = platform::CPUDeviceContext();
935966
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
936967
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
937-
platform::is_xpu_place(dev_ctx.GetPlace())) {
938-
#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU
968+
platform::is_xpu_place(dev_ctx.GetPlace()) ||
969+
platform::is_npu_place(dev_ctx.GetPlace())) {
970+
#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU || \
971+
defined PADDLE_WITH_ASCEND_CL
939972
Tensor cpu_tensor;
940973
cpu_tensor.Resize(framework::make_ddim(dims));
941974
framework::VisitDataType(
@@ -944,13 +977,19 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
944977
is.read(static_cast<char*>(buf), size);
945978
auto dst_place = dev_ctx.GetPlace();
946979
framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
980+
if (platform::is_npu_place(dev_ctx.GetPlace())) {
981+
dev_ctx.Wait();
982+
}
947983
#else
948984
if (platform::is_gpu_place(dev_ctx.GetPlace())) {
949985
PADDLE_THROW(platform::errors::Unimplemented(
950986
"CUDAPlace is not supported when not compiled with CUDA"));
951-
} else {
987+
} else if (platform::is_xpu_place(dev_ctx.GetPlace())) {
952988
PADDLE_THROW(platform::errors::Unimplemented(
953989
"XPUPlace is not supported when not compiled with XPU"));
990+
} else {
991+
PADDLE_THROW(platform::errors::Unimplemented(
992+
"NPUPlace is not supported when not compiled with NPU"));
954993
}
955994
#endif
956995
} else {
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/operators/load_combine_op.h"
16+
17+
namespace ops = paddle::operators;
18+
19+
// Registers the load_combine operator's kernel for NPUDeviceContext.
// LoadCombineOpKernel is instantiated for float, double, int, int8_t and
// int64_t element types.
REGISTER_OP_NPU_KERNEL(
20+
load_combine,
21+
ops::LoadCombineOpKernel<paddle::platform::NPUDeviceContext, float>,
22+
ops::LoadCombineOpKernel<paddle::platform::NPUDeviceContext, double>,
23+
ops::LoadCombineOpKernel<paddle::platform::NPUDeviceContext, int>,
24+
ops::LoadCombineOpKernel<paddle::platform::NPUDeviceContext, int8_t>,
25+
ops::LoadCombineOpKernel<paddle::platform::NPUDeviceContext, int64_t>);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/operators/load_op.h"
16+
17+
namespace ops = paddle::operators;
18+
19+
// Registers the load operator's kernel for NPUDeviceContext.
// LoadOpKernel is instantiated for float, double, int, int8_t and int64_t
// element types.
REGISTER_OP_NPU_KERNEL(
20+
load, ops::LoadOpKernel<paddle::platform::NPUDeviceContext, float>,
21+
ops::LoadOpKernel<paddle::platform::NPUDeviceContext, double>,
22+
ops::LoadOpKernel<paddle::platform::NPUDeviceContext, int>,
23+
ops::LoadOpKernel<paddle::platform::NPUDeviceContext, int8_t>,
24+
ops::LoadOpKernel<paddle::platform::NPUDeviceContext, int64_t>);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/operators/save_combine_op.h"
16+
17+
namespace ops = paddle::operators;
18+
19+
// Registers the save_combine operator's kernel for NPUDeviceContext.
// SaveCombineOpKernel is instantiated for float, double, int and int64_t
// element types.
// NOTE(review): the load_combine NPU registration also lists int8_t, which is
// not registered here -- confirm the asymmetry is intended.
REGISTER_OP_NPU_KERNEL(
20+
save_combine,
21+
ops::SaveCombineOpKernel<paddle::platform::NPUDeviceContext, float>,
22+
ops::SaveCombineOpKernel<paddle::platform::NPUDeviceContext, double>,
23+
ops::SaveCombineOpKernel<paddle::platform::NPUDeviceContext, int>,
24+
ops::SaveCombineOpKernel<paddle::platform::NPUDeviceContext, int64_t>);
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/operators/save_op.h"
16+
#include "paddle/fluid/platform/float16.h"
17+
18+
namespace ops = paddle::operators;
19+
20+
// Registers the save operator's kernel for NPUDeviceContext.
// SaveOpKernel is instantiated for float, double, int, uint8_t, int8_t,
// int64_t and paddle::platform::float16 element types.
REGISTER_OP_NPU_KERNEL(
21+
save, ops::SaveOpKernel<paddle::platform::NPUDeviceContext, float>,
22+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext, double>,
23+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext, int>,
24+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext, uint8_t>,
25+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext, int8_t>,
26+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext, int64_t>,
27+
ops::SaveOpKernel<paddle::platform::NPUDeviceContext,
28+
paddle::platform::float16>);

paddle/fluid/pybind/tensor_py.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor,
644644
}
645645
bool is_gpu_tensor = platform::is_gpu_place(tensor.place());
646646
bool is_xpu_tensor = platform::is_xpu_place(tensor.place());
647+
bool is_npu_tensor = platform::is_npu_place(tensor.place());
647648
const auto &tensor_dims = tensor.dims();
648649
auto tensor_dtype = tensor.type();
649650
size_t sizeof_dtype = framework::SizeOfType(tensor_dtype);
@@ -662,7 +663,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor,
662663

663664
std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(tensor.type());
664665

665-
if (!is_gpu_tensor && !is_xpu_tensor) {
666+
if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor) {
666667
if (!need_deep_copy) {
667668
auto base = py::cast(std::move(tensor));
668669
return py::array(py::dtype(py_dtype_str.c_str()), py_dims, py_strides,
@@ -729,6 +730,34 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor,
729730
PADDLE_THROW(platform::errors::PermissionDenied(
730731
"Cannot use CUDAPlace in CPU only version, "
731732
"Please recompile or reinstall Paddle with CUDA support."));
733+
#endif
734+
} else if (is_npu_tensor) {
735+
#ifdef PADDLE_WITH_ASCEND_CL
736+
py::array py_arr(py::dtype(py_dtype_str.c_str()), py_dims, py_strides);
737+
PADDLE_ENFORCE_EQ(py_arr.writeable(), true,
738+
platform::errors::InvalidArgument(
739+
"PyArray is not writable, in which case memory leak "
740+
"or double free would occur"));
741+
PADDLE_ENFORCE_EQ(
742+
py_arr.owndata(), true,
743+
platform::errors::InvalidArgument(
744+
"PyArray does not own data, in which case memory leak "
745+
"or double free would occur"));
746+
747+
size_t copy_bytes = sizeof_dtype * numel;
748+
auto p = BOOST_GET_CONST(platform::NPUPlace, tensor.place());
749+
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
750+
auto &ctx = *pool.Get(tensor.place());
751+
paddle::memory::Copy(
752+
platform::CPUPlace(), py_arr.mutable_data(), p, tensor_buf_ptr,
753+
copy_bytes,
754+
reinterpret_cast<const platform::NPUDeviceContext &>(ctx).stream());
755+
ctx.Wait();
756+
return py_arr;
757+
#else
758+
PADDLE_THROW(platform::errors::PermissionDenied(
759+
"Cannot use NPUPlace in CPU/GPU/XPU version, "
760+
"Please recompile or reinstall Paddle with NPU support."));
732761
#endif
733762
}
734763
PADDLE_THROW(platform::errors::Unimplemented("Place is not supported"));

python/paddle/fluid/io.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,6 +1973,10 @@ def set_var(var, ndarray):
19731973
p = paddle.fluid.core.Place()
19741974
p.set_place(t._place())
19751975
place = paddle.fluid.XPUPlace(p.xpu_device_id())
1976+
elif p.is_npu_place():
1977+
p = paddle.fluid.core.Place()
1978+
p.set_place(t._place())
1979+
place = paddle.fluid.NPUPlace(p.npu_device_id())
19761980
else:
19771981
p = paddle.fluid.core.Place()
19781982
p.set_place(t._place())
@@ -2115,8 +2119,8 @@ def _load_vars_with_try_catch(exe,
21152119
error_str = "Failed to load model/variables `%s`, please make sure " \
21162120
"model/variables file is saved with the following APIs: " \
21172121
"save_params, save_persistables, save_vars."
2118-
filenames = [var.name for var in vars
2119-
] if filename is None else filename
2122+
filenames = [var.name for var in
2123+
vars] if filename is None else filename
21202124
if raise_error:
21212125
raise RuntimeError(error_str % filenames)
21222126
else:
@@ -2256,6 +2260,10 @@ def set_program_state(program, state_dict):
22562260
p = paddle.fluid.core.Place()
22572261
p.set_place(ten_place)
22582262
py_place = paddle.fluid.XPUPlace(p.xpu_device_id())
2263+
elif ten_place.is_npu_place():
2264+
p = paddle.fluid.core.Place()
2265+
p.set_place(ten_place)
2266+
py_place = paddle.fluid.NPUPlace(p.npu_device_id())
22592267

22602268
ten.set(new_para_np, py_place)
22612269

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
17+
import unittest
18+
import sys
19+
sys.path.append("..")
20+
import paddle
21+
import paddle.fluid as fluid
22+
import paddle.fluid.core as core
23+
from paddle.nn import Embedding
24+
import paddle.fluid.framework as framework
25+
from paddle.fluid.optimizer import Adam
26+
from paddle.fluid.dygraph.base import to_variable
27+
from test_imperative_base import new_program_scope
28+
from paddle.fluid.executor import global_scope
29+
import numpy as np
30+
import six
31+
import pickle
32+
import os
33+
import errno
34+
from test_static_save_load import *
35+
36+
# Runs at import time, before any of the test classes below are defined.
paddle.enable_static()
37+
38+
39+
# NPU variant of TestSaveLoadBase (presumably provided by the wildcard import
# from test_static_save_load): only set_place() is overridden here. The whole
# class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
40+
"core is not compiled with NPU")
41+
class TestNPUSaveLoadBase(TestSaveLoadBase):
42+
def set_place(self):
43+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
44+
) else paddle.NPUPlace(0)
45+
46+
47+
# NPU variant of TestSaveLoadPartial (presumably provided by the wildcard
# import from test_static_save_load): only set_place() is overridden here.
# The whole class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
48+
"core is not compiled with NPU")
49+
class TestNPUSaveLoadPartial(TestSaveLoadPartial):
50+
def set_place(self):
51+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
52+
) else paddle.NPUPlace(0)
53+
54+
55+
# NPU variant of TestSaveLoadSetStateDict (presumably provided by the wildcard
# import from test_static_save_load): only set_place() is overridden here.
# The whole class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
56+
"core is not compiled with NPU")
57+
class TestNPUSaveLoadSetStateDict(TestSaveLoadSetStateDict):
58+
def set_place(self):
59+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
60+
) else paddle.NPUPlace(0)
61+
62+
63+
# NPU variant of TestProgramStatePartial (presumably provided by the wildcard
# import from test_static_save_load): only set_place() is overridden here.
# The whole class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
64+
"core is not compiled with NPU")
65+
class TestNPUProgramStatePartial(TestProgramStatePartial):
66+
def set_place(self):
67+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
68+
) else paddle.NPUPlace(0)
69+
70+
71+
# NPU variant of TestLoadFromOldInterface (presumably provided by the wildcard
# import from test_static_save_load): only set_place() is overridden here.
# The whole class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
72+
"core is not compiled with NPU")
73+
class TestNPULoadFromOldInterface(TestLoadFromOldInterface):
74+
def set_place(self):
75+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
76+
) else paddle.NPUPlace(0)
77+
78+
79+
# NPU variant of TestLoadFromOldInterfaceSingleFile (presumably provided by
# the wildcard import from test_static_save_load): only set_place() is
# overridden here. The whole class is skipped when Paddle is not compiled
# with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
80+
"core is not compiled with NPU")
81+
class TestNPULoadFromOldInterfaceSingleFile(TestLoadFromOldInterfaceSingleFile):
82+
def set_place(self):
83+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
84+
) else paddle.NPUPlace(0)
85+
86+
87+
# NPU variant of TestProgramStateOldSave (presumably provided by the wildcard
# import from test_static_save_load): overrides setUp() and set_place().
# The whole class is skipped when Paddle is not compiled with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
88+
"core is not compiled with NPU")
89+
class TestNPUProgramStateOldSave(TestProgramStateOldSave):
90+
def setUp(self):
91+
# Sets the test_dygraph flag to False; how the base class consumes the
# flag is not visible here -- presumably it disables a dygraph code path.
self.test_dygraph = False
92+
93+
def set_place(self):
94+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
95+
) else paddle.NPUPlace(0)
96+
97+
98+
# NPU variant of TestProgramStateOldSaveSingleModel (presumably provided by
# the wildcard import from test_static_save_load): only set_place() is
# overridden here. The whole class is skipped when Paddle is not compiled
# with NPU.
@unittest.skipIf(not paddle.is_compiled_with_npu(),
99+
"core is not compiled with NPU")
100+
class TestNPUProgramStateOldSaveSingleModel(TestProgramStateOldSaveSingleModel):
101+
def set_place(self):
102+
# Returns NPUPlace(0) when core reports NPU support, else CPUPlace().
return fluid.CPUPlace() if not core.is_compiled_with_npu(
103+
) else paddle.NPUPlace(0)
104+
105+
106+
# Script entry point: calls paddle.enable_static() again, then hands control
# to the unittest runner.
if __name__ == '__main__':
107+
paddle.enable_static()
108+
unittest.main()

0 commit comments

Comments
 (0)