Skip to content

Commit 6326c3e

Browse files
authored
[Inference] Inference python api support fp16 (#34676)
1 parent 016cc56 commit 6326c3e

File tree

4 files changed

+55
-0
lines changed

4 files changed

+55
-0
lines changed

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,16 @@
2525
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
2626
#include "paddle/fluid/inference/api/helper.h"
2727
#include "paddle/fluid/inference/api/paddle_inference_api.h"
28+
#include "paddle/fluid/platform/float16.h"
2829
#include "paddle/fluid/string/printf.h"
2930
#ifdef PADDLE_WITH_TESTING
3031
#include <gtest/gtest.h>
3132
#include <gtest/gtest_prod.h>
3233
#endif
3334

35+
namespace paddle_infer {
36+
using float16 = paddle::platform::float16;
37+
}
3438
///
3539
/// \file analysis_predictor.h
3640
///

paddle/fluid/inference/api/details/zero_copy_tensor.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,12 @@
1919
#include "paddle/fluid/inference/api/paddle_tensor.h"
2020
#include "paddle/fluid/memory/memcpy.h"
2121
#include "paddle/fluid/platform/enforce.h"
22+
#include "paddle/fluid/platform/float16.h"
2223

2324
namespace paddle_infer {
2425

26+
using float16 = paddle::platform::float16;
27+
2528
void Tensor::Reshape(const std::vector<int> &shape) {
2629
PADDLE_ENFORCE_EQ(
2730
name_.empty(), false,
@@ -104,6 +107,8 @@ DataType Tensor::type() const {
104107
auto type = tensor->type();
105108
if (type == paddle::framework::proto::VarType::FP32) {
106109
return DataType::FLOAT32;
110+
} else if (type == paddle::framework::proto::VarType::FP16) {
111+
return DataType::FLOAT16;
107112
} else if (type == paddle::framework::proto::VarType::INT64) {
108113
return DataType::INT64;
109114
} else if (type == paddle::framework::proto::VarType::INT32) {
@@ -261,12 +266,14 @@ template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
261266
template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
262267
template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
263268
template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
269+
template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);
264270

265271
template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data);
266272
template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data);
267273
template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data);
268274
template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data);
269275
template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data);
276+
template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data);
270277

271278
template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
272279
int *size) const;

paddle/fluid/inference/api/paddle_tensor.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ enum DataType {
2525
INT32,
2626
UINT8,
2727
INT8,
28+
FLOAT16,
2829
// TODO(Superjomn) support more data types if needed.
2930
};
3031

paddle/fluid/pybind/inference_api.cc

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,36 @@
3434

3535
namespace py = pybind11;
3636

37+
namespace pybind11 {
38+
namespace detail {
39+
40+
// Note: use same enum number of float16 in numpy.
41+
// import numpy as np
42+
// print np.dtype(np.float16).num # 23
43+
constexpr int NPY_FLOAT16_ = 23;
44+
constexpr int NPY_UINT16_ = 4;
45+
46+
// Note: Since float16 is not a builtin type in C++, we register
47+
// paddle::platform::float16 as numpy.float16.
48+
// Ref: https://github.com/pybind/pybind11/issues/1776
49+
template <>
50+
struct npy_format_descriptor<paddle_infer::float16> {
51+
static py::dtype dtype() {
52+
handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_);
53+
return reinterpret_borrow<py::dtype>(ptr);
54+
}
55+
static std::string format() {
56+
// Note: "e" represents float16.
57+
// Details at:
58+
// https://docs.python.org/3/library/struct.html#format-characters.
59+
return "e";
60+
}
61+
static constexpr auto name = _("float16");
62+
};
63+
64+
} // namespace detail
65+
} // namespace pybind11
66+
3767
namespace paddle {
3868
namespace pybind {
3969
using paddle::AnalysisPredictor;
@@ -126,6 +156,9 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
126156
case PaddleDType::UINT8:
127157
dt = py::dtype::of<uint8_t>();
128158
break;
159+
case PaddleDType::FLOAT16:
160+
dt = py::dtype::of<paddle_infer::float16>();
161+
break;
129162
default:
130163
PADDLE_THROW(platform::errors::Unimplemented(
131164
"Unsupported data type. Now only supports INT32, INT64, UINT8 and "
@@ -196,6 +229,10 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT
196229
case PaddleDType::FLOAT32:
197230
tensor.copy_to_cpu<float>(static_cast<float *>(array.mutable_data()));
198231
break;
232+
case PaddleDType::FLOAT16:
233+
tensor.copy_to_cpu<paddle::platform::float16>(
234+
static_cast<paddle::platform::float16 *>(array.mutable_data()));
235+
break;
199236
case PaddleDType::UINT8:
200237
tensor.copy_to_cpu<uint8_t>(static_cast<uint8_t *>(array.mutable_data()));
201238
break;
@@ -226,6 +263,10 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT
226263
case PaddleDType::FLOAT32:
227264
tensor.CopyToCpu<float>(static_cast<float *>(array.mutable_data()));
228265
break;
266+
case PaddleDType::FLOAT16:
267+
tensor.CopyToCpu<paddle::platform::float16>(
268+
static_cast<paddle::platform::float16 *>(array.mutable_data()));
269+
break;
229270
case PaddleDType::UINT8:
230271
tensor.CopyToCpu(static_cast<uint8_t *>(array.mutable_data()));
231272
break;
@@ -642,6 +683,7 @@ void BindZeroCopyTensor(py::module *m) {
642683
.def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
643684
.def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
644685
.def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
686+
.def("copy_from_cpu", &ZeroCopyTensorCreate<paddle_infer::float16>)
645687
.def("copy_to_cpu", &ZeroCopyTensorToNumpy)
646688
.def("shape", &ZeroCopyTensor::shape)
647689
.def("set_lod", &ZeroCopyTensor::SetLoD)
@@ -655,6 +697,7 @@ void BindPaddleInferTensor(py::module *m) {
655697
.def("copy_from_cpu", &PaddleInferTensorCreate<int32_t>)
656698
.def("copy_from_cpu", &PaddleInferTensorCreate<int64_t>)
657699
.def("copy_from_cpu", &PaddleInferTensorCreate<float>)
700+
.def("copy_from_cpu", &PaddleInferTensorCreate<paddle_infer::float16>)
658701
.def("copy_to_cpu", &PaddleInferTensorToNumpy)
659702
.def("shape", &paddle_infer::Tensor::shape)
660703
.def("set_lod", &paddle_infer::Tensor::SetLoD)

0 commit comments

Comments (0)