Skip to content
15 changes: 15 additions & 0 deletions lite/api/cxx_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ bool IsQuantizedMode(const std::shared_ptr<cpp::ProgramDesc> &program_desc) {
quant_dequant_op.end(),
op_type) != quant_dequant_op.end()) {
is_quantized_model = true;
#ifdef LITE_WITH_XPU
if (op_desc->HasAttr("bit_length") &&
op_desc->GetAttr<int32_t>("bit_length") != 8) {
return false;
}
#endif
}

if (std::find(dynamic_quant_op.begin(),
dynamic_quant_op.end(),
op_type) != dynamic_quant_op.end()) {
Expand Down Expand Up @@ -368,10 +375,18 @@ void Predictor::Build(const std::shared_ptr<cpp::ProgramDesc> &program_desc,
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}

#ifdef LITE_WITH_XPU
if (valid_place.target == TARGET(kXPU)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kXPU), PRECISION(kInt8)});
}
#else
if (valid_place.target == TARGET(kX86)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kX86), PRECISION(kInt8)});
}
#endif
}
}

Expand Down
20 changes: 20 additions & 0 deletions lite/api/paddle_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,26 @@ void CxxConfig::set_xpu_conv_autotune(bool autotune,
#endif
}

// Sets how many XPU clusters the per-thread xdnn context is allowed to use.
// The value is stored in lite::TargetWrapperXPU::cluster_num and applied to
// the thread-local context when it is first created (0 keeps the xdnn
// default). In builds without LITE_WITH_XPU this call is a logged no-op.
// @param num number of clusters per thread; presumably must be accepted by
//            xdnn::Context::set_ncluster — TODO confirm valid range.
void CxxConfig::set_xpu_cluster_num_per_thread(const int num) {
#ifdef LITE_WITH_XPU
  lite::TargetWrapperXPU::cluster_num = num;
#else
  // Silence -Wunused-parameter in non-XPU builds.
  (void)num;
  LOG(WARNING) << "The invoking of the function "
                  "'set_xpu_cluster_num_per_thread' is ignored, please "
                  "rebuild it with LITE_WITH_XPU=ON.";
#endif
}

// Sets how many XPU SDNN units the per-thread xdnn context is allowed to use.
// The value is stored in lite::TargetWrapperXPU::sdnn_num and applied to the
// thread-local context when it is first created (0 keeps the xdnn default).
// In builds without LITE_WITH_XPU this call is a logged no-op.
// @param num number of SDNN units per thread; presumably must be accepted by
//            xdnn::Context::set_nsdnn — TODO confirm valid range.
void CxxConfig::set_xpu_sdnn_num_per_thread(const int num) {
#ifdef LITE_WITH_XPU
  lite::TargetWrapperXPU::sdnn_num = num;
#else
  // Silence -Wunused-parameter in non-XPU builds.
  (void)num;
  LOG(WARNING) << "The invoking of the function "
                  "'set_xpu_sdnn_num_per_thread' is ignored, please "
                  "rebuild it with LITE_WITH_XPU=ON.";
#endif
}

template <class T>
void CxxConfig::set_preferred_inputs_for_warmup(const int group_idx,
const int tensor_idx,
Expand Down
3 changes: 2 additions & 1 deletion lite/api/paddle_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ class LITE_API CxxConfig : public ConfigBase {
void set_xpu_multi_encoder_precision(const std::string& precision = "int16");
void set_xpu_multi_encoder_method(const std::string& precision = "int16",
bool adaptive_seqlen = false);

void set_xpu_cluster_num_per_thread(const int num);
void set_xpu_sdnn_num_per_thread(const int num);
// set input tensor for warmup.
// It is optional. If you set preferred inputs, the model will run immediately
// when the predictor is created
Expand Down
7 changes: 6 additions & 1 deletion lite/backends/xpu/target_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<float, int8_t>(
template XPUQuantData
TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<int8_t, int8_t>(
const int8_t*, const DDimLite&, bool, size_t);
template XPUQuantData
TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<int16_t, int16_t>(
const int16_t*, const DDimLite&, bool, size_t);

// xpu context
LITE_THREAD_LOCAL std::shared_ptr<xdnn::Context> TargetWrapperXPU::tls_raw_ctx_{
Expand Down Expand Up @@ -194,6 +197,8 @@ LITE_THREAD_LOCAL XPUL3Planner* TargetWrapperXPU::l3_planner_{nullptr};
// xpu quantizer
LITE_THREAD_LOCAL std::shared_ptr<XPUQuantizer> TargetWrapperXPU::quantizer_{
nullptr};

// xpu set cluster sdnn
LITE_THREAD_LOCAL int TargetWrapperXPU::cluster_num{0};
LITE_THREAD_LOCAL int TargetWrapperXPU::sdnn_num{0};
} // namespace lite
} // namespace paddle
10 changes: 10 additions & 0 deletions lite/backends/xpu/target_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ class TargetWrapper<TARGET(kXPU)> {
static xdnn::Context* GetRawContext() {
if (tls_raw_ctx_.get() == nullptr) {
tls_raw_ctx_.reset(xdnn::create_context(), xdnn::destroy_context);
if (cluster_num != 0) {
tls_raw_ctx_->set_ncluster(cluster_num);
}

if (sdnn_num != 0) {
tls_raw_ctx_->set_nsdnn(sdnn_num);
}

CHECK(tls_raw_ctx_.get());
if (!enable_multi_stream_) {
CHECK(xpu_stream_.get() == nullptr)
Expand Down Expand Up @@ -174,6 +182,8 @@ class TargetWrapper<TARGET(kXPU)> {
static size_t shared_l3_size; // model level l3 size
static LITE_THREAD_LOCAL std::vector<XPUL3CacheBlock*>
l3_block_dict; // l3 cache block used between op layers
static LITE_THREAD_LOCAL int cluster_num;
static LITE_THREAD_LOCAL int sdnn_num;

private:
static void ScatterL3Cache(
Expand Down
4 changes: 4 additions & 0 deletions lite/backends/xpu/xpu_quantizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,5 +238,9 @@ template XPUQuantData XPUQuantizer::quant<int8_t, int8_t>(const int8_t*,
const DDimLite&,
bool,
size_t);
template XPUQuantData XPUQuantizer::quant<int16_t, int16_t>(const int16_t*,
const DDimLite&,
bool,
size_t);
} // namespace lite
} // namespace paddle
Loading