Skip to content
15 changes: 15 additions & 0 deletions lite/api/cxx_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ bool IsQuantizedMode(const std::shared_ptr<cpp::ProgramDesc> &program_desc) {
quant_dequant_op.end(),
op_type) != quant_dequant_op.end()) {
is_quantized_model = true;
#ifdef LITE_WITH_XPU
if (op_desc->HasAttr("bit_length") &&
op_desc->GetAttr<int32_t>("bit_length") != 8) {
return false;
}
#endif
}

if (std::find(dynamic_quant_op.begin(),
dynamic_quant_op.end(),
op_type) != dynamic_quant_op.end()) {
Expand Down Expand Up @@ -368,10 +375,18 @@ void Predictor::Build(const std::shared_ptr<cpp::ProgramDesc> &program_desc,
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}

#ifdef LITE_WITH_XPU
if (valid_place.target == TARGET(kXPU)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kXPU), PRECISION(kInt8)});
}
#else
if (valid_place.target == TARGET(kX86)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kX86), PRECISION(kInt8)});
}
#endif
}
}

Expand Down
20 changes: 20 additions & 0 deletions lite/api/paddle_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,26 @@ void CxxConfig::set_xpu_conv_autotune(bool autotune,
#endif
}

// Sets how many XPU clusters the per-thread xdnn context is allowed to use.
// The value is stored in lite::TargetWrapperXPU::cluster_num and applied to
// the thread-local context when it is first created (0 keeps the xdnn
// default). In builds without LITE_WITH_XPU this call is a logged no-op.
// @param num number of clusters per thread; presumably must be accepted by
//            xdnn::Context::set_ncluster — TODO confirm valid range.
void CxxConfig::set_xpu_cluster_num_per_thread(const int num) {
#ifdef LITE_WITH_XPU
  lite::TargetWrapperXPU::cluster_num = num;
#else
  // Silence -Wunused-parameter in non-XPU builds.
  (void)num;
  LOG(WARNING) << "The invoking of the function "
                  "'set_xpu_cluster_num_per_thread' is ignored, please "
                  "rebuild it with LITE_WITH_XPU=ON.";
#endif
}

// Sets how many XPU SDNN units the per-thread xdnn context is allowed to use.
// The value is stored in lite::TargetWrapperXPU::sdnn_num and applied to the
// thread-local context when it is first created (0 keeps the xdnn default).
// In builds without LITE_WITH_XPU this call is a logged no-op.
// @param num number of SDNN units per thread; presumably must be accepted by
//            xdnn::Context::set_nsdnn — TODO confirm valid range.
void CxxConfig::set_xpu_sdnn_num_per_thread(const int num) {
#ifdef LITE_WITH_XPU
  lite::TargetWrapperXPU::sdnn_num = num;
#else
  // Silence -Wunused-parameter in non-XPU builds.
  (void)num;
  LOG(WARNING) << "The invoking of the function "
                  "'set_xpu_sdnn_num_per_thread' is ignored, please "
                  "rebuild it with LITE_WITH_XPU=ON.";
#endif
}

template <class T>
void CxxConfig::set_preferred_inputs_for_warmup(const int group_idx,
const int tensor_idx,
Expand Down
3 changes: 2 additions & 1 deletion lite/api/paddle_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ class LITE_API CxxConfig : public ConfigBase {
void set_xpu_multi_encoder_precision(const std::string& precision = "int16");
void set_xpu_multi_encoder_method(const std::string& precision = "int16",
bool adaptive_seqlen = false);

void set_xpu_cluster_num_per_thread(const int num);
void set_xpu_sdnn_num_per_thread(const int num);
// set input tensor for warmup.
// It is optional. If you set preferred inputs, the model will run immediately
// when the predictor is created
Expand Down
7 changes: 6 additions & 1 deletion lite/backends/xpu/target_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<float, int8_t>(
template XPUQuantData
TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<int8_t, int8_t>(
const int8_t*, const DDimLite&, bool, size_t);
template XPUQuantData
TargetWrapperXPU::ConvertCPUWeightToXPUQuantWeight<int16_t, int16_t>(
const int16_t*, const DDimLite&, bool, size_t);

// xpu context
LITE_THREAD_LOCAL std::shared_ptr<xdnn::Context> TargetWrapperXPU::tls_raw_ctx_{
Expand Down Expand Up @@ -194,6 +197,8 @@ LITE_THREAD_LOCAL XPUL3Planner* TargetWrapperXPU::l3_planner_{nullptr};
// xpu quantizer
LITE_THREAD_LOCAL std::shared_ptr<XPUQuantizer> TargetWrapperXPU::quantizer_{
nullptr};

// xpu set cluster sdnn
LITE_THREAD_LOCAL int TargetWrapperXPU::cluster_num{0};
LITE_THREAD_LOCAL int TargetWrapperXPU::sdnn_num{0};
} // namespace lite
} // namespace paddle
10 changes: 10 additions & 0 deletions lite/backends/xpu/target_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ class TargetWrapper<TARGET(kXPU)> {
static xdnn::Context* GetRawContext() {
if (tls_raw_ctx_.get() == nullptr) {
tls_raw_ctx_.reset(xdnn::create_context(), xdnn::destroy_context);
if (cluster_num != 0) {
tls_raw_ctx_->set_ncluster(cluster_num);
}

if (sdnn_num != 0) {
tls_raw_ctx_->set_nsdnn(sdnn_num);
}

CHECK(tls_raw_ctx_.get());
if (!enable_multi_stream_) {
CHECK(xpu_stream_.get() == nullptr)
Expand Down Expand Up @@ -174,6 +182,8 @@ class TargetWrapper<TARGET(kXPU)> {
static size_t shared_l3_size; // model level l3 size
static LITE_THREAD_LOCAL std::vector<XPUL3CacheBlock*>
l3_block_dict; // l3 cache block used between op layers
static LITE_THREAD_LOCAL int cluster_num;
static LITE_THREAD_LOCAL int sdnn_num;

private:
static void ScatterL3Cache(
Expand Down
4 changes: 4 additions & 0 deletions lite/backends/xpu/xpu_quantizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,5 +238,9 @@ template XPUQuantData XPUQuantizer::quant<int8_t, int8_t>(const int8_t*,
const DDimLite&,
bool,
size_t);
template XPUQuantData XPUQuantizer::quant<int16_t, int16_t>(const int16_t*,
const DDimLite&,
bool,
size_t);
} // namespace lite
} // namespace paddle
Loading