Revert "[OpenCL] Set flush period of command queue (#5905)"

zhaoyang-star · web-flow · commit e7e6b96ecce8 · 2021-04-18T17:36:43.000+08:00
This reverts commit 34b96de.
diff --git a/docs/demo_guides/opencl.md b/docs/demo_guides/opencl.md
@@ -172,7 +172,7 @@ adb shell "export LD_LIBRARY_PATH=/data/local/tmp/opencl/; \
            # repeats=100, warmup=10
            # power_mode=0 绑定大核, thread_num=1
            # accelerate_opencl=1 开启 opencl kernel cache & tuning，仅当模型运行在 opencl 后端时该选项才会生效
-           # print_output=0 不打印模型输出 tensors 详细数据
+           # print_output=0 不打印模型输出 tensors 详细数据
 ```
 
 
@@ -225,34 +225,7 @@ adb shell "export GLOG_v=4; \
 
 **NOTE：** 对OpenCL的支持还在持续开发中。
 
-## 4. 性能分析和精度分析
-
-Android 平台下分析：
-```
-# 开启性能分析，会打印出每个 op 耗时信息和汇总信息
-./lite/tools/build_android.sh --arch=armv7 --toolchain=clang --with_opencl=ON --with_extra=ON --with_profile=ON full_publish
-# 开启精度分析，会打印出每个 op 输出数据的均值和标准差信息
-./lite/tools/build_android.sh --arch=armv7 --toolchain=clang --with_opencl=ON --with_extra=ON --with_precision_profile=ON full_publish
-```
-
-macOS x86 平台下分析：
-```
-# 开启性能分析，会打印出每个 op 耗时信息和汇总信息
-./lite/tools/build.sh --with_opencl=ON --with_extra=ON --with_profile=ON x86 
-# 开启精度分析，会打印出每个 op 输出数据的均值和标准差信息
-./lite/tools/build.sh --with_opencl=ON --with_extra=ON --with_precision_profile=ON x86 
-```
-
-Windows x86 平台下分析：
-```
-# 开启性能分析，会打印出每个 op 耗时信息和汇总信息
-.\lite\tools\build_windows.bat with_opencl with_extra with_profile 
-# 开启精度分析，会打印出每个 op 输出数据的均值和标准差信息
-.\lite\tools\build_windows.bat with_opencl with_extra with_precision_profile 
-```
-详细输出信息的说明可查阅[调试工具](../user_guides/debug)。
-
-## 5. 常见问题
+## 4. 常见问题
 
 1. opencl计算过程中大多以`cl::Image2D`的数据排布进行计算，不同gpu支持的最大`cl::Image2D`的宽度和高度有限制，模型输入的数据格式是buffer形式的`NCHW`数据排布方式。要计算你的模型是否超出最大支持（大部分手机支持的`cl::Image2D`最大宽度和高度均为16384），可以通过公式`image_h = tensor_n * tensor_h, image_w=tensor_w * (tensor_c + 3) / 4`计算当前层NCHW排布的Tensor所需的`cl::Image2D`的宽度和高度；
 2. 部署时需考虑不支持opencl的情况，可预先使用API`bool ::IsOpenCLBackendValid()`判断，对于不支持的情况加载CPU模型，详见[./lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc)；
diff --git a/lite/backends/opencl/cl_context.cc b/lite/backends/opencl/cl_context.cc
@@ -71,25 +71,6 @@ cl::Kernel &CLContext::GetKernel(const std::string &name) {
   return GetKernel(it->second);
 }
 
-cl_int CLContext::RunKernel(const cl::Kernel &kernel,
-                            const cl::NDRange &global,
-                            const cl::NDRange &local,
-                            cl::Event *event) {
-  cl_int ret = GetCommandQueue().enqueueNDRangeKernel(
-      kernel, cl::NullRange, global, local, nullptr, event);
-  CL_CHECK_FATAL(ret);
-
-  static int cnt = 0;
-  const int flush_period = 10;
-  if (cnt % flush_period == 0) {
-    ret = GetCommandQueue().flush();
-    CL_CHECK_FATAL(ret);
-  }
-  cnt++;
-
-  return ret;
-}
-
 cl::NDRange CLContext::DefaultGlobalWorkSize(const CLImage &image) {
   // n c h w
   auto image_dim = image.tensor_dims();
diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h
@@ -56,11 +56,6 @@ class CLContext {
 
   cl::Kernel &GetKernel(const std::string &name);
 
-  cl_int RunKernel(const cl::Kernel &kernel,
-                   const cl::NDRange &global,
-                   const cl::NDRange &local,
-                   cl::Event *event = nullptr);
-
   cl::NDRange DefaultGlobalWorkSize(const CLImage &image);
 
   cl::NDRange DefaultLocalWorkSize(
diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc
@@ -1346,17 +1346,13 @@ void ConvImageCompute::Run() {
   (this->*impl_)();
 
   auto& context = ctx_->As<OpenCLContext>();
-  status_ = context.cl_context()->RunKernel(
-      kernel_, global_work_size_, local_work_size_, &event_);
-  /*
   status_ = EnqueueNDRangeKernel(context,
                                  kernel_,
                                  cl::NullRange,
                                  global_work_size_,
                                  local_work_size_,
                                  nullptr,
                                  event_);
-  */
   CL_CHECK_FATAL(status_);
 }