Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions paddle/fluid/operators/controlflow/feed_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ class FeedOp : public framework::OperatorWithKernel {
feed_sparse_tensor.coalesced());
out_var->GetMutable<phi::SparseCooTensor>()->SetIndicesDict(
feed_sparse_tensor.GetIndicesDict());
out_var->GetMutable<phi::SparseCooTensor>()->SetKmaps(
feed_sparse_tensor.GetKmaps());
} else {
PADDLE_THROW(
phi::errors::Unimplemented("Only support DenseTensor, Strings, and "
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/sync_batch_norm_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ void SyncBatchNormCooKernel(const Context& dev_ctx,
saved_variance,
reserve_space);
y->SetIndicesDict(x.GetIndicesDict());
y->SetKmaps(x.GetKmaps());
}

template <typename T, typename Context>
Expand Down
9 changes: 9 additions & 0 deletions paddle/phi/api/yaml/sparse_ops.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@
intermediate: rulebook, counter
backward : conv3d_grad

# Sparse 3D convolution via implicit GEMM: sparse COO input `x` with a dense
# `kernel`, producing a sparse COO output. Forward-only (no backward entry).
- op : conv3d_implicit_gemm
args : (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key="")
output : Tensor(out)
infer_meta :
func : sparse::Conv3dImplicitGemmInferMeta
kernel :
func : conv3d_implicit_gemm{sparse_coo, dense -> sparse_coo}
layout : x

- op : divide
args : (Tensor x, Tensor y)
output : Tensor(out)
Expand Down
46 changes: 46 additions & 0 deletions paddle/phi/core/kmap_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"

namespace phi {

struct KmapCache {
DenseTensor* out_in_map = nullptr;
DenseTensor* coords = nullptr;
DenseTensor* hashmap_keys = nullptr;
DenseTensor* hashmap_values = nullptr;
// std::vector<int>* spatial_range;

// destructor
~KmapCache() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

单测中没有执行这个析构,可以增加一下

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

我增加了单测,本地跑单测会跑到这个析构,但是CI仍然显示没有跑到。

if (out_in_map) {
delete out_in_map;
}
if (coords) {
delete coords;
}
if (hashmap_keys) {
delete hashmap_keys;
}
if (hashmap_values) {
delete hashmap_values;
}
}
};

} // namespace phi
41 changes: 41 additions & 0 deletions paddle/phi/core/sparse_coo_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License. */
#pragma once

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kmap_cache.h"
#include "paddle/phi/core/tensor_base.h"
#include "paddle/phi/core/tensor_meta.h"

Expand Down Expand Up @@ -244,6 +245,43 @@ class SparseCooTensor : public TensorBase,
indices_dict_ = indices_dict;
}

/// \brief set kmaps_ pointer
/// \brief Register `kmap` in the cache under `key` (lazily creating the map)
/// and return a pointer to the stored entry.
///
/// If an entry for `key` already exists it is kept untouched and returned, so
/// an already-cached kmap is never clobbered. Note: the previous version only
/// inserted when the map itself was null, so caching a second distinct key
/// made `kmaps_->at(key)` throw std::out_of_range.
KmapCache* SetKmapCache(const std::string& key, const KmapCache& kmap) {
  if (kmaps_ == nullptr) {
    kmaps_ = std::make_shared<std::map<std::string, KmapCache>>();
  }
  auto iter = kmaps_->find(key);
  if (iter == kmaps_->end()) {
    iter = kmaps_->emplace(key, kmap).first;
  }
  return &iter->second;
}

/// \brief Share a whole kmap-cache map with this tensor (shallow shared_ptr
/// copy); used by element-wise/norm kernels to propagate cached conv kmaps
/// from input to output tensors.
void SetKmaps(
const std::shared_ptr<std::map<std::string, KmapCache>>& kmaps) {
kmaps_ = kmaps;
}

/// \brief Return the shared kmap-cache map (may be nullptr when no sparse
/// conv has cached anything on this tensor yet).
std::shared_ptr<std::map<std::string, KmapCache>> GetKmaps() const {
return kmaps_;
}

/// \brief Look up the cached kmap registered under `key`.
/// \return Pointer to the cached entry, or nullptr when no cache map exists
/// yet or the key is absent.
const KmapCache* GetKmapCache(const std::string& key) const {
  if (kmaps_ != nullptr) {
    auto iter = kmaps_->find(key);
    if (iter != kmaps_->end()) {
      return &iter->second;
    }
  }
  return nullptr;
}

/// \brief Drop this tensor's reference to the shared kmap cache.
/// Resetting the shared_ptr releases the map (and the cached tensors inside)
/// once no other tensor still holds it. Assigning nullptr unconditionally is
/// equivalent to the previous null-guarded version — the branch was redundant.
void ClearKmaps() { kmaps_ = nullptr; }

private:
friend class DenseTensorUtils;

Expand All @@ -265,6 +303,9 @@ class SparseCooTensor : public TensorBase,
std::shared_ptr<std::map<std::string, std::pair<DenseTensor, DenseTensor>>>
indices_dict_ = nullptr;

// Sparse conv will generate a kmap, which can be reused.
std::shared_ptr<std::map<std::string, KmapCache>> kmaps_ = nullptr;

/* --------------------------- */
/* example: non zero element is scalar */
/* --------------------------- */
Expand Down
37 changes: 37 additions & 0 deletions paddle/phi/infermeta/sparse/binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,43 @@ void Conv3dInferMeta(const MetaTensor& x,
counter->set_dims({1});
}

void Conv3dImplicitGemmInferMeta(const MetaTensor& x,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ci的代码没有覆盖到这个OP,可以针对这个OP增加单测

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

const MetaTensor& kernel,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const int groups,
const bool subm,
const std::string& key,
MetaTensor* out) {
const auto& x_dims = x.dims();
const bool is2D = x_dims.size() == 4 ? true : false;
const auto& kernel_dims = kernel.dims();

int rank = is2D ? 4 : 5;
std::vector<int> out_dims_vec(rank, 1);
DDim out_dims = common::make_ddim(out_dims_vec);

std::vector<int> kernel_sizes(kernel_dims.size());
for (int i = 0; i < kernel_dims.size(); i++) {
kernel_sizes[i] = static_cast<int>(kernel_dims[i]);
}

std::vector<int> subm_paddings(paddings), subm_strides(strides);
if (subm) {
// the out shape of subm_conv is same as input shape
// reset the padding=kernel_size/2 and strides=1
ResetSubmKernelSizeAndStrides(kernel.dims(), &subm_paddings, &subm_strides);
}

GetOutShape(
x_dims, kernel_sizes, subm_paddings, dilations, subm_strides, &out_dims);

out->set_dtype(x.dtype());
out->set_dims(out_dims);
out->set_layout(x.layout());
}

inline const std::vector<int> PoolResetKernel(
const std::vector<int>& kernel_sizes,
const int in_channels,
Expand Down
10 changes: 10 additions & 0 deletions paddle/phi/infermeta/sparse/binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ void Conv3dInferMeta(const MetaTensor& x,
MetaTensor* rulebook,
MetaTensor* counter);

void Conv3dImplicitGemmInferMeta(const MetaTensor& x,
const MetaTensor& kernel,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
const std::vector<int>& strides,
const int groups,
const bool subm,
const std::string& key,
MetaTensor* out);

void Pool3dInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_sizes,
const std::vector<int>& paddings,
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ file(

if(APPLE OR WIN32)
list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu")
list(REMOVE_ITEM kernel_cu "sparse/gpu/conv_kernel_igemm.cu")
endif()

if(NOT WITH_DGC)
Expand Down
2 changes: 2 additions & 0 deletions paddle/phi/kernels/funcs/sparse/convolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ limitations under the License. */
#pragma once

#include "paddle/common/ddim.h"
#include "paddle/phi/core/kmap_cache.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"

namespace phi {
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/sparse/batch_norm_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void BatchNormCooKernel(const Context& dev_ctx,
saved_variance,
reserve_space);
y->SetIndicesDict(x.GetIndicesDict());
y->SetKmaps(x.GetKmaps());
}

} // namespace sparse
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/sparse/elementwise_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ void ElementWiseAddDenseKernel(const Context& dev_ctx,
EmptyLikeCooKernel<T, Context>(dev_ctx, x, out);
phi::AddKernel<T, Context>(dev_ctx, x.values(), y, out->mutable_values());
out->SetIndicesDict(x.GetIndicesDict());
out->SetKmaps(x.GetKmaps());
} else {
PADDLE_THROW(
errors::Unimplemented("Not support Sparse + Dense in GPU mode"));
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ void CoalesceCooGPUKernel(const GPUContext& dev_ctx,

out->SetMember(out_indices, out_values, x.dims(), true);
out->SetIndicesDict(x.GetIndicesDict());
out->SetKmaps(x.GetKmaps());
}

template <typename T, typename Context>
Expand Down
Loading