Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions paddle/fluid/operators/math/algorithm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,7 @@ namespace operators {
namespace math {

template <typename T>
HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
HOSTDEVICE inline int64_t BinarySearch(const T *x, size_t num, const T &val) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to change. Keep int64_t.

int64_t beg = 0, end = num - 1;
while (beg <= end) {
auto mid = ((beg + end) >> 1);
Expand All @@ -39,8 +39,8 @@ HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
return -1;
}

template <typename T>
HOSTDEVICE inline size_t LowerBound(const T *x, size_t num, const T &val) {
template <typename T1, typename T2>
HOSTDEVICE inline size_t LowerBound(const T1 *x, size_t num, const T2 &val) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) // @{ Group LowerBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/lower_bound
Expand All @@ -62,8 +62,8 @@ HOSTDEVICE inline size_t LowerBound(const T *x, size_t num, const T &val) {
#endif // @} End Group LowerBound
}

template <typename T>
HOSTDEVICE inline size_t UpperBound(const T *x, size_t num, const T &val) {
template <typename T1, typename T2>
HOSTDEVICE inline size_t UpperBound(const T1 *x, size_t num, const T2 &val) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) // @{ Group UpperBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/upper_bound
Expand Down
131 changes: 131 additions & 0 deletions paddle/fluid/operators/searchsorted_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/searchsorted_op.h"

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think that you should include these headers. Useless below.


#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace operators {

class SearchSortedOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
// Returns true when `sequences_dims` and `values_dims` have the same rank
// and agree on every dimension except the innermost (last) one.
static bool SearchsortedDimsMatchedBeforeLastDim(
    const framework::DDim& sequences_dims,
    const framework::DDim& values_dims) {
  const auto rank = sequences_dims.size();
  if (rank != values_dims.size()) {
    return false;
  }
  // Compare all leading dimensions; stop at the first mismatch.
  bool matched = true;
  for (int64_t i = 0; matched && i + 1 < rank; ++i) {
    matched = (sequences_dims[i] == values_dims[i]);
  }
  return matched;
}

// Validates inputs/outputs and infers the output shape.  The output of
// searchsorted always has exactly the shape of the Values input.
void InferShape(framework::InferShapeContext* ctx) const override {
  OP_INOUT_CHECK(ctx->HasInput("SortedSequence"), "Input", "SortedSequence",
                 "searchsorted");
  OP_INOUT_CHECK(ctx->HasInput("Values"), "Input", "Values", "searchsorted");

  OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "searchsorted");

  auto sequences_dims = ctx->GetInputDim("SortedSequence");
  auto values_dims = ctx->GetInputDim("Values");
  auto out_int32 = ctx->Attrs().Get<bool>("out_int32");

  // A 1-D sorted_sequence is shared by every row of values; otherwise the
  // first N-1 dimensions of both tensors must match.
  if (sequences_dims.size() != 1) {
    PADDLE_ENFORCE_EQ(
        SearchsortedDimsMatchedBeforeLastDim(sequences_dims, values_dims),
        true,
        platform::errors::Unavailable("The sorted_sequence tensor should be "
                                      "1 dimension or the first N-1 "
                                      "dimensions of sorted_sequence tensor "
                                      "and input values tensor must "
                                      "match, but we got sorted_sequence "
                                      "tensor ( %s ), and input values "
                                      "tensor ( %s )",
                                      sequences_dims, values_dims));
  }

  if (out_int32) {
    // An int32 output cannot represent positions at or beyond INT_MAX.
    PADDLE_ENFORCE_LT(
        sequences_dims[sequences_dims.size() - 1],
        std::numeric_limits<int>::max(),
        platform::errors::Unavailable(
            "the size of sorted_sequence last dimension should be less than "
            "%d but we got %d",
            std::numeric_limits<int>::max(),
            sequences_dims[sequences_dims.size() - 1]));
  }

  ctx->SetOutputDim("Out", values_dims);
}

protected:
// The kernel is keyed on the dtype of the SortedSequence input; the dtype
// of Values is dispatched separately at runtime inside the kernel.
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  auto data_type =
      OperatorWithKernel::IndicateVarDataType(ctx, "SortedSequence");
  return framework::OpKernelType(data_type, ctx.device_context());
}
};

// Declares the proto (inputs, outputs, attributes and doc) of the
// searchsorted operator.
class SearchSortedOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("SortedSequence",
             "(Tensor), N-D or 1-D tensor, containing monotonically increasing "
             "sequence on the innermost dimension.");
    AddInput(
        "Values",
        "(Tensor), N-D tensor or a Scalar containing the search value(s).");
    AddOutput("Out", "(Tensor), The output tensor of searchsorted op.");
    // out_int32 defaults to false, i.e. the output indices are int64.
    AddAttr<bool>("out_int32",
                  "the output tensor is int64_t type if False and int(32bit "
                  "normally) type if True.")
        .SetDefault(false);
    // right=false selects lower_bound semantics, right=true upper_bound.
    AddAttr<bool>(
        "right",
        "corresponding to lower bound if False and upper bound if True")
        .SetDefault(false);

    AddComment(R"DOC(
  Searchsorted Operator.

  This operator is used to find the indices of the value from the innermost dimension of sorted_sequence
)DOC");
  }
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

// Register the operator (shape inference + proto maker); searchsorted has
// no gradient, so no grad op is registered.
REGISTER_OPERATOR(searchsorted, ops::SearchSortedOp, ops::SearchSortedOpMaker);

// CPU kernels for each supported dtype of SortedSequence.
REGISTER_OP_CPU_KERNEL(
    searchsorted,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, double>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, int>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, int64_t>);
23 changes: 23 additions & 0 deletions paddle/fluid/operators/searchsorted_op.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/searchsorted_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

// CUDA kernels for each supported dtype of SortedSequence; the kernel body
// is shared with the CPU path via the HOSTDEVICE functor in the header.
REGISTER_OP_CUDA_KERNEL(
    searchsorted, ops::SearchSortedKernel<plat::CUDADeviceContext, float>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, double>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, int>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, int64_t>);
156 changes: 156 additions & 0 deletions paddle/fluid/operators/searchsorted_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;

// Element-wise functor shared by the CPU and GPU paths.  For each flattened
// index into the values tensor it writes the lower/upper-bound position of
// that value inside the matching innermost row of the sorted sequence.
// NaN/Inf values map to index 0.
template <typename T1, typename T2, typename OutType>
class GpuAndCpuSearchSortedCompute {
 public:
  HOSTDEVICE GpuAndCpuSearchSortedCompute(const T1* sequence_data,
                                          const T2* value_data, bool right,
                                          bool is_1d_boundaries,
                                          int64_t val_size, int64_t seq_size,
                                          OutType* out_data)
      : sequence_data_(sequence_data),
        value_data_(value_data),
        right_(right),
        is_1d_boundaries_(is_1d_boundaries),
        val_size_(val_size),
        seq_size_(seq_size),
        out_data_(out_data) {}

  HOSTDEVICE void operator()(int64_t idx) {
    const T2 value = value_data_[idx];
    // A 1-D sequence is shared by all values; otherwise select the sequence
    // row that corresponds to this value's outer index.
    const T1* row = sequence_data_;
    if (!is_1d_boundaries_) {
      row += idx / val_size_ * seq_size_;
    }
    if (std::isnan(value) || std::isinf(value)) {
      out_data_[idx] = 0;
      return;
    }
    out_data_[idx] =
        right_ ? static_cast<OutType>(
                     math::UpperBound<T1, T2>(row, seq_size_, value))
               : static_cast<OutType>(
                     math::LowerBound<T1, T2>(row, seq_size_, value));
  }

 private:
  const T1* sequence_data_;
  const T2* value_data_;
  bool right_;
  bool is_1d_boundaries_;
  int64_t val_size_;
  int64_t seq_size_;
  OutType* out_data_;
};

template <typename DeviceContext, typename T1, typename OutType>
class SearchSortedFunctor {
public:
SearchSortedFunctor(const framework::ExecutionContext& context,
const framework::Tensor* sorted_sequence,
const framework::Tensor* value, bool right,
OutType* out_data)
: context_(context),
sorted_sequence_(sorted_sequence),
value_(value),
right_(right),
out_data_(out_data) {}

template <typename T2>
void apply() {
const T1* sequence_data = sorted_sequence_->data<T1>();
const T2* value_data = value_->data<T2>();
const framework::DDim& seq_dims = sorted_sequence_->dims();
const framework::DDim& val_dims = value_->dims();

bool is_1d_boundaries = seq_dims.size() == 1;
int64_t val_size = val_dims[val_dims.size() - 1];
int64_t seq_size = seq_dims[seq_dims.size() - 1];

auto& dev_ctx = context_.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, value_->numel());
GpuAndCpuSearchSortedCompute<T1, T2, OutType>
gpu_and_cpu_search_sorted_compute(sequence_data, value_data, right_,
is_1d_boundaries, val_size, seq_size,
out_data_);
for_range(gpu_and_cpu_search_sorted_compute);
}

private:
const framework::ExecutionContext& context_;
const framework::Tensor* sorted_sequence_;
const framework::Tensor* value_;
bool right_;
OutType* out_data_;
};

// Dispatches visitor.apply<T>() on the runtime dtype of the values tensor.
// Only float32/float64/int32/int64 are supported; any other dtype throws.
template <typename Visitor>
static void VisitDataType(framework::proto::VarType::Type type,
                          Visitor visitor) {
  switch (type) {
    case framework::proto::VarType::FP32:
      visitor.template apply<float>();
      break;
    case framework::proto::VarType::FP64:
      visitor.template apply<double>();
      break;
    case framework::proto::VarType::INT32:
      visitor.template apply<int>();
      break;
    case framework::proto::VarType::INT64:
      visitor.template apply<int64_t>();
      break;
    default:
      // Fix: "recieved" -> "received" in the user-facing error message.
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The given values datatype of searchsorted operators must be "
          "float32, "
          "float64, int32 or int64, but the received values datatype of "
          "searchsorted operators is %s",
          framework::DataTypeToString(type)));
  }
}

// Kernel entry point.  T is the dtype of SortedSequence; the dtype of Values
// is resolved at runtime via VisitDataType, and the output index dtype
// (int32 vs int64) is chosen by the out_int32 attribute.
template <typename DeviceContext, typename T>
class SearchSortedKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* sorted_sequence = context.Input<Tensor>("SortedSequence");
    auto* values = context.Input<Tensor>("Values");
    auto* out = context.Output<Tensor>("Out");
    const bool out_int32 = context.Attr<bool>("out_int32");
    const bool right = context.Attr<bool>("right");

    if (out_int32) {
      auto* out_data = out->mutable_data<int>(context.GetPlace());
      SearchSortedFunctor<DeviceContext, T, int> functor(
          context, sorted_sequence, values, right, out_data);
      VisitDataType(values->type(), functor);
    } else {
      auto* out_data = out->mutable_data<int64_t>(context.GetPlace());
      SearchSortedFunctor<DeviceContext, T, int64_t> functor(
          context, sorted_sequence, values, right, out_data);
      VisitDataType(values->type(), functor);
    }
  }
};

} // namespace operators
} // namespace paddle
1 change: 1 addition & 0 deletions python/paddle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@
from .tensor.search import argmax # noqa: F401
from .tensor.search import argmin # noqa: F401
from .tensor.search import argsort # noqa: F401
from .tensor.search import searchsorted # noqa: F401
from .tensor.search import masked_select # noqa: F401
from .tensor.search import topk # noqa: F401
from .tensor.search import where # noqa: F401
Expand Down
Loading