Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions paddle/fluid/operators/math/algorithm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,8 +39,8 @@ HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
return -1;
}

template <typename T>
HOSTDEVICE inline size_t LowerBound(const T *x, size_t num, const T &val) {
template <typename T1, typename T2>
HOSTDEVICE inline size_t LowerBound(const T1 *x, size_t num, const T2 &val) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) // @{ Group LowerBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/lower_bound
Expand All @@ -62,8 +62,8 @@ HOSTDEVICE inline size_t LowerBound(const T *x, size_t num, const T &val) {
#endif // @} End Group LowerBound
}

template <typename T>
HOSTDEVICE inline size_t UpperBound(const T *x, size_t num, const T &val) {
template <typename T1, typename T2>
HOSTDEVICE inline size_t UpperBound(const T1 *x, size_t num, const T2 &val) {
#if defined(__CUDA_ARCH__) || defined(__HIPCC__) // @{ Group UpperBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/upper_bound
Expand Down
130 changes: 130 additions & 0 deletions paddle/fluid/operators/searchsorted_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/searchsorted_op.h"

#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace operators {

class SearchSortedOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Returns true when `sequences_dims` and `values_dims` agree on every
  // dimension except the last one: the search runs along the innermost
  // dimension, so only the leading N-1 dims of the two tensors must match.
  static bool SearchsortedDimsMatchedBeforeLastDim(
      const framework::DDim& sequences_dims,
      const framework::DDim& values_dims) {
    if (sequences_dims.size() != values_dims.size()) {
      return false;
    }
    const auto& sequences_dims_size = sequences_dims.size();
    for (int64_t dim = 0; dim < sequences_dims_size - 1; ++dim) {
      if (sequences_dims[dim] != values_dims[dim]) {
        return false;
      }
    }
    return true;
  }

  // Validates the inputs/outputs and sets Out's shape (same as Values).
  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("SortedSequence"), "Input", "SortedSequence",
                   "searchsorted");
    OP_INOUT_CHECK(ctx->HasInput("Values"), "Input", "Values", "searchsorted");

    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "searchsorted");

    auto sequences_dims = ctx->GetInputDim("SortedSequence");
    auto values_dims = ctx->GetInputDim("Values");
    auto out_int32 = ctx->Attrs().Get<bool>("out_int32");

    // A 1-D sorted_sequence is shared by every row of Values; otherwise the
    // leading N-1 dimensions of the two tensors have to match exactly.
    if (sequences_dims.size() != 1) {
      PADDLE_ENFORCE_EQ(
          SearchsortedDimsMatchedBeforeLastDim(sequences_dims, values_dims),
          true,
          platform::errors::Unavailable(
              "The dimensions of sorted_sequence tensor ( %s ) and "
              "values tensor ( %s ) can not match. Because the input "
              "sorted_sequence tensor must be "
              "1 dimension or the first N-1 "
              "dimensions of sorted_sequence tensor "
              "and input values tensor must "
              "match. Please input appropriate sorted_sequence and values "
              "again! ",
              sequences_dims, values_dims));
    }

    // An int32 output cannot represent indices beyond INT_MAX, so the
    // innermost dimension of sorted_sequence must stay below that limit.
    if (out_int32) {
      PADDLE_ENFORCE_LT(
          sequences_dims[sequences_dims.size() - 1],
          std::numeric_limits<int>::max(),
          platform::errors::Unavailable(
              // BUG FIX: the second placeholder was written "d%", an invalid
              // format specifier; corrected to "%d" (and "exceed"->"exceeds").
              "The size of sorted_sequence %d exceeds the maximum limit "
              "%d. Because the size of sorted_sequence should be less than "
              "the output maximum value for int32 bit. Please set appropriate "
              "sorted_sequence to meet this requirement! ",
              sequences_dims[sequences_dims.size() - 1],
              std::numeric_limits<int>::max()));
    }

    ctx->SetOutputDim("Out", values_dims);
  }

 protected:
  // The kernel's template type T is keyed on SortedSequence's dtype; the
  // Values dtype is dispatched at run time inside the kernel.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto data_type =
        OperatorWithKernel::IndicateVarDataType(ctx, "SortedSequence");
    return framework::OpKernelType(data_type, ctx.device_context());
  }
};

// Declares the searchsorted op's inputs, output, attributes and doc string.
class SearchSortedOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("SortedSequence",
             "(Tensor), N-D or 1-D tensor, The value of the tensor"
             "monotonically increases in the innermost dimension.");
    AddInput("Values",
             "(Tensor or Scalar), N-D tensor or a Scalar given values.");
    AddOutput("Out", "(Tensor), The output tensor of searchsorted op.");
    // BUG FIX: the original description concatenated to the garbled
    // "...int32normally type if True."; rewritten so the generated op docs
    // are readable. Behavior (attr name, default) is unchanged.
    AddAttr<bool>("out_int32",
                  "the output tensor is int64 type if False and int32 "
                  "type if True.")
        .SetDefault(false);
    AddAttr<bool>(
        "right",
        "corresponding to lower bound if False and upper bound if True")
        .SetDefault(false);

    AddComment(R"DOC(
Searchsorted Operator.

This operator is used to find the index of the given value from the innermost dimension of sorted_sequence

)DOC");
  }
};
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(searchsorted, ops::SearchSortedOp, ops::SearchSortedOpMaker);

// CPU kernels: the template argument is the element type of SortedSequence;
// the element type of Values is dispatched at run time (see VisitDataType in
// searchsorted_op.h).
REGISTER_OP_CPU_KERNEL(
    searchsorted,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, double>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, int>,
    ops::SearchSortedKernel<paddle::platform::CPUDeviceContext, int64_t>);
23 changes: 23 additions & 0 deletions paddle/fluid/operators/searchsorted_op.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/searchsorted_op.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;

// CUDA kernels: the template argument is the element type of SortedSequence;
// the element type of Values is dispatched at run time (see VisitDataType in
// searchsorted_op.h).
REGISTER_OP_CUDA_KERNEL(
    searchsorted, ops::SearchSortedKernel<plat::CUDADeviceContext, float>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, double>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, int>,
    ops::SearchSortedKernel<plat::CUDADeviceContext, int64_t>);
170 changes: 170 additions & 0 deletions paddle/fluid/operators/searchsorted_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <math.h>

#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/for_range.h"

namespace paddle {
namespace operators {
using Tensor = framework::Tensor;

// Per-element search functor, runnable on both CPU and GPU via ForRange.
// T1: dtype of the sorted sequence; T2: dtype of the values; OutType: the
// index type written to the output (int or int64_t).
template <typename T1, typename T2, typename OutType>
class GpuAndCpuSearchSortedCompute {
 public:
  // Overload set so the NaN/Inf checks compile for every supported dtype;
  // integral values can never be NaN or Inf.
  static HOSTDEVICE bool IsNan(float x) { return ::isnan(x); }
  static HOSTDEVICE bool IsNan(double x) { return ::isnan(x); }
  static HOSTDEVICE bool IsNan(int x) { return false; }
  static HOSTDEVICE bool IsNan(int64_t x) { return false; }

  static HOSTDEVICE bool IsInf(float x) { return ::isinf(x); }
  static HOSTDEVICE bool IsInf(double x) { return ::isinf(x); }
  static HOSTDEVICE bool IsInf(int x) { return false; }
  static HOSTDEVICE bool IsInf(int64_t x) { return false; }

  HOSTDEVICE GpuAndCpuSearchSortedCompute(const T1* sequence_data,
                                          const T2* value_data, bool right,
                                          bool is_1d_boundaries,
                                          int64_t val_size, int64_t seq_size,
                                          OutType* out_data)
      : sequence_data_(sequence_data),
        value_data_(value_data),
        right_(right),
        is_1d_boundaries_(is_1d_boundaries),
        val_size_(val_size),
        seq_size_(seq_size),
        out_data_(out_data) {}

  // Computes the insertion index of the idx-th value into its sequence row
  // and writes it to out_data_[idx].
  HOSTDEVICE void operator()(int64_t idx) {
    const T2* value_ptr = value_data_ + idx;
    // A 1-D sequence is shared by all values; otherwise select the row of
    // sequence_data_ that corresponds to this value's leading indices.
    const T1* sequence_ptr = is_1d_boundaries_
                                 ? sequence_data_
                                 : sequence_data_ + idx / val_size_ * seq_size_;
    if (IsInf(*value_ptr) || IsNan(*value_ptr)) {
      // NaN/Inf values map to the size of the innermost dimension.
      // BUG FIX: cast explicitly — seq_size_ is int64_t while OutType may be
      // int, so the original plain assignment narrowed implicitly (the two
      // branches below already cast).
      out_data_[idx] = static_cast<OutType>(seq_size_);
    } else {
      if (right_) {
        // right=True: first index strictly greater than the value.
        out_data_[idx] = static_cast<OutType>(
            math::UpperBound<T1, T2>(sequence_ptr, seq_size_, *value_ptr));
      } else {
        // right=False: first index not less than the value.
        out_data_[idx] = static_cast<OutType>(
            math::LowerBound<T1, T2>(sequence_ptr, seq_size_, *value_ptr));
      }
    }
  }

 private:
  const T1* sequence_data_;
  const T2* value_data_;
  bool right_;
  bool is_1d_boundaries_;
  int64_t val_size_;
  int64_t seq_size_;
  OutType* out_data_;
};

template <typename DeviceContext, typename T1, typename OutType>
class SearchSortedFunctor {
public:
SearchSortedFunctor(const framework::ExecutionContext& context,
const framework::Tensor* sorted_sequence,
const framework::Tensor* value, bool right,
OutType* out_data)
: context_(context),
sorted_sequence_(sorted_sequence),
value_(value),
right_(right),
out_data_(out_data) {}

template <typename T2>
void apply() {
const T1* sequence_data = sorted_sequence_->data<T1>();
const T2* value_data = value_->data<T2>();
const framework::DDim& seq_dims = sorted_sequence_->dims();
const framework::DDim& val_dims = value_->dims();

bool is_1d_boundaries = seq_dims.size() == 1;
int64_t val_size = val_dims[val_dims.size() - 1];
int64_t seq_size = seq_dims[seq_dims.size() - 1];

auto& dev_ctx = context_.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(dev_ctx, value_->numel());
GpuAndCpuSearchSortedCompute<T1, T2, OutType>
gpu_and_cpu_search_sorted_compute(sequence_data, value_data, right_,
is_1d_boundaries, val_size, seq_size,
out_data_);
for_range(gpu_and_cpu_search_sorted_compute);
}

private:
const framework::ExecutionContext& context_;
const framework::Tensor* sorted_sequence_;
const framework::Tensor* value_;
bool right_;
OutType* out_data_;
};

// Dispatches `visitor.template apply<T>()` on the runtime dtype of the
// Values tensor. Only float32/float64/int32/int64 are supported; any other
// dtype raises InvalidArgument.
template <typename Visitor>
static void VisitDataType(framework::proto::VarType::Type type,
                          Visitor visitor) {
  if (type == framework::proto::VarType::FP32) {
    visitor.template apply<float>();
  } else if (type == framework::proto::VarType::FP64) {
    visitor.template apply<double>();
  } else if (type == framework::proto::VarType::INT32) {
    visitor.template apply<int>();
  } else if (type == framework::proto::VarType::INT64) {
    visitor.template apply<int64_t>();
  } else {
    // BUG FIX: corrected the user-facing error text — "recieved" ->
    // "received" and the run-together "inputrequirements.Because" ->
    // "input requirements. Because".
    PADDLE_THROW(platform::errors::InvalidArgument(
        "The received values data type %s can not meet "
        "input requirements. Because the given values data type of "
        "searchsorted operators must be float32, float64, int32 or int64. "
        "Please input appropriate sorted_sequence again! ",
        framework::DataTypeToString(type)));
  }
}

// Entry kernel for the searchsorted op. T is the dtype of SortedSequence;
// the dtype of Values is dispatched at run time through VisitDataType.
template <typename DeviceContext, typename T>
class SearchSortedKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* sorted_sequence = context.Input<Tensor>("SortedSequence");
    auto* values = context.Input<Tensor>("Values");
    auto* out = context.Output<Tensor>("Out");
    const bool right = context.Attr<bool>("right");
    const bool out_int32 = context.Attr<bool>("out_int32");

    // Allocate the output with the requested index width, then run the
    // dtype-dispatched functor. int64 is the default output type.
    if (!out_int32) {
      auto* out_data = out->mutable_data<int64_t>(context.GetPlace());
      SearchSortedFunctor<DeviceContext, T, int64_t> functor(
          context, sorted_sequence, values, right, out_data);
      VisitDataType(values->type(), functor);
    } else {
      auto* out_data = out->mutable_data<int>(context.GetPlace());
      SearchSortedFunctor<DeviceContext, T, int> functor(
          context, sorted_sequence, values, right, out_data);
      VisitDataType(values->type(), functor);
    }
  }
};

} // namespace operators
} // namespace paddle
1 change: 1 addition & 0 deletions python/paddle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@
from .tensor.search import argmax # noqa: F401
from .tensor.search import argmin # noqa: F401
from .tensor.search import argsort # noqa: F401
from .tensor.search import searchsorted # noqa: F401
from .tensor.search import masked_select # noqa: F401
from .tensor.search import topk # noqa: F401
from .tensor.search import where # noqa: F401
Expand Down
Loading