Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions paddle/fluid/operators/pad_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ limitations under the License. */

#pragma once

#include <utility>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"

Expand Down
12 changes: 8 additions & 4 deletions paddle/fluid/operators/pool_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
dev_ctx.SetBlob(key_pool_workspace_memory, workspace_memory);

auto src_memory =
mkldnn::memory({src_md, mkldnn_engine}, (void*)input_data);
mkldnn::memory({src_md, mkldnn_engine},
static_cast<void*>(const_cast<T*>(input_data)));
auto dst_memory =
mkldnn::memory({dst_md, mkldnn_engine}, (void*)output_data);
mkldnn::memory({dst_md, mkldnn_engine},
static_cast<void*>(const_cast<T*>(output_data)));

auto pool_prim = mkldnn::pooling_forward(*pool_pd, src_memory, dst_memory,
*workspace_memory);
Expand Down Expand Up @@ -195,9 +197,11 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pool_bwd_desc, mkldnn_engine, *pool_pd);

auto diff_src_memory =
mkldnn::memory({diff_src_md, mkldnn_engine}, (void*)in_x_grad_data);
mkldnn::memory({diff_src_md, mkldnn_engine},
static_cast<void*>(const_cast<T*>(in_x_grad_data)));
auto diff_dst_memory =
mkldnn::memory({diff_dst_md, mkldnn_engine}, (void*)out_grad_data);
mkldnn::memory({diff_dst_md, mkldnn_engine},
static_cast<void*>(const_cast<T*>(out_grad_data)));

auto bwd_prim = mkldnn::pooling_backward(
pool_bwd_pd, diff_dst_memory, *workspace_memory, diff_src_memory);
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/operators/pool_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ limitations under the License. */

#pragma once

#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/pool_with_index_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License. */

#pragma once

#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/operators/prelu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/prelu_op.h"

#include <string>

namespace paddle {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/prior_box_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class PriorBoxOp : public framework::OperatorWithKernel {
bool flip = ctx->Attrs().Get<bool>("flip");

std::vector<float> aspect_ratios_vec;
ExpandAspectRatios(aspect_ratios, flip, aspect_ratios_vec);
ExpandAspectRatios(aspect_ratios, flip, &aspect_ratios_vec);

size_t num_priors = aspect_ratios_vec.size() * min_sizes.size();
if (max_sizes.size() > 0) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/prior_box_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
auto clip = ctx.Attr<bool>("clip");

std::vector<float> aspect_ratios;
ExpandAspectRatios(input_aspect_ratio, flip, aspect_ratios);
ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);

T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
Expand Down
18 changes: 10 additions & 8 deletions paddle/fluid/operators/prior_box_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/transform.h"
Expand All @@ -22,23 +24,23 @@ namespace operators {

inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
bool flip,
std::vector<float>& output_aspect_ratior) {
std::vector<float>* output_aspect_ratior) {
constexpr float epsilon = 1e-6;
output_aspect_ratior.clear();
output_aspect_ratior.push_back(1.0f);
output_aspect_ratior->clear();
output_aspect_ratior->push_back(1.0f);
for (size_t i = 0; i < input_aspect_ratior.size(); ++i) {
float ar = input_aspect_ratior[i];
bool already_exist = false;
for (size_t j = 0; j < output_aspect_ratior.size(); ++j) {
if (fabs(ar - output_aspect_ratior[j]) < epsilon) {
for (size_t j = 0; j < output_aspect_ratior->size(); ++j) {
if (fabs(ar - output_aspect_ratior->at(j)) < epsilon) {
already_exist = true;
break;
}
}
if (!already_exist) {
output_aspect_ratior.push_back(ar);
output_aspect_ratior->push_back(ar);
if (flip) {
output_aspect_ratior.push_back(1.0f / ar);
output_aspect_ratior->push_back(1.0f / ar);
}
}
}
Expand Down Expand Up @@ -68,7 +70,7 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
auto clip = ctx.Attr<bool>("clip");

std::vector<float> aspect_ratios;
ExpandAspectRatios(input_aspect_ratio, flip, aspect_ratios);
ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);

T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/rank_loss_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/rank_loss_op.h"
#include <string>

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/recv_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <future> // NOLINT
#include <ostream>

#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"

#include <future>
#include "paddle/fluid/operators/detail/grpc_client.h"

namespace paddle {
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/operators/roi_pool_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <algorithm>
#include <limits>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"

Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/strided_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
const framework::DDim& src_stride,
const framework::DDim& dst_dim,
const framework::DDim& dst_stride, T* dst) {
using namespace detail;
StridedCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
paddle::operators::detail::StridedCopyDimVisitor<T> func(
dev_ctx, src, src_stride, dst_stride, dst);
boost::apply_visitor(func, dst_dim);
}

Expand Down
83 changes: 42 additions & 41 deletions paddle/fluid/operators/top_k_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/platform/assert.h"

namespace paddle {
Expand Down Expand Up @@ -133,71 +134,71 @@ __device__ __forceinline__ void GetTopK(Pair<T> topk[], const T* val, int* col,
}

template <typename T, int MaxLength, int BlockSize>
__device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int& beam,
__device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
int beam_size, const T* src,
bool& firstStep, bool& is_empty,
Pair<T>& max, int dim,
bool* firstStep, bool* is_empty,
Pair<T>* max, int dim,
const int tid) {
if (beam > 0) {
int length = beam < beam_size ? beam : beam_size;
if (firstStep) {
firstStep = false;
if (*beam > 0) {
int length = (*beam) < beam_size ? *beam : beam_size;
if (*firstStep) {
*firstStep = false;
GetTopK<T, BlockSize>(topk, src, tid, dim, length);
} else {
for (int k = 0; k < MaxLength; k++) {
if (k < MaxLength - beam) {
topk[k] = topk[k + beam];
if (k < MaxLength - (*beam)) {
topk[k] = topk[k + *beam];
} else {
topk[k].set(-INFINITY, -1);
}
}
if (!is_empty) {
GetTopK<T, BlockSize>(topk + MaxLength - beam, src, tid, dim, max,
if (!(*is_empty)) {
GetTopK<T, BlockSize>(topk + MaxLength - *beam, src, tid, dim, *max,
length);
}
}

max = topk[MaxLength - 1];
if (max.v == -1) is_empty = true;
beam = 0;
*max = topk[MaxLength - 1];
if ((*max).v == -1) *is_empty = true;
*beam = 0;
}
}

template <typename T, int MaxLength, int BlockSize>
__device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int& beam,
__device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
int beam_size, const T* val,
int* col, bool& firstStep,
bool& is_empty, Pair<T>& max,
int* col, bool* firstStep,
bool* is_empty, Pair<T>* max,
int dim, const int tid) {
if (beam > 0) {
int length = beam < beam_size ? beam : beam_size;
if (firstStep) {
firstStep = false;
if (*beam > 0) {
int length = (*beam) < beam_size ? *beam : beam_size;
if (*firstStep) {
*firstStep = false;
GetTopK<T, BlockSize>(topk, val, col, tid, dim, length);
} else {
for (int k = 0; k < MaxLength; k++) {
if (k < MaxLength - beam) {
topk[k] = topk[k + beam];
if (k < MaxLength - *beam) {
topk[k] = topk[k + *beam];
} else {
topk[k].set(-INFINITY, -1);
}
}
if (!is_empty) {
GetTopK<T, BlockSize>(topk + MaxLength - beam, val, col, tid, dim, max,
if (!(*is_empty)) {
GetTopK<T, BlockSize>(topk + MaxLength - *beam, val, col, tid, dim, max,
length);
}
}

max = topk[MaxLength - 1];
if (max.v == -1) is_empty = true;
beam = 0;
*max = topk[MaxLength - 1];
if ((*max).v == -1) *is_empty = true;
*beam = 0;
}
}

template <typename T, int MaxLength, int BlockSize>
__device__ __forceinline__ void BlockReduce(Pair<T>* sh_topk, int* maxid,
Pair<T> topk[], T** topVal,
int64_t** topIds, int& beam, int& k,
int64_t** topIds, int* beam, int* k,
const int tid, const int warp) {
while (true) {
__syncthreads();
Expand Down Expand Up @@ -225,17 +226,17 @@ __device__ __forceinline__ void BlockReduce(Pair<T>* sh_topk, int* maxid,
(*topVal)++;
(*topIds)++;
}
if (tid == maxid[0]) beam++;
if (--k == 0) break;
if (tid == maxid[0]) (*beam)++;
if (--(*k) == 0) break;
__syncthreads();

if (tid == maxid[0]) {
if (beam < MaxLength) {
sh_topk[tid] = topk[beam];
if (*beam < MaxLength) {
sh_topk[tid] = topk[*beam];
}
}
if (maxid[0] / 32 == warp) {
if (__shfl(beam, (maxid[0]) % 32, 32) == MaxLength) break;
if (__shfl(*beam, (maxid[0]) % 32, 32) == MaxLength) break;
}
}
}
Expand Down Expand Up @@ -268,13 +269,13 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
topk[k].set(-INFINITY, -1);
}
while (k) {
ThreadGetTopK<T, MaxLength, BlockSize>(topk, beam, k,
src + blockIdx.x * lds, firststep,
is_empty, max, dim, tid);
ThreadGetTopK<T, MaxLength, BlockSize>(topk, &beam, k,
src + blockIdx.x * lds, &firststep,
&is_empty, &max, dim, tid);

sh_topk[tid] = topk[0];
BlockReduce<T, MaxLength, BlockSize>(sh_topk, maxid, topk, &output,
&indices, beam, k, tid, warp);
&indices, &beam, &k, tid, warp);
}
}

Expand Down Expand Up @@ -308,9 +309,9 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
KeMatrixTopK<T, 5, 256><<<
grid, threads, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
ctx.device_context())
.stream()>>>(output_data, output->dims()[1],
indices_data, input_data,
input_width, input_width, int(k));
.stream()>>>(
output_data, output->dims()[1], indices_data, input_data, input_width,
input_width, static_cast<int>(k));
}
};

Expand Down