6 changes: 3 additions & 3 deletions paddle/fluid/inference/tensorrt/convert/activation_op.cc
@@ -181,9 +181,9 @@ class STanhOpConverter : public ActivationOpConverter {
STanhOpConverter() { op_type_ = "stanh"; }
};

-class ThreasholdedReluOpConverter : public ActivationOpConverter {
+class ThresholdedReluOpConverter : public ActivationOpConverter {
public:
-ThreasholdedReluOpConverter() { op_type_ = "thresholded_relu"; }
+ThresholdedReluOpConverter() { op_type_ = "thresholded_relu"; }
};
#endif

@@ -201,5 +201,5 @@ REGISTER_TRT_OP_CONVERTER(selu, SeluOpConverter);
REGISTER_TRT_OP_CONVERTER(softsign, SoftsignOpConverter);
REGISTER_TRT_OP_CONVERTER(softplus, SoftplusOpConverter);
REGISTER_TRT_OP_CONVERTER(stanh, STanhOpConverter);
-REGISTER_TRT_OP_CONVERTER(thresholded_relu, ThreasholdedReluOpConverter);
+REGISTER_TRT_OP_CONVERTER(thresholded_relu, ThresholdedReluOpConverter);
#endif
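
For context, every converter in this file follows the same two-step pattern the hunks above touch: subclass ActivationOpConverter, set op_type_ in the constructor, and register the class under the Paddle op name. A minimal sketch for a hypothetical my_act op (illustrative only; the name and op are not part of this PR):

class MyActOpConverter : public ActivationOpConverter {
 public:
  // Maps the Paddle op name to the shared activation conversion logic.
  MyActOpConverter() { op_type_ = "my_act"; }
};

REGISTER_TRT_OP_CONVERTER(my_act, MyActOpConverter);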
8 changes: 4 additions & 4 deletions paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc
@@ -36,7 +36,7 @@ class AffineChannelOpConverter : public OpConverter {
std::string output_name = op_desc.Output("Out").front();

auto input_tensor = engine_->GetITensor(input_name);
-auto idim = input_tensor->getDimensions();
+auto input_dim = input_tensor->getDimensions();

auto* scale_v = scope.FindVar(scale_name);
auto* scale_t = scale_v->GetMutable<phi::DenseTensor>();
@@ -49,17 +49,17 @@ class AffineChannelOpConverter : public OpConverter {
engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));

// tensorrt scalend layer only support spatial dims >= 2,
-// so nhwc is not availabe (spatial dims == 0)
+// so nhwc is not available (spatial dims == 0)
const int channel_axis = engine_->with_dynamic_shape();

TensorRTEngine::Weight scale_weights{
nvinfer1::DataType::kFLOAT,
static_cast<void*>(scale_ptr),
-static_cast<size_t>(idim.d[channel_axis])};
+static_cast<size_t>(input_dim.d[channel_axis])};
TensorRTEngine::Weight bias_weights{
nvinfer1::DataType::kFLOAT,
static_cast<void*>(bias_ptr),
-static_cast<size_t>(idim.d[channel_axis])};
+static_cast<size_t>(input_dim.d[channel_axis])};
TensorRTEngine::Weight power_weights{
nvinfer1::DataType::kFLOAT, nullptr, 0};
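
A note on the hunk above: channel_axis relies on an implicit bool-to-int conversion of with_dynamic_shape(). In dynamic-shape mode the TensorRT dims keep the batch axis, so channels sit at index 1; in static-shape (implicit batch) mode the batch axis is stripped and channels sit at index 0. An equivalent, more explicit sketch (assuming with_dynamic_shape() returns bool):

// Equivalent to: const int channel_axis = engine_->with_dynamic_shape();
const int channel_axis = engine_->with_dynamic_shape() ? 1 : 0;
// One scale/bias value per channel, read from the renamed input_dim.
const size_t channel_count = static_cast<size_t>(input_dim.d[channel_axis]);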

2 changes: 1 addition & 1 deletion paddle/fluid/inference/tensorrt/convert/bitwise_not_op.cc
@@ -42,7 +42,7 @@ class BitwiseNotConverter : public OpConverter {
nvinfer1::Dims input_dims = input_tensor->getDimensions();

// set up a elementwise -1 tensor, can not get the dims info for
-// dynamic_shape so just let it broadcaste
+// dynamic_shape so just let it broadcast
nvinfer1::Dims neg_one_tensor_dims;
neg_one_tensor_dims.nbDims = input_dims.nbDims;
for (int i = 0; i < input_dims.nbDims; ++i) {
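The "elementwise -1 tensor" comment above reflects the usual trick for lowering bitwise NOT onto elementwise arithmetic: for two's-complement integers, ~x == -x - 1 == -1 - x, so a broadcast -1 constant combined with elementwise multiply/subtract reproduces the op. A self-contained check of that identity (plain C++; this is the underlying math, not Paddle or TensorRT API):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {-7, 0, 1, 42}) {
    assert(~x == -x - 1);  // two's-complement identity
    assert(~x == -1 - x);  // same identity, written as subtraction from -1
  }
  return 0;
}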
2 changes: 1 addition & 1 deletion paddle/fluid/inference/tensorrt/convert/conv3d_op.cc
@@ -35,7 +35,7 @@ void ConvertConv3d(TensorRTEngine* engine,
auto* Y_v = scope.FindVar(filter_var_name);
PADDLE_ENFORCE_NOT_NULL(
Y_v,
-platform::errors::NotFound("Can not find %s presistale var in scope.",
+platform::errors::NotFound("Can not find %s presistable var in scope.",
filter_var_name));
auto* Y_t = Y_v->GetMutable<phi::DenseTensor>();
bool enable_int8 = op_desc.HasAttr("enable_int8");
@@ -24,8 +24,9 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
-VLOG(3) << "convert a cross_multihead_mamul op to a corresponding tensorrt "
-"network structure";
+VLOG(3)
+<< "convert a cross_multihead_matmul op to a corresponding tensorrt "
+"network structure";
bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
if (engine_->precision() == phi::DataType::INT8) {
with_fp16 = true;
@@ -109,7 +110,7 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
weight_q,
bias_q);
fc_q_layer->setName(
-("multihead_mamul_fc_q(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc_q(Output: " + output_name + ")").c_str());

// add shuffle for fc layer
auto* reshape_after_fc_q_layer =
@@ -211,7 +212,7 @@ class CrossMultiheadMatMulOpConverter : public OpConverter {
weight_kv,
bias_kv);
fc_layer->setName(
-("multihead_mamul_fc(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc(Output: " + output_name + ")").c_str());

// add shuffle for fc layer
auto* reshape_after_fc_layer =
@@ -32,7 +32,7 @@ class DequantizeLinearOpConverter : public OpConverter {
// Create constant layer for scale
PADDLE_ENFORCE_NOT_NULL(
scale_var,
-platform::errors::NotFound("Can not find %s presistale var in scope.",
+platform::errors::NotFound("Can not find %s presistable var in scope.",
op_desc.Input("Scale")[0]));
auto* scale_t = scale_var->GetMutable<phi::DenseTensor>();
int n_scale = scale_t->numel();
@@ -24,11 +24,12 @@ namespace tensorrt {

class FlashMultiheadMatMulOpConverter : public OpConverter {
public:
-void flash_multihead_mamul_trt(const framework::proto::OpDesc& op,
-const framework::Scope& scope,
-bool test_mode) {
-VLOG(3) << "convert a flash_multihead_mamul op to a corresponding tensorrt "
-"network structure\n";
+void flash_multihead_matmul_trt(const framework::proto::OpDesc& op,
+const framework::Scope& scope,
+bool test_mode) {
+VLOG(3)
+<< "convert a flash_multihead_matmul op to a corresponding tensorrt "
+"network structure\n";

bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
if (engine_->precision() == phi::DataType::INT8) {
@@ -138,7 +139,7 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
weight,
bias);
fc_layer->setName(
-("multihead_mamul_fc(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc(Output: " + output_name + ")").c_str());
// add shuffle for fc layer
reshape_before_mha_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
@@ -243,10 +244,10 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
layer, "flash_multihead_matmul", {output_name}, test_mode);
}

-void flash_multihead_mamul(const framework::proto::OpDesc& op,
-const framework::Scope& scope,
-bool test_mode) {
-VLOG(3) << "convert a flash_multihead_mamul op to a "
+void flash_multihead_matmul(const framework::proto::OpDesc& op,
+const framework::Scope& scope,
+bool test_mode) {
+VLOG(3) << "convert a flash_multihead_matmul op to a "
"MemoryEfficientAttention OP "
"network structure\n";
framework::OpDesc op_desc(op, nullptr);
@@ -310,7 +311,7 @@ class FlashMultiheadMatMulOpConverter : public OpConverter {
hidden_out,
weight,
bias);
-qkv_fc_layers[i]->setName(("multihead_mamul_fc_" + std::to_string(i) +
+qkv_fc_layers[i]->setName(("multihead_matmul_fc_" + std::to_string(i) +
"_(Output: " + output_name + ")")
.c_str());
} else {
@@ -334,7 +335,7 @@
matrix_operation_x,
*weight_reshape_before_mm[i]->getOutput(0),
matrix_operation_y);
-qkv_fc_layers[i]->setName(("multihead_mamul_matmul_" +
+qkv_fc_layers[i]->setName(("multihead_matmul_matmul_" +
std::to_string(i) +
"_(Output: " + output_name + ")")
.c_str());
@@ -499,9 +500,9 @@
framework::OpDesc op_desc(op, nullptr);
bool use_trt_fma = PADDLE_GET_CONST(bool, op_desc.GetAttr("use_trt_fma"));
if (use_trt_fma) {
-flash_multihead_mamul_trt(op, scope, test_mode);
+flash_multihead_matmul_trt(op, scope, test_mode);
} else {
-flash_multihead_mamul(op, scope, test_mode);
+flash_multihead_matmul(op, scope, test_mode);
}
}
};
@@ -31,7 +31,7 @@ class CustomPluginCreater : public OpConverter {
const framework::Scope &scope,
bool test_mode) override {
framework::OpDesc op_desc(op, nullptr);
-VLOG(3) << "convert " << op_desc.Type() << " op to custom pluign layer";
+VLOG(3) << "convert " << op_desc.Type() << " op to custom plugin layer";

std::string plugin_name;

@@ -175,7 +175,7 @@ class GenericPluginCreater : public OpConverter {
const framework::Scope &scope,
bool test_mode) override {
framework::OpDesc op_desc(op, nullptr);
-VLOG(3) << "convert " << op_desc.Type() << " op to generic pluign layer";
+VLOG(3) << "convert " << op_desc.Type() << " op to generic plugin layer";

CHECK(block_);
const framework::BlockDesc block_desc(
@@ -259,7 +259,7 @@ class CustomGenericPluginCreater : public OpConverter {
bool test_mode) override {
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "convert " << op_desc.Type()
-<< " op to custom generic pluign layer";
+<< " op to custom generic plugin layer";

nvinfer1::ILayer *layer = nullptr;
std::vector<nvinfer1::ITensor *> inputs;
10 changes: 5 additions & 5 deletions paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
@@ -25,7 +25,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
-VLOG(3) << "convert a multihead_mamul op to a corresponding tensorrt "
+VLOG(3) << "convert a multihead_matmul op to a corresponding tensorrt "
"network structure";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
@@ -377,7 +377,7 @@ class MultiheadMatMulOpConverter : public OpConverter {

reshape_before_multihead_layer->setInput(1, *Concat(reshape_tensor));
reshape_before_multihead_layer->setName(
-("reshape_before_multihead_mamul(Output: " + output_name + ")")
+("reshape_before_multihead_matmul(Output: " + output_name + ")")
.c_str());

if (op_desc.HasAttr("fc_out_threshold")) {
@@ -625,7 +625,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
bias);
}
fc_layer->setName(
-("multihead_mamul_fc(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc(Output: " + output_name + ")").c_str());

// add shuffle for CustomQKVToContextPluginDynamic layer
auto* reshape_after_fc_layer =
@@ -798,7 +798,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
reshape_before_fc_layer->setInput(
1, *Concat(reshape_before_fc_shape_tensor));
reshape_before_fc_layer->setName(
-("shuffle_before_multihead_mamul(Output: " + output_name + ")")
+("shuffle_before_multihead_matmul(Output: " + output_name + ")")
.c_str());

// add layer fc
@@ -834,7 +834,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
engine_->SetTensorDynamicRange(fc_layer->getOutput(0), out_scale);
}
fc_layer->setName(
-("multihead_mamul_fc(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc(Output: " + output_name + ")").c_str());

// no need to add shuffle after fc, just change it in
// QkvToContextPluginDynamic
@@ -24,7 +24,7 @@ class MultiheadMatMulRoformerOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
-VLOG(3) << "convert a multihead_mamul_roformer op to a corresponding "
+VLOG(3) << "convert a multihead_matmul_roformer op to a corresponding "
"tensorrt "
"network structure";
framework::OpDesc op_desc(op, nullptr);
@@ -23,7 +23,7 @@ class QkMultiheadMatMulOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
-VLOG(3) << "convert a qk_multihead_mamul op to a corresponding tensorrt "
+VLOG(3) << "convert a qk_multihead_matmul op to a corresponding tensorrt "
"network structure";

framework::OpDesc op_desc(op, nullptr);
@@ -142,7 +142,7 @@ class QkMultiheadMatMulOpConverter : public OpConverter {
*bias_qk_tensor,
elementwise_operation);
merge_qk_element_layer->setName(
-("multihead_mamul_fc_qk(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc_qk(Output: " + output_name + ")").c_str());

auto* reshape_after_fc_qk_layer = TRT_ENGINE_ADD_LAYER(
engine_, Shuffle, *merge_qk_element_layer->getOutput(0));
@@ -232,7 +232,7 @@ class QkMultiheadMatMulOpConverter : public OpConverter {
*bias_v_tensor,
elementwise_operation);
merge_v_element_layer->setName(
-("multihead_mamul_fc_v(Output: " + output_name + ")").c_str());
+("multihead_matmul_fc_v(Output: " + output_name + ")").c_str());

// add shuffle for fc layer
auto* reshape_after_fc_v_layer = TRT_ENGINE_ADD_LAYER(
@@ -366,7 +366,7 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
}
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
reshape_before_fc_layer->setName(
-("shuffle_before_sparse_multihead_mamul(Output: " + output_name +
+("shuffle_before_sparse_multihead_matmul(Output: " + output_name +
")")
.c_str());

@@ -403,7 +403,8 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
engine_->SetTensorDynamicRange(fc_layer->getOutput(0), out_scale);
}
fc_layer->setName(
-("sparse_multihead_mamul_fc(Output: " + output_name + ")").c_str());
+("sparse_multihead_matmul_fc(Output: " + output_name + ")")
+.c_str());

// no need to add shuffle after fc, just change it in
// QkvToContextPluginDynamic
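
A closing note on the pattern: nearly every rename in this PR lands inside the same layer->setName(("prefix(Output: " + output_name + ")").c_str()) expression. A hypothetical helper (illustrative only; not part of this PR or of the TensorRT API) would turn such renames into single-point fixes. Like the existing code, it assumes setName() does not keep the temporary buffer beyond the call:

#include <string>

#include "NvInfer.h"  // nvinfer1::ILayer

// Hypothetical wrapper mirroring the naming convention used across these
// converters: "<prefix>(Output: <output_name>)".
inline void SetLayerName(nvinfer1::ILayer* layer,
                         const std::string& prefix,
                         const std::string& output_name) {
  layer->setName((prefix + "(Output: " + output_name + ")").c_str());
}

// Usage sketch: SetLayerName(fc_layer, "multihead_matmul_fc", output_name);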