Skip to content

C++预测时出 core,不知道原因在哪,同样的模型使用 python 预测 api 无问题 #28268

@PengheLiu

Description

@PengheLiu
  • 标题:C++预测时出 core,不知道原因在哪,同样的模型使用 python 预测 api 无问题
  • 版本、环境信息:
       1)PaddlePaddle版本:1.6
       2)CPU:预测使用CPU,使用MKLDNN 数学库
       3)系统环境:CentOS 4.3,Python version: 2.7.2
    -预测信息
       1)C++预测:请您提供预测库安装包的版本信息,及其中的version.txt文件
       2)CMake包含路径的完整命令
       3)API信息(如调用请提供)
       4)预测库来源:特殊环境(BCLOUD编译)
  • 复现信息:如为报错,请给出复现环境、复现步骤
  • 问题描述:使用 python 保存的模型,放到实验平台实例上c++预测时出 core,辛苦相关同学帮看下

python 保存模型代码,主要是从另一个参数数组里读出参数,再保存成 paddle 的参数

import sys
import numpy as np
import paddle.fluid as fluid

# Output directory for the exported Paddle inference model.
paddle_params_dir = './pd_params'

startup_program = fluid.default_startup_program()

# Inference inputs: a 128-d query embedding plus two 256-d candidate embeddings.
q_emb = fluid.layers.data(name='q_emb', shape=[128], dtype='float32')
bp_emb = fluid.layers.data(name='bp_emb', shape=[256], dtype='float32')
bp_id_emb = fluid.layers.data(name='bp_id_emb', shape=[256], dtype='float32')
# Project the query embedding to 256-d so it is shape-compatible with the
# candidate embeddings in the element-wise terms below.
q_vec = fluid.layers.fc(q_emb, name='encoder', size=256, act='tanh')
# Feature vector: raw embeddings, element-wise products/differences, and
# cosine similarities between the encoded query and each candidate embedding.
input_x = fluid.layers.concat([q_vec, bp_emb, bp_id_emb, 
                                q_vec * bp_emb, q_vec - bp_emb, 
                                q_vec * bp_id_emb, q_vec - bp_id_emb, 
                                fluid.layers.cos_sim(q_vec, bp_emb), 
                                fluid.layers.cos_sim(q_vec, bp_id_emb)], axis=-1)

# Two-way softmax; keep only the probability of class 1 and squeeze it to a
# scalar score per sample (slice+squeeze replaces the commented-out indexing).
out = fluid.layers.fc(input_x, name='out', size=2, act='softmax')
#out = out[:,1]
out = fluid.layers.slice(out, axes=[1], starts=[1], ends=[2])
out = fluid.layers.squeeze(out, axes=[1])

# Run the startup program so every parameter variable exists before it is
# overwritten with externally trained values via set_tensor below.
exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_program)

scope = fluid.global_scope()
block = startup_program.current_block()

def set_tensor(torch_name, pd_name):
    """Copy one externally trained parameter into the Paddle program.

    Loads the text-dumped weight from ``params/<torch_name>``, transposes it
    (torch stores fc weights as [out, in], Paddle expects [in, out] —
    TODO confirm against the dump format), and writes it into the Paddle
    variable named ``pd_name`` in the global scope.
    """
    values = np.loadtxt('params/%s' % torch_name, dtype=np.float32)
    values = values.T
    print(values.shape)
    target_var = block.var(pd_name)
    dst_tensor = scope.var(target_var.name).get_tensor()
    # Print the tensor shape before and after the copy so a mismatch with the
    # loaded array is visible in the logs.
    print('tensor 1', np.array(dst_tensor).shape)
    dst_tensor.set(values, fluid.CPUPlace())
    print('tensor 2', np.array(dst_tensor).shape)


# Map each torch-exported parameter file to its Paddle parameter name and
# copy them all into the program.
_param_name_pairs = [
    ('encoder.0.weight', 'encoder.w_0'),
    ('encoder.0.bias', 'encoder.b_0'),
    ('ff_layer_output.0.weight', 'out.w_0'),
    ('ff_layer_output.0.bias', 'out.b_0'),
]
for _torch_name, _pd_name in _param_name_pairs:
    set_tensor(_torch_name, _pd_name)

# Export the populated program as an inference model; feed order at predict
# time must match ['q_emb', 'bp_emb', 'bp_id_emb'].
fluid.io.save_inference_model(paddle_params_dir, ['q_emb', 'bp_emb', 'bp_id_emb'], [out], exe)

保存出的模型,使用 python api 预测没有问题,可以正常运行,相关代码如下:

    # Look up the two candidate embeddings for this bp term.
    # NOTE(review): new_term_emb_dict, bp_id_emb_dict, q_emb and predictor are
    # defined by surrounding code that is not shown here.
    bp_emb = new_term_emb_dict[bp]
    bp_id_emb = bp_id_emb_dict[bp]
    # Add a leading batch dimension of 1 to match the exported model's inputs.
    q_emb = np.reshape(q_emb, [1, 128])
    bp_emb = bp_emb.reshape([1, 256])
    bp_id_emb = bp_id_emb.reshape([1, 256])

    # Feed tensors in the same order used at export time:
    # ['q_emb', 'bp_emb', 'bp_id_emb'].
    paddle_data = []
    paddle_data.append(PaddleTensor(q_emb))
    paddle_data.append(PaddleTensor(bp_emb))
    paddle_data.append(PaddleTensor(bp_id_emb))
    output = predictor.run(paddle_data)
    print(output[0].shape)

部署到线上服务端预测时,就出 core ,也定位不到具体原因,线上预测代码如下:

    // Run the Paddle predictor on the three embedding inputs and copy the
    // first output tensor into *q_vec.
    //
    // @param input  exactly three float vectors (q_emb, bp_emb, bp_id_emb),
    //               each fed as a [1, len] tensor.
    // @param q_vec  out parameter: flattened contents of output tensor 0.
    // @return 0 on success, -1 on any validation failure.
    int infer(const std::vector<std::vector<float>>& input,
            std::vector<float>* q_vec) {
        CHECK_AND_RETURN_WITH_MSG(input.size() == 3, -1, "invalid. ");
        std::vector<uint64_t> tm_list = {now_us()};
        // step 1: wrap each input vector as a [1, len] PaddleTensor.
        std::vector<paddle::PaddleTensor> input_vec(input.size());
        for (size_t i = 0; i < input.size(); ++i) {
            // static_cast is required: brace-initializing std::vector<int>
            // from a size_t is a narrowing conversion (ill-formed in C++11).
            std::vector<int> shape = {1, static_cast<int>(input[i].size())};
            ParseTensor(input[i], shape, &(input_vec[i]));
        }
        // step 2: run inference.
        tm_list.push_back(now_us());
        std::vector<paddle::PaddleTensor> output_vec;
        _tlr_ptr_paddle->Run(input_vec, &output_vec);
        tm_list.push_back(now_us());
        CHECK_AND_RETURN_WITH_MSG(output_vec.size() != 0, -1,
                format_string("invalid. size[%d]. ",
                        static_cast<int>(output_vec.size())));
        // Element count = product of the output tensor's dims.
        size_t cnt = 1;
        for (int dim : output_vec.at(0).shape) {
            cnt *= static_cast<size_t>(dim);
        }
        // Guard against reading past the output buffer: the shape may claim
        // more elements than the predictor actually produced.
        CHECK_AND_RETURN_WITH_MSG(
                cnt * sizeof(float) <= output_vec[0].data.length(), -1,
                "invalid. output buffer shorter than shape implies. ");
        // PaddleBuf::data() returns void*; an explicit cast is required.
        const float* data =
                static_cast<const float*>(output_vec[0].data.data());
        q_vec->assign(data, data + cnt);
        tm_list.push_back(now_us());
        // LOG(NOTICE) << format_string("tm_list[%s]. ", time_diff(tm_list).data());
        return 0;
    }

core 的信息如下:

#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:55
#1 0x00007f11f6aa42d9 in __GI_abort () at abort.c:89
#2 0x00007f11f6dde963 in ?? () from /opt/compiler/gcc-8.2/lib/libstdc++.so.6
#3 0x00007f11f6de49a6 in ?? () from /opt/compiler/gcc-8.2/lib/libstdc++.so.6
#4 0x00007f11f6de49e1 in std::terminate() () from /opt/compiler/gcc-8.2/lib/libstdc++.so.6
#5 0x00007f11f6de499a in std::rethrow_exception(std::exception_ptr::exception_ptr) () from /opt/compiler/gcc-8.2/lib/libstdc++.so.6
#6 0x00007f12006104f7 in paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) [clone .cold.1061] ()
from /home/work/chaoscs/bin/../libpaddle_fluid.so
#7 0x00007f120088cdf4 in paddle::framework::NaiveExecutor::Run() () from /home/work/chaoscs/bin/../libpaddle_fluid.so
#8 0x00007f12006e87d1 in paddle::AnalysisPredictor::Run(std::vector<paddle::PaddleTensor, std::allocator<paddle::PaddleTensor> > const&, std::vector<paddle::PaddleTensor, std::allocator<paddle::PaddleTensor> >*, int) () from /home/work/chaoscs/bin/../libpaddle_fluid.so
#9 0x0000000000851185 in chaoscs::Infer::infer (this=0x7f10b4013f70, input=..., q_vec=q_vec@entry=0x7f1161192f10)
at /home/opt/compiler/gcc-8.2/gcc-8.2/include/c++/8.2.0/bits/unique_ptr.h:342
#10 0x000000000085ca96 in chaoscs::ChaosCsServiceImpl::process_bp (this=0x611dde0, request=, response=,
counter=counter@entry=0x7f1073ffbd88) at baidu/im/chaoscs/src/chaoscs_service.h:322
#11 0x000000000085d871 in operator() (__closure=0x7f10541ee0c0) at baidu/im/chaoscs/src/chaoscs_service.h:206
#12 std::_Function_handler<void (), chaoscs::ChaosCsServiceImpl::search(google::protobuf::RpcController
, surf::app::interface::SurfAppRequest const*, surf::app::interface::SurfAppResponse*, google::protobuf::Closure*)::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...)
at /home/opt/compiler/gcc-8.2/gcc-8.2/include/c++/8.2.0/bits/std_function.h:297
#13 0x000000000084ab35 in operator() (this=) at /home/opt/compiler/gcc-8.2/gcc-8.2/include/c++/8.2.0/bits/std_function.h:682
#14 operator() (__closure=0x7f1161193200, __closure=0x7f1161193200) at baidu/im/chaoscs/src/chaoscs_thread.h:64
#15 std::_Function_handler<void (), chaoscs::ConcurrentExecutor::start(std::function<void ()>*)::{lambda()#1}>::_M_invoke(std::_Any_data const&) (
__functor=...) at /home/opt/compiler/gcc-8.2/gcc-8.2/include/c++/8.2.0/bits/std_function.h:297
#16 0x000000000084b6f7 in operator() (this=0x7f1161193200) at /home/opt/compiler/gcc-8.2/gcc-8.2/include/c++/8.2.0/bits/std_function.h:682
#17 chaoscs::ManagedThread::run_thread (this=0x14a8ac0 chaoscs::global_thread_pool()::pool) at baidu/im/chaoscs/src/chaoscs_thread.h:38
#18 0x00007f11f6e0e16f in ?? () from /opt/compiler/gcc-8.2/lib/libstdc++.so.6
#19 0x00007f12021d6da4 in start_thread (arg=) at pthread_create.c:333
#20 0x00007f11f6b6f32d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions