Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 16 additions & 25 deletions paddle/gserver/layers/MKLDNNFcLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,6 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
}

void MKLDNNFcLayer::convertOutputToOtherDevice() {
  // Mirror frame size and sequence info into the other-device outputs first.
  copyOutputInfoToOtherDevice();
  // Find CPU-device outputs and share this layer's value with them.
  // The fc cpu output value needs no format conversion, so sharing the
  // pointer is sufficient.
  int cpuDeviceCnt = 0;
  for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
    if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
      outputOtherDevice_[i].value = output_.value;
      ++cpuDeviceCnt;
    }
  }

  // At most one CPU device is expected to consume this output.
  if (cpuDeviceCnt > 1) {
    LOG(WARNING) << "should not have more than one CPU device";
  }
}

void MKLDNNFcLayer::reshape() {
const Argument& input = getInput(0, getPrev(0)->getDeviceId());
int batchSize = input.getBatchSize();
Expand Down Expand Up @@ -155,7 +137,10 @@ void MKLDNNFcLayer::resetFwd() {
// change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
if (!outputIsOnlyMKLDNN()) {
convertOutputToOtherDevice();
copyOutputInfoToOtherDevice();
// the fc cpu output value does not need a conversion,
// just share the data pointer
getOutput(CPU_DEVICE).value->setData(output_.value->getData());
}

// create forward handle
Expand Down Expand Up @@ -235,13 +220,12 @@ void MKLDNNFcLayer::resetBwd() {
pipelineBwd_.push_back(*bwdWgt_);

/// backward data
device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
const MatrixPtr& in = getInputGrad(0, device);
const MatrixPtr& in = inputLayers_[0]->getOutput().grad;
if (in == nullptr) {
return;
}
if (getInput(0, device).getAllCount() > 1) {
// TODO(TJ): use outputMaps_ ways when merge outgrad done
if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) {
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
} else {
inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc());
}
Expand All @@ -258,13 +242,21 @@ void MKLDNNFcLayer::resetBwd() {
pipelineBwd_.push_back(*bwdData_);
}

void MKLDNNFcLayer::updateInputData() {
  // Only a "data" layer may hand us a different buffer address each pass;
  // other input layer types keep a stable buffer, so there is nothing to
  // refresh in that case.
  if (inputLayers_[0]->getType() == "data") {
    inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
  }
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updateInputData这个函数,不同的mkldnn layer的处理方式会不一样么?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

处理方式是一样的,但是可能别的layer不是用inVal_了

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

如果处理方式是一样的,那updateInputData的实现应该放在MKLDNNLayer中:

void MKLDNNLayer::updateInputData(MKLDNNMatrix* val) {
   if (inputLayers_[0]->getType() != "data") {
     return;
   }
   real* iData = getInputValue(0, CPU_DEVICE)->getData();
   val->setData(iData);
 }

这里,updateInputData(inVal_)即可。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

嗯,是的。不过我准备下一个PR,把这个函数放到父类,所以下一个一起改掉吧


void MKLDNNFcLayer::forward(PassType passType) {
Layer::forward(passType);
reshape();

{
REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
syncInputValue();
updateInputData();

// just submit forward pipeline
stream_->submit(pipelineFwd_);
Expand All @@ -286,7 +278,6 @@ void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
resetBwd();

syncOutputGrad();
// just submit backward pipeline
stream_->submit(pipelineBwd_);
}
Expand Down
4 changes: 2 additions & 2 deletions paddle/gserver/layers/MKLDNNFcLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class MKLDNNFcLayer : public MKLDNNLayer {

void backward(const UpdateCallback& callback) override;

void updateInputData() override;

protected:
/**
* reshape the input image sizes
Expand All @@ -72,8 +74,6 @@ class MKLDNNFcLayer : public MKLDNNLayer {
* only would be called when needed
*/
void resetBwd();

void convertOutputToOtherDevice() override;
};

} // namespace paddle
39 changes: 10 additions & 29 deletions paddle/gserver/layers/MKLDNNLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,10 @@ class MKLDNNLayer : public Layer {
virtual void convertWeightsToPaddle() {}

/**
* convert MKLDNN output to other device.
* only support CPU device yet
* Update the input value data when the input layer is of "data" type,
* since the input value data address might have changed.
*/
virtual void convertOutputToOtherDevice() {}
virtual void updateInputData() {}

/**
* print info about sizes
Expand Down Expand Up @@ -155,6 +155,7 @@ class MKLDNNLayer : public Layer {
* copy base info and do not copy data value
*/
void copyOutputInfoToOtherDevice() {
int cnt = 0;
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
Expand All @@ -163,6 +164,12 @@ class MKLDNNLayer : public Layer {
outputOtherDevice_[i].subSequenceStartPositions =
output_.subSequenceStartPositions;
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
++cnt;
}
}
if (cnt > 1) {
LOG(WARNING) << "should not have more than one CPU device";
}
}

Expand Down Expand Up @@ -193,32 +200,6 @@ class MKLDNNLayer : public Layer {
return outputOtherDevice_.size() == 0;
}

/**
 * Refresh the mkldnn input-value handle from the CPU device buffer.
 */
void syncInputValue() {
  // Nothing to sync when every input already lives on the MKLDNN device.
  if (!inputIsOnlyMKLDNN()) {
    // The CPU buffer address may change when the previous layer is a data
    // layer, so re-point the mkldnn handle on every pass.
    inVal_->updateData(getInputValue(0, CPU_DEVICE)->getData());
  }
}

/**
 * Refresh the mkldnn output-gradient handle from the CPU device buffer.
 */
void syncOutputGrad() {
  // Only needed when some consumer of this output lives on the CPU device.
  if (!outputIsOnlyMKLDNN()) {
    outGrad_->updateData(getOutput(CPU_DEVICE).grad->getData());
  }
}

/**
* Set deviceId of this layer.
*/
Expand Down
10 changes: 4 additions & 6 deletions paddle/math/MKLDNNMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
size_t width = cnts / dims[0];
m = Matrix::create(height, width, false, false);
}

CHECK(m) << " Matrix should not be empty";

CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast<CpuMatrix>(m);
CHECK(cpuMatrix) << "Only support create from CPU matrix yet";

CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";
return std::make_shared<MKLDNNMatrix>(
m->getData(), m->getHeight(), m->getWidth(), pd);
CHECK_EQ(cpuMatrix->getElementCnt(), cnts) << "Count size does not match";
return std::make_shared<MKLDNNMatrix>(cpuMatrix, pd);
}

MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
Expand Down Expand Up @@ -138,7 +136,7 @@ void MKLDNNMatrix::downSpatial() {
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(getData());
set_data_handle(data_);
}

} // namespace paddle
35 changes: 28 additions & 7 deletions paddle/math/MKLDNNMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ typedef std::shared_ptr<MKLDNNMatrix> MKLDNNMatrixPtr;
*/
class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
public:
MKLDNNMatrix(real* data,
size_t height,
size_t width,
mkldnn::memory::primitive_desc pd)
: CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
// Wrap an existing CpuMatrix buffer as an mkldnn memory primitive.
// The base classes alias m's data buffer directly (no copy is made).
MKLDNNMatrix(CpuMatrixPtr m, mkldnn::memory::primitive_desc pd)
    : CpuMatrix(m->getData(), m->getHeight(), m->getWidth(), false),
      mkldnn::memory(pd, m->getData()),
      // hold the source matrix so its buffer outlives this wrapper
      m_(m) {}

~MKLDNNMatrix() {}

Expand Down Expand Up @@ -81,11 +80,29 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
void downSpatial();

/**
* Update the memory data handle.
* set the memory data handle.
* Caution: This will not check the buffer size of the data,
* it should be covered by the user.
*/
void updateData(void* data) { set_data_handle(data); }
void setData(real* data) {
  // Keep the mkldnn memory handle and the CpuMatrix buffer pointing at the
  // same external buffer.
  set_data_handle(data);
  CpuMatrix::setData(data);
  // The caller now owns the buffer's lifetime, so drop the matrix that was
  // held only to keep the previous buffer alive.
  m_.reset();
}

/**
* override Matrix::getData
* check data before return
*/
real* getData() override {
  // data_ and the mkldnn handle should only diverge through misuse of
  // set_data_handle; fail fast if they disagree.
  CHECK_EQ((void*)data_, get_data_handle());
  return data_;
}

const real* getData() const override {
  // Same consistency check as the non-const overload.
  CHECK_EQ((void*)data_, get_data_handle());
  return data_;
}

/**
* Get primitive descriptor.
Expand Down Expand Up @@ -143,6 +160,10 @@ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
memory::format srcFmt,
memory::format dstFmt,
memory::dims dm);

private:
// keep a reference to the CpuMatrix so its buffer is not released elsewhere
CpuMatrixPtr m_;
};

} // namespace paddle