Skip to content

Commit c705f06

Browse files
authored
add TensorArray (#4459)
* add tensor array * update * set type
1 parent 33c5453 commit c705f06

4 files changed

Lines changed: 534 additions & 0 deletions

File tree

paddle/framework/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,6 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
4343

4444
cc_library(backward SRCS backward.cc DEPS net_op)
4545
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
46+
47+
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
48+
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)

paddle/framework/tensor_array.cc

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
12+
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License. */
16+
17+
#include "paddle/framework/tensor_array.h"
18+
19+
#include <glog/logging.h>
20+
#include <algorithm>
21+
#include <limits>
22+
23+
namespace paddle {
24+
namespace framework {
25+
26+
namespace detail {
27+
28+
/*
 * Offers an iterator over the length-sorted top level of a lod-tensor.
 * The top level of a lod-tensor stores batch-size sequences; each top-level
 * sequence may contain several lower-level sequences. Sorting the top-level
 * lod by the number of lower-level elements in descending order means that,
 * while an RNN runs, the batch size keeps decreasing and the short sentences
 * end at the tail of each batch.
 *
 * Let's take a simple lod-tensor for example:
 *
 *   |(0)   |(1)        top level has two instances
 *   |||    |||||       lower level
 *
 * sort by lower level's length:
 *
 *   |(1)   |(0)
 *   |||||  |||
 *
 * When the RNN runs, it gets 5 batches (equal to the number of elements of
 * the longest sequence):
 *
 *   |||||
 *   |||
 *
 * The first three batches have two elements each; the last two batches have
 * just one element each.
 */
struct DynamicBatchUnpacker {
  // The element type copied between batches; the whole array is float-typed.
  using value_type = float;

  // Builds the length-sorted metadata immediately; `descend` controls the
  // sort direction (descending is required for the shrinking-batch layout
  // described above).
  DynamicBatchUnpacker(const LoDTensor& source, size_t level,
                       bool descend = true)
      : source(&source), level(level) {
    BuildLengthSortedMeta(descend);
  }

  // Returns the `index`-th batch: one record from every sequence that is
  // still alive at step `index`.
  LoDTensor GetBatch(size_t index);

  // Per-sequence [begin, end) record ranges, sorted by length.
  std::vector<DySeqMeta> meta;

  // Non-owning pointer to the tensor being unpacked.
  LoDTensor const* source;
  // The lod level whose sequences are iterated.
  size_t level;

 protected:
  // Fills `meta` from `source`'s lod at `level` and sorts it by length.
  void BuildLengthSortedMeta(bool descend);
};

// Inverse of unpacking: scatters the batch tensors in `source` back into a
// single LoDTensor laid out according to `meta`, and attaches `lod` to the
// result.
LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                           const std::vector<DySeqMeta>& meta, const LoD& lod,
                           size_t level);
79+
} // namespace detail
80+
81+
// Returns the tensor stored at `index`. Reading past the current end grows
// the storage on demand, so the caller gets an empty (default-constructed)
// tensor rather than an out-of-range failure.
const LoDTensor& TensorArray::Read(size_t index) const {
  PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index);
  const bool out_of_range = index >= size();
  if (out_of_range) {
    values_.resize(index + 1);
  }
  return values_[index];
}
88+
89+
void TensorArray::Write(size_t index, const LoDTensor& value) {
90+
PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index);
91+
92+
if (index >= size()) {
93+
values_.resize(index + 1);
94+
}
95+
96+
values_[index].Resize(value.dims());
97+
values_[index].mutable_data<value_type>(platform::CPUPlace());
98+
values_[index].CopyFrom<value_type>(value, platform::CPUPlace());
99+
}
100+
101+
void TensorArray::WriteShared(size_t index, const LoDTensor& value) {
102+
PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index);
103+
if (index >= size()) {
104+
values_.resize(index + 1);
105+
}
106+
107+
values_[index].ShareDataWith<value_type>(value);
108+
}
109+
110+
// Packs the stored time-step tensors back into a single LoDTensor, using
// `meta` (produced by Unpack) to scatter each batch row to its original
// sequence position, and attaching the sequence layout `lod` to the result.
LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
                            const LoD& lod) const {
  return detail::PackDynamicBatch(values_, meta, lod, level);
}
114+
115+
std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
116+
bool length_desend) {
117+
detail::DynamicBatchUnpacker unpacker(source, level,
118+
length_desend /*descend*/);
119+
120+
// find max length of all the sequences
121+
size_t max_length = 0;
122+
for (const auto& seq : unpacker.meta) {
123+
max_length = std::max(max_length, seq.end - seq.begin);
124+
}
125+
126+
// write batches to values
127+
for (size_t batch_id = 0; batch_id < max_length; batch_id++) {
128+
Write(batch_id, unpacker.GetBatch(batch_id));
129+
}
130+
131+
return unpacker.meta;
132+
}
133+
134+
// Concatenates all stored step tensors along a new leading dimension:
// the result has shape [size(), step_dims...]. Returns an empty tensor when
// the array is empty.
LoDTensor TensorArray::Stack() const {
  LoDTensor result;
  if (size() == 0) return result;

  // Every step must share one shape.
  // TODO(superjom) check the same dtypes
  const auto& step_dims = values_.front().dims();
  for (size_t idx = 1; idx < size(); idx++) {
    PADDLE_ENFORCE_EQ(step_dims, values_[idx].dims());
  }

  // Allocate the stacked buffer on CPU.
  auto stacked_dims = vectorize(step_dims);
  stacked_dims.insert(stacked_dims.begin(), size());
  result.Resize(make_ddim(stacked_dims));
  result.mutable_data<value_type>(platform::CPUPlace());

  // Copy each step into its slice of the result.
  for (size_t idx = 0; idx < size(); idx++) {
    result.Slice<value_type>(idx, idx + 1)
        .CopyFrom<value_type>(Read(idx), platform::CPUPlace());
  }
  return result;
}
158+
159+
// Splits `source` along its first dimension into per-step tensors,
// deep-copying the data of every slice.
void TensorArray::Unstack(const LoDTensor& source) const {
  Unstack(source, false /*data_shared*/);
}
162+
163+
// Splits `source` along its first dimension into per-step tensors that
// share memory with the corresponding slices of `source` (no copy).
void TensorArray::UnstackShared(const LoDTensor& source) const {
  Unstack(source, true /*data_shared*/);
}
166+
167+
void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const {
168+
size_t first_dim = source.dims()[0];
169+
DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size());
170+
PADDLE_ENFORCE_GT(first_dim, 0,
171+
"source should have some data to be unstacked");
172+
173+
values_.resize(first_dim);
174+
175+
for (size_t elem = 0; elem < first_dim; elem++) {
176+
// create a new value
177+
auto& value = values_[elem];
178+
if (data_shared) {
179+
// share memory
180+
value.ShareDataWith<value_type>(source.Slice<value_type>(elem, elem + 1));
181+
} else {
182+
// copy
183+
value.Resize(value_dims);
184+
value.CopyFrom<value_type>(source.Slice<value_type>(elem, elem + 1),
185+
platform::CPUPlace());
186+
}
187+
}
188+
}
189+
190+
// Number of time-step tensors currently stored.
size_t TensorArray::size() const { return values_.size(); }
191+
192+
namespace detail {
193+
194+
void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) {
195+
PADDLE_ENFORCE(meta.empty(), "duplicate build meta");
196+
// collect meta for each sequence in some level
197+
auto lod = SliceLevels(source->lod(), level, level + 1)[0];
198+
199+
for (size_t seq_id = 0; seq_id < lod.size() - 1; seq_id++) {
200+
DySeqMeta seq_meta({lod[seq_id], lod[seq_id + 1], seq_id});
201+
meta.push_back(seq_meta);
202+
}
203+
204+
PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty");
205+
206+
// sort by length
207+
sort(meta.begin(), meta.end(),
208+
[descend](const DySeqMeta& a, const DySeqMeta& b) {
209+
bool a_ge_b = (a.end - a.begin) > (b.end - b.begin);
210+
return descend ? a_ge_b : !a_ge_b;
211+
});
212+
}
213+
214+
// Builds the `index`-th batch: one record from every sequence that still has
// an element at step `index`. `meta` is length-sorted (descending), so the
// scan can stop at the first sequence that is too short.
// Returns a freshly allocated CPU tensor of shape [alive, record_dims...].
LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
  PADDLE_ENFORCE(!meta.empty(), "should build meta first");
  LoDTensor result;

  // Collect the source-record offsets that belong to this batch.
  std::vector<size_t> indice;
  for (const auto& seq_meta : meta) {
    if (index >= seq_meta.end) break;
    indice.push_back(seq_meta.begin + index);
  }

  PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);

  // Allocate the batch buffer: [num_selected_records, record_dims...].
  auto record_dims = slice_ddim(source->dims(), 1, source->dims().size());
  auto record_dims_vec = vectorize(record_dims);
  record_dims_vec.insert(record_dims_vec.begin(), indice.size());
  result.Resize(make_ddim(record_dims_vec));
  result.mutable_data<value_type>(platform::CPUPlace());

  // Copy every selected record into the batch.
  // FIX: the previous loop bound was `indice.size() - 1`, which silently
  // dropped the last record of every batch. Also renamed the loop-local
  // offset so it no longer shadows the `index` parameter.
  for (size_t i = 0; i < indice.size(); i++) {
    const auto record_offset = indice[i];
    auto target = result.Slice<value_type>(i, i + 1);
    auto source_ =
        source->Slice<value_type>(record_offset, record_offset + 1);
    target.CopyFrom<value_type>(source_, platform::CPUPlace());
  }

  return result;
}
244+
245+
// Scatters the per-step batch tensors in `source` back into one LoDTensor.
// The row at offset `meta[seq_id].begin + batch_id` of the result receives
// row `seq_id` of batch `batch_id`; `lod` is attached to the result so the
// original sequence layout is restored.
LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                           const std::vector<DySeqMeta>& meta, const LoD& lod,
                           size_t level) {
  PADDLE_ENFORCE(!source.empty());
  PADDLE_ENFORCE(!meta.empty());
  PADDLE_ENFORCE(!lod.empty());

  LoDTensor result;

  // Shape of the packed tensor: [total_records, record_dims...], where
  // total_records is the tail offset of the chosen lod level.
  auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size());
  auto packed_dims = vectorize(record_dims);
  const auto total_records = lod[level].back();
  packed_dims.insert(packed_dims.begin(), total_records);
  result.Resize(make_ddim(packed_dims));
  result.mutable_data<float>(platform::CPUPlace());

  // For each batch, scatter every alive sequence's row back to its place.
  for (size_t batch_id = 0; batch_id < source.size(); batch_id++) {
    const auto& batch = source[batch_id];
    for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) {
      const auto& seq_meta = meta[seq_id];
      const auto target_offset = seq_meta.begin + batch_id;
      // Sequences are length-sorted, so the first exhausted one ends the
      // scan for this batch.
      if (target_offset >= seq_meta.end) break;
      auto row = batch.Slice<float>(seq_id, seq_id + 1);
      result.Slice<float>(target_offset, target_offset + 1)
          .CopyFrom<float>(row, platform::CPUPlace());
    }
  }

  result.set_lod(lod);

  return result;
}
279+
280+
} // namespace detail
281+
282+
} // namespace framework
283+
} // namespace paddle

0 commit comments

Comments
 (0)