|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
| 2 | +
|
| 3 | + Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | + you may not use this file except in compliance with the License. |
| 5 | + You may obtain a copy of the License at |
| 6 | +
|
| 7 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +
|
| 9 | + Unless required by applicable law or agreed to in writing, software |
| 10 | + distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +
|
| 12 | +
|
| 13 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + See the License for the specific language governing permissions and |
| 15 | + limitations under the License. */ |
| 16 | + |
| 17 | +#include "paddle/framework/tensor_array.h" |
| 18 | + |
| 19 | +#include <glog/logging.h> |
| 20 | +#include <algorithm> |
| 21 | +#include <limits> |
| 22 | + |
| 23 | +namespace paddle { |
| 24 | +namespace framework { |
| 25 | + |
| 26 | +namespace detail { |
| 27 | + |
/*
 * Offers an iterator over the length-sorted top level of a lod-tensor. The top
 * level of a lod-tensor stores a batch of sequences, and each top-level
 * sequence may contain several lower-level sequences. Sorting the top-level
 * lod by the number of lower-level elements in descending order means that
 * while an RNN runs, the batch size keeps decreasing and the short sentences
 * end at the tail of each batch.
 *
 * Let's take a simple lod-tensor as an example:
 *
 *   |(0)   |(1)              top level has two instances
 *   |||    |||||             lower level
 *
 * sorted by lower-level length:
 *
 *   |(1)    |(0)
 *   |||||   |||
 *
 * When the RNN runs, it gets 5 batches (equal to the number of elements in
 * the longest sequence):
 *
 *   |||||
 *   |||
 *
 * The first three batches have two elements each; the last two batches have
 * just one element each.
 */
struct DynamicBatchUnpacker {
  // Element type of the tensors being unpacked; the whole TensorArray
  // currently assumes float data.
  using value_type = float;

  // Builds the length-sorted meta immediately; `descend` selects whether the
  // sequences are sorted by decreasing (true) or increasing (false) length.
  DynamicBatchUnpacker(const LoDTensor& source, size_t level,
                       bool descend = true)
      : source(&source), level(level) {
    BuildLengthSortedMeta(descend);
  }

  // Returns the `index`-th batch: one record from every sequence that is
  // still alive at time step `index`.
  LoDTensor GetBatch(size_t index);

  // Per-sequence (begin, end, original-index) records, sorted by length.
  std::vector<DySeqMeta> meta;

  // Non-owning pointer to the lod-tensor being unpacked — TODO confirm the
  // caller keeps it alive for the unpacker's lifetime.
  LoDTensor const* source;
  // The lod level whose sequences are unpacked.
  size_t level;

 protected:
  void BuildLengthSortedMeta(bool descend);
};
| 74 | + |
// Re-assembles the step-major batches produced by DynamicBatchUnpacker back
// into a single sequence-major LoDTensor described by `meta` and `lod`.
LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                           const std::vector<DySeqMeta>& meta, const LoD& lod,
                           size_t level);
| 78 | + |
| 79 | +} // namespace detail |
| 80 | + |
| 81 | +const LoDTensor& TensorArray::Read(size_t index) const { |
| 82 | + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); |
| 83 | + if (index >= size()) { |
| 84 | + values_.resize(index + 1); |
| 85 | + } |
| 86 | + return values_[index]; |
| 87 | +} |
| 88 | + |
| 89 | +void TensorArray::Write(size_t index, const LoDTensor& value) { |
| 90 | + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); |
| 91 | + |
| 92 | + if (index >= size()) { |
| 93 | + values_.resize(index + 1); |
| 94 | + } |
| 95 | + |
| 96 | + values_[index].Resize(value.dims()); |
| 97 | + values_[index].mutable_data<value_type>(platform::CPUPlace()); |
| 98 | + values_[index].CopyFrom<value_type>(value, platform::CPUPlace()); |
| 99 | +} |
| 100 | + |
| 101 | +void TensorArray::WriteShared(size_t index, const LoDTensor& value) { |
| 102 | + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); |
| 103 | + if (index >= size()) { |
| 104 | + values_.resize(index + 1); |
| 105 | + } |
| 106 | + |
| 107 | + values_[index].ShareDataWith<value_type>(value); |
| 108 | +} |
| 109 | + |
| 110 | +LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta, |
| 111 | + const LoD& lod) const { |
| 112 | + return detail::PackDynamicBatch(values_, meta, lod, level); |
| 113 | +} |
| 114 | + |
| 115 | +std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, |
| 116 | + bool length_desend) { |
| 117 | + detail::DynamicBatchUnpacker unpacker(source, level, |
| 118 | + length_desend /*descend*/); |
| 119 | + |
| 120 | + // find max length of all the sequences |
| 121 | + size_t max_length = 0; |
| 122 | + for (const auto& seq : unpacker.meta) { |
| 123 | + max_length = std::max(max_length, seq.end - seq.begin); |
| 124 | + } |
| 125 | + |
| 126 | + // write batches to values |
| 127 | + for (size_t batch_id = 0; batch_id < max_length; batch_id++) { |
| 128 | + Write(batch_id, unpacker.GetBatch(batch_id)); |
| 129 | + } |
| 130 | + |
| 131 | + return unpacker.meta; |
| 132 | +} |
| 133 | + |
| 134 | +LoDTensor TensorArray::Stack() const { |
| 135 | + LoDTensor result; |
| 136 | + if (size() == 0) return result; |
| 137 | + |
| 138 | + const auto& first_dims = values_.front().dims(); |
| 139 | + // check all the values have the same shape |
| 140 | + // TODO(superjom) check the same dtypes |
| 141 | + for (size_t idx = 1; idx < size(); idx++) { |
| 142 | + const auto& value_dims = values_[idx].dims(); |
| 143 | + PADDLE_ENFORCE_EQ(first_dims, value_dims); |
| 144 | + } |
| 145 | + |
| 146 | + // copy |
| 147 | + auto result_dims = vectorize(first_dims); |
| 148 | + result_dims.insert(result_dims.begin(), size()); |
| 149 | + result.Resize(make_ddim(result_dims)); |
| 150 | + result.mutable_data<value_type>(platform::CPUPlace()); |
| 151 | + |
| 152 | + for (size_t idx = 0; idx < size(); idx++) { |
| 153 | + result.Slice<value_type>(idx, idx + 1) |
| 154 | + .CopyFrom<value_type>(Read(idx), platform::CPUPlace()); |
| 155 | + } |
| 156 | + return result; |
| 157 | +} |
| 158 | + |
| 159 | +void TensorArray::Unstack(const LoDTensor& source) const { |
| 160 | + Unstack(source, false /*data_shared*/); |
| 161 | +} |
| 162 | + |
| 163 | +void TensorArray::UnstackShared(const LoDTensor& source) const { |
| 164 | + Unstack(source, true /*data_shared*/); |
| 165 | +} |
| 166 | + |
| 167 | +void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const { |
| 168 | + size_t first_dim = source.dims()[0]; |
| 169 | + DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size()); |
| 170 | + PADDLE_ENFORCE_GT(first_dim, 0, |
| 171 | + "source should have some data to be unstacked"); |
| 172 | + |
| 173 | + values_.resize(first_dim); |
| 174 | + |
| 175 | + for (size_t elem = 0; elem < first_dim; elem++) { |
| 176 | + // create a new value |
| 177 | + auto& value = values_[elem]; |
| 178 | + if (data_shared) { |
| 179 | + // share memory |
| 180 | + value.ShareDataWith<value_type>(source.Slice<value_type>(elem, elem + 1)); |
| 181 | + } else { |
| 182 | + // copy |
| 183 | + value.Resize(value_dims); |
| 184 | + value.CopyFrom<value_type>(source.Slice<value_type>(elem, elem + 1), |
| 185 | + platform::CPUPlace()); |
| 186 | + } |
| 187 | + } |
| 188 | +} |
| 189 | + |
| 190 | +size_t TensorArray::size() const { return values_.size(); } |
| 191 | + |
| 192 | +namespace detail { |
| 193 | + |
| 194 | +void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) { |
| 195 | + PADDLE_ENFORCE(meta.empty(), "duplicate build meta"); |
| 196 | + // collect meta for each sequence in some level |
| 197 | + auto lod = SliceLevels(source->lod(), level, level + 1)[0]; |
| 198 | + |
| 199 | + for (size_t seq_id = 0; seq_id < lod.size() - 1; seq_id++) { |
| 200 | + DySeqMeta seq_meta({lod[seq_id], lod[seq_id + 1], seq_id}); |
| 201 | + meta.push_back(seq_meta); |
| 202 | + } |
| 203 | + |
| 204 | + PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty"); |
| 205 | + |
| 206 | + // sort by length |
| 207 | + sort(meta.begin(), meta.end(), |
| 208 | + [descend](const DySeqMeta& a, const DySeqMeta& b) { |
| 209 | + bool a_ge_b = (a.end - a.begin) > (b.end - b.begin); |
| 210 | + return descend ? a_ge_b : !a_ge_b; |
| 211 | + }); |
| 212 | +} |
| 213 | + |
| 214 | +LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { |
| 215 | + PADDLE_ENFORCE(!meta.empty(), "should build meta first"); |
| 216 | + LoDTensor result; |
| 217 | + |
| 218 | + // collect indice need to copy to the batch |
| 219 | + std::vector<size_t> indice; |
| 220 | + for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { |
| 221 | + const auto& seq_meta = meta[seq_id]; |
| 222 | + if (index >= seq_meta.end) break; |
| 223 | + indice.push_back(seq_meta.begin + index); |
| 224 | + } |
| 225 | + |
| 226 | + PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); |
| 227 | + |
| 228 | + // copy the indice of records in LoDTensor |
| 229 | + auto record_dims = slice_ddim(source->dims(), 1, source->dims().size()); |
| 230 | + auto record_dims_vec = vectorize(record_dims); |
| 231 | + record_dims_vec.insert(record_dims_vec.begin(), indice.size()); |
| 232 | + result.Resize(make_ddim(record_dims_vec)); |
| 233 | + result.mutable_data<value_type>(platform::CPUPlace()); |
| 234 | + |
| 235 | + for (size_t i = 0; i < indice.size() - 1; i++) { |
| 236 | + auto index = indice[i]; |
| 237 | + auto target = result.Slice<value_type>(i, i + 1); |
| 238 | + auto source_ = source->Slice<value_type>(index, index + 1); |
| 239 | + target.CopyFrom<value_type>(source_, platform::CPUPlace()); |
| 240 | + } |
| 241 | + |
| 242 | + return result; |
| 243 | +} |
| 244 | + |
| 245 | +LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source, |
| 246 | + const std::vector<DySeqMeta>& meta, const LoD& lod, |
| 247 | + size_t level) { |
| 248 | + PADDLE_ENFORCE(!source.empty()); |
| 249 | + PADDLE_ENFORCE(!meta.empty()); |
| 250 | + PADDLE_ENFORCE(!lod.empty()); |
| 251 | + |
| 252 | + LoDTensor result; |
| 253 | + |
| 254 | + // init result space |
| 255 | + auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size()); |
| 256 | + auto record_dims_vec = vectorize(record_dims); |
| 257 | + auto height = lod[level].back(); |
| 258 | + record_dims_vec.insert(record_dims_vec.begin(), height); |
| 259 | + result.Resize(make_ddim(record_dims_vec)); |
| 260 | + result.mutable_data<float>(platform::CPUPlace()); |
| 261 | + |
| 262 | + for (size_t batch_id = 0; batch_id < source.size(); batch_id++) { |
| 263 | + for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { |
| 264 | + const auto& seq_meta = meta[seq_id]; |
| 265 | + // source is source[batch_id][seq_id] |
| 266 | + // target is result[index] |
| 267 | + auto index = seq_meta.begin + batch_id; |
| 268 | + if (index >= seq_meta.end) break; |
| 269 | + auto source_ = source[batch_id].Slice<float>(seq_id, seq_id + 1); |
| 270 | + auto target = result.Slice<float>(index, index + 1); |
| 271 | + target.CopyFrom<float>(source_, platform::CPUPlace()); |
| 272 | + } |
| 273 | + } |
| 274 | + |
| 275 | + result.set_lod(lod); |
| 276 | + |
| 277 | + return result; |
| 278 | +} |
| 279 | + |
| 280 | +} // namespace detail |
| 281 | + |
| 282 | +} // namespace framework |
| 283 | +} // namespace paddle |
0 commit comments