Skip to content

Commit d01a280

Browse files
authored
Merge pull request PaddlePaddle#15 from Thunderbrook/gpugraph_deepwalk
[GpuGraph] metapath
2 parents 2457680 + b624aec commit d01a280

File tree

7 files changed

+347
-149
lines changed

7 files changed

+347
-149
lines changed

paddle/fluid/framework/data_feed.cc

Lines changed: 1 addition & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -38,59 +38,6 @@ DLManager& global_dlmanager_pool() {
3838
return manager;
3939
}
4040

41-
void GraphDataGenerator::AllocResource(const paddle::platform::Place& place,
42-
std::vector<LoDTensor*> feed_vec,
43-
std::vector<uint64_t>* h_device_keys) {
44-
place_ = place;
45-
gpuid_ = place_.GetDeviceId();
46-
VLOG(3) << "gpuid " << gpuid_;
47-
stream_ = dynamic_cast<platform::CUDADeviceContext*>(
48-
platform::DeviceContextPool::Instance().Get(place))
49-
->stream();
50-
feed_vec_ = feed_vec;
51-
h_device_keys_ = h_device_keys;
52-
device_key_size_ = h_device_keys_->size();
53-
d_device_keys_ =
54-
memory::AllocShared(place_, device_key_size_ * sizeof(uint64_t));
55-
CUDA_CHECK(cudaMemcpyAsync(d_device_keys_->ptr(), h_device_keys_->data(),
56-
device_key_size_ * sizeof(uint64_t),
57-
cudaMemcpyHostToDevice, stream_));
58-
size_t once_max_sample_keynum = walk_degree_ * once_sample_startid_len_;
59-
d_prefix_sum_ =
60-
memory::AllocShared(place_, (once_max_sample_keynum + 1) * sizeof(int));
61-
int* d_prefix_sum_ptr = reinterpret_cast<int*>(d_prefix_sum_->ptr());
62-
cudaMemsetAsync(d_prefix_sum_ptr, 0,
63-
(once_max_sample_keynum + 1) * sizeof(int), stream_);
64-
cursor_ = 0;
65-
jump_rows_ = 0;
66-
device_keys_ = reinterpret_cast<uint64_t*>(d_device_keys_->ptr());
67-
d_walk_ = memory::AllocShared(place_, buf_size_ * sizeof(uint64_t));
68-
cudaMemsetAsync(d_walk_->ptr(), 0, buf_size_ * sizeof(uint64_t), stream_);
69-
d_sample_keys_ =
70-
memory::AllocShared(place_, once_max_sample_keynum * sizeof(uint64_t));
71-
72-
d_sampleidx2rows_.push_back(
73-
memory::AllocShared(place_, once_max_sample_keynum * sizeof(int)));
74-
d_sampleidx2rows_.push_back(
75-
memory::AllocShared(place_, once_max_sample_keynum * sizeof(int)));
76-
cur_sampleidx2row_ = 0;
77-
78-
d_len_per_row_ =
79-
memory::AllocShared(place_, once_max_sample_keynum * sizeof(int));
80-
for (int i = -window_; i < 0; i++) {
81-
window_step_.push_back(i);
82-
}
83-
for (int i = 0; i < window_; i++) {
84-
window_step_.push_back(i + 1);
85-
}
86-
buf_state_.Init(batch_size_, walk_len_, &window_step_);
87-
d_random_row_ = memory::AllocShared(
88-
place_,
89-
(once_sample_startid_len_ * walk_degree_ * repeat_time_) * sizeof(int));
90-
shuffle_seed_ = 0;
91-
cudaStreamSynchronize(stream_);
92-
}
93-
9441
class BufferedLineFileReader {
9542
typedef std::function<bool()> SampleFunc;
9643
static const int MAX_FILE_BUFF_SIZE = 4 * 1024 * 1024;
@@ -2643,8 +2590,7 @@ bool SlotRecordInMemoryDataFeed::Start() {
26432590
#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
26442591
CHECK(paddle::platform::is_gpu_place(this->place_));
26452592
pack_ = BatchGpuPackMgr().get(this->GetPlace(), used_slots_info_);
2646-
gpu_graph_data_generator_.AllocResource(this->place_, feed_vec_,
2647-
h_device_keys_);
2593+
gpu_graph_data_generator_.AllocResource(this->place_, feed_vec_);
26482594
#endif
26492595
return true;
26502596
}

0 commit comments

Comments
 (0)