Skip to content

Commit 47b82ac

Browse files
authored
Merge pull request #6 from Thunderbrook/gpugraph_deepwalk
[GpuGraph] remove useless variables and adjust log level
2 parents fc28b23 + 308a394 commit 47b82ac

File tree

5 files changed

+25
-29
lines changed

5 files changed

+25
-29
lines changed

paddle/fluid/framework/data_feed.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,6 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place& place,
5252
device_key_size_ = h_device_keys_->size();
5353
d_device_keys_ =
5454
memory::AllocShared(place_, device_key_size_ * sizeof(int64_t));
55-
for (size_t i = 0; i < h_device_keys_->size(); i++) {
56-
VLOG(2) << "h_device_keys_[" << i << "] = " << (*h_device_keys_)[i];
57-
}
5855
CUDA_CHECK(cudaMemcpyAsync(d_device_keys_->ptr(), h_device_keys_->data(),
5956
device_key_size_ * sizeof(int64_t),
6057
cudaMemcpyHostToDevice, stream_));
@@ -67,7 +64,6 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place& place,
6764
cursor_ = 0;
6865
jump_rows_ = 0;
6966
device_keys_ = reinterpret_cast<int64_t*>(d_device_keys_->ptr());
70-
VLOG(2) << "device_keys_ = " << (uint64_t)device_keys_;
7167
d_walk_ = memory::AllocShared(place_, buf_size_ * sizeof(int64_t));
7268
cudaMemsetAsync(d_walk_->ptr(), 0, buf_size_ * sizeof(int64_t), stream_);
7369
d_sample_keys_ =

paddle/fluid/framework/data_feed.cu

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ __global__ void GraphDoWalkKernel(int64_t *neighbors, int64_t *walk,
256256
size_t col = step;
257257
size_t offset = (row * col_size + col);
258258
walk[offset] = neighbors[i * cur_degree + k];
259-
id_cnt[row] += 1;
260259
}
261260
}
262261
}
@@ -366,7 +365,7 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
366365
h_sample_keys = new int64_t[once_max_sample_keynum];
367366
h_offset2idx = new int[once_max_sample_keynum];
368367
h_len_per_row = new int[once_max_sample_keynum];
369-
h_prefix_sum = new int64_t[100];
368+
h_prefix_sum = new int64_t[once_max_sample_keynum + 1];
370369
}
371370
///////
372371
auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
@@ -378,7 +377,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
378377
stream_);
379378
int i = 0;
380379
int total_row = 0;
381-
while (i < buf_size_) {
380+
int remain_size =
381+
buf_size_ - walk_degree_ * once_sample_startid_len_ * walk_len_;
382+
while (i <= remain_size) {
382383
int tmp_len = cursor_ + once_sample_startid_len_ > device_key_size_
383384
? device_key_size_ - cursor_
384385
: once_sample_startid_len_;
@@ -389,7 +390,6 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
389390
<< " tmp_len = " << tmp_len << " cursor = " << cursor_
390391
<< " once_max_sample_keynum = " << once_max_sample_keynum;
391392
int64_t *cur_walk = walk + i;
392-
len_per_row += once_max_sample_keynum;
393393

394394
if (debug_mode_) {
395395
cudaMemcpy(h_walk, walk, buf_size_ * sizeof(int64_t),
@@ -408,14 +408,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
408408
if (debug_mode_) {
409409
cudaMemcpy(h_walk, walk, buf_size_ * sizeof(int64_t),
410410
cudaMemcpyDeviceToHost);
411-
cudaMemcpy(h_len_per_row, len_per_row,
412-
once_max_sample_keynum * sizeof(int), cudaMemcpyDeviceToHost);
413411
for (int xx = 0; xx < buf_size_; xx++) {
414412
VLOG(2) << "h_walk[" << xx << "]: " << h_walk[xx];
415413
}
416-
for (int xx = 0; xx < once_max_sample_keynum; xx++) {
417-
VLOG(2) << "h_len_per_row[" << xx << "]: " << h_len_per_row[xx];
418-
}
419414
}
420415
/////////
421416
step++;
@@ -433,12 +428,6 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
433428
for (int xx = 0; xx < buf_size_; xx++) {
434429
VLOG(2) << "h_walk[" << xx << "]: " << h_walk[xx];
435430
}
436-
cudaMemcpy(h_len_per_row, len_per_row,
437-
once_max_sample_keynum * sizeof(int),
438-
cudaMemcpyDeviceToHost);
439-
for (int xx = 0; xx < once_max_sample_keynum; xx++) {
440-
VLOG(2) << "h_len_per_row[" << xx << "]: " << h_len_per_row[xx];
441-
}
442431
}
443432
}
444433
cursor_ += tmp_len;
@@ -458,6 +447,13 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
458447
shuffle_seed_ = engine();
459448

460449
if (debug_mode_) {
450+
int *h_random_row = new int[total_row + 10];
451+
cudaMemcpy(h_random_row, d_random_row, total_row * sizeof(int),
452+
cudaMemcpyDeviceToHost);
453+
for (int xx = 0; xx < total_row; xx++) {
454+
VLOG(2) << "h_random_row[" << xx << "]: " << h_random_row[xx];
455+
}
456+
delete h_random_row;
461457
delete[] h_walk;
462458
delete[] h_sample_keys;
463459
delete[] h_offset2idx;

paddle/fluid/framework/data_feed.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -870,8 +870,11 @@ struct BufState {
870870
}
871871

872872
int GetNextBatch() {
873+
cursor += len;
873874
int tmp_len = cursor + batch_size > row_num ? row_num - cursor : batch_size;
874-
cursor += tmp_len;
875+
if (tmp_len == 0) {
876+
return 0;
877+
}
875878
len = tmp_len;
876879
central_word = -1;
877880
step = -1;

paddle/fluid/framework/hogwild_worker.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,6 @@ void HogwildWorker::TrainFilesWithProfiler() {
179179
PrintFetchVars();
180180
#ifdef PADDLE_WITH_HETERPS
181181
dev_ctx_->Wait();
182-
VLOG(1) << "GpuPs worker " << thread_id_ << " train cost " << total_time
183-
<< " seconds, ins_num: " << total_inst;
184182
for (size_t i = 0; i < op_name.size(); ++i) {
185183
VLOG(1) << "card:" << thread_id_ << ", op: " << op_name[i]
186184
<< ", mean time: " << op_total_time[i] / total_inst
@@ -202,6 +200,9 @@ void HogwildWorker::TrainFilesWithProfiler() {
202200
thread_scope_->DropKids();
203201
timeline.Start();
204202
}
203+
VLOG(0) << "GpuPs worker " << thread_id_ << " train cost " << total_time
204+
<< " seconds, ins_num: " << total_inst << " read time: " << read_time
205+
<< "seconds ";
205206

206207
if (need_dump_field_ || need_dump_param_) {
207208
writer_.Flush();
@@ -256,7 +257,7 @@ void HogwildWorker::TrainFiles() {
256257
thread_scope_->DropKids();
257258
}
258259
timeline.Pause();
259-
VLOG(3) << "worker " << thread_id_ << " train cost " << timeline.ElapsedSec()
260+
VLOG(0) << "worker " << thread_id_ << " train cost " << timeline.ElapsedSec()
260261
<< " seconds, ins_num: " << total_ins_num;
261262

262263
if (need_dump_field_ || need_dump_param_) {

python/paddle/fluid/dataset.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,11 +1041,9 @@ def _set_heter_ps(self, enable_heter_ps=False):
10411041
user no need to call this function.
10421042
"""
10431043
self.dataset.set_heter_ps(enable_heter_ps)
1044-
1044+
10451045
def set_graph_device_keys(self, device_keys):
10461046
"""
1047-
Set heter ps mode
1048-
user no need to call this function.
10491047
"""
10501048
self.dataset.set_graph_device_keys(device_keys)
10511049

@@ -1054,11 +1052,13 @@ def set_graph_config(self, config):
10541052
"""
10551053
self.proto_desc.graph_config.walk_degree = config.get("walk_degree", 1)
10561054
self.proto_desc.graph_config.walk_len = config.get("walk_len", 20)
1057-
self.proto_desc.graph_config.once_sample_startid_len = config.get("once_sample_startid_len", 8000)
1058-
self.proto_desc.graph_config.sample_times_one_chunk = config.get("sample_times_one_chunk", 10)
1055+
self.proto_desc.graph_config.window = config.get("window", 5)
1056+
self.proto_desc.graph_config.once_sample_startid_len = config.get(
1057+
"once_sample_startid_len", 8000)
1058+
self.proto_desc.graph_config.sample_times_one_chunk = config.get(
1059+
"sample_times_one_chunk", 10)
10591060
self.proto_desc.graph_config.batch_size = config.get("batch_size", 1)
10601061
self.proto_desc.graph_config.debug_mode = config.get("debug_mode", 0)
1061-
10621062

10631063

10641064
class QueueDataset(DatasetBase):

0 commit comments

Comments (0)