@@ -256,7 +256,6 @@ __global__ void GraphDoWalkKernel(int64_t *neighbors, int64_t *walk,
256256 size_t col = step;
257257 size_t offset = (row * col_size + col);
258258 walk[offset] = neighbors[i * cur_degree + k];
259- id_cnt[row] += 1 ;
260259 }
261260 }
262261}
@@ -366,7 +365,7 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
366365 h_sample_keys = new int64_t [once_max_sample_keynum];
367366 h_offset2idx = new int [once_max_sample_keynum];
368367 h_len_per_row = new int [once_max_sample_keynum];
369- h_prefix_sum = new int64_t [100 ];
368+ h_prefix_sum = new int64_t [once_max_sample_keynum + 1 ];
370369 }
371370 // /////
372371 auto gpu_graph_ptr = GraphGpuWrapper::GetInstance ();
@@ -378,7 +377,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
378377 stream_);
379378 int i = 0 ;
380379 int total_row = 0 ;
381- while (i < buf_size_) {
380+ int remain_size =
381+ buf_size_ - walk_degree_ * once_sample_startid_len_ * walk_len_;
382+ while (i <= remain_size) {
382383 int tmp_len = cursor_ + once_sample_startid_len_ > device_key_size_
383384 ? device_key_size_ - cursor_
384385 : once_sample_startid_len_;
@@ -389,7 +390,6 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
389390 << " tmp_len = " << tmp_len << " cursor = " << cursor_
390391 << " once_max_sample_keynum = " << once_max_sample_keynum;
391392 int64_t *cur_walk = walk + i;
392- len_per_row += once_max_sample_keynum;
393393
394394 if (debug_mode_) {
395395 cudaMemcpy (h_walk, walk, buf_size_ * sizeof (int64_t ),
@@ -408,14 +408,9 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
408408 if (debug_mode_) {
409409 cudaMemcpy (h_walk, walk, buf_size_ * sizeof (int64_t ),
410410 cudaMemcpyDeviceToHost);
411- cudaMemcpy (h_len_per_row, len_per_row,
412- once_max_sample_keynum * sizeof (int ), cudaMemcpyDeviceToHost);
413411 for (int xx = 0 ; xx < buf_size_; xx++) {
414412 VLOG (2 ) << " h_walk[" << xx << " ]: " << h_walk[xx];
415413 }
416- for (int xx = 0 ; xx < once_max_sample_keynum; xx++) {
417- VLOG (2 ) << " h_len_per_row[" << xx << " ]: " << h_len_per_row[xx];
418- }
419414 }
420415 // ///////
421416 step++;
@@ -433,12 +428,6 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
433428 for (int xx = 0 ; xx < buf_size_; xx++) {
434429 VLOG (2 ) << " h_walk[" << xx << " ]: " << h_walk[xx];
435430 }
436- cudaMemcpy (h_len_per_row, len_per_row,
437- once_max_sample_keynum * sizeof (int ),
438- cudaMemcpyDeviceToHost);
439- for (int xx = 0 ; xx < once_max_sample_keynum; xx++) {
440- VLOG (2 ) << " h_len_per_row[" << xx << " ]: " << h_len_per_row[xx];
441- }
442431 }
443432 }
444433 cursor_ += tmp_len;
@@ -458,6 +447,13 @@ int GraphDataGenerator::FillWalkBuf(std::shared_ptr<phi::Allocation> d_walk) {
458447 shuffle_seed_ = engine ();
459448
460449 if (debug_mode_) {
450+ int *h_random_row = new int [total_row + 10 ];  // host staging buffer for the debug dump
451+ cudaMemcpy (h_random_row, d_random_row, total_row * sizeof (int ),
452+ cudaMemcpyDeviceToHost);
453+ for (int xx = 0 ; xx < total_row; xx++) {
454+ VLOG (2 ) << " h_random_row[" << xx << " ]: " << h_random_row[xx];
455+ }
456+ delete[] h_random_row;  // array form required: buffer was allocated with new int[...]
461457 delete[] h_walk;
462458 delete[] h_sample_keys;
463459 delete[] h_offset2idx;
0 commit comments