@@ -38,59 +38,6 @@ DLManager& global_dlmanager_pool() {
   return manager;
 }
 
-void GraphDataGenerator::AllocResource(const paddle::platform::Place& place,
-                                       std::vector<LoDTensor*> feed_vec,
-                                       std::vector<uint64_t>* h_device_keys) {
-  place_ = place;
-  gpuid_ = place_.GetDeviceId();
-  VLOG(3) << "gpuid " << gpuid_;
-  stream_ = dynamic_cast<platform::CUDADeviceContext*>(
-                platform::DeviceContextPool::Instance().Get(place))
-                ->stream();
-  feed_vec_ = feed_vec;
-  h_device_keys_ = h_device_keys;
-  device_key_size_ = h_device_keys_->size();
-  d_device_keys_ =
-      memory::AllocShared(place_, device_key_size_ * sizeof(uint64_t));
-  CUDA_CHECK(cudaMemcpyAsync(d_device_keys_->ptr(), h_device_keys_->data(),
-                             device_key_size_ * sizeof(uint64_t),
-                             cudaMemcpyHostToDevice, stream_));
-  size_t once_max_sample_keynum = walk_degree_ * once_sample_startid_len_;
-  d_prefix_sum_ =
-      memory::AllocShared(place_, (once_max_sample_keynum + 1) * sizeof(int));
-  int* d_prefix_sum_ptr = reinterpret_cast<int*>(d_prefix_sum_->ptr());
-  cudaMemsetAsync(d_prefix_sum_ptr, 0,
-                  (once_max_sample_keynum + 1) * sizeof(int), stream_);
-  cursor_ = 0;
-  jump_rows_ = 0;
-  device_keys_ = reinterpret_cast<uint64_t*>(d_device_keys_->ptr());
-  d_walk_ = memory::AllocShared(place_, buf_size_ * sizeof(uint64_t));
-  cudaMemsetAsync(d_walk_->ptr(), 0, buf_size_ * sizeof(uint64_t), stream_);
-  d_sample_keys_ =
-      memory::AllocShared(place_, once_max_sample_keynum * sizeof(uint64_t));
-
-  d_sampleidx2rows_.push_back(
-      memory::AllocShared(place_, once_max_sample_keynum * sizeof(int)));
-  d_sampleidx2rows_.push_back(
-      memory::AllocShared(place_, once_max_sample_keynum * sizeof(int)));
-  cur_sampleidx2row_ = 0;
-
-  d_len_per_row_ =
-      memory::AllocShared(place_, once_max_sample_keynum * sizeof(int));
-  for (int i = -window_; i < 0; i++) {
-    window_step_.push_back(i);
-  }
-  for (int i = 0; i < window_; i++) {
-    window_step_.push_back(i + 1);
-  }
-  buf_state_.Init(batch_size_, walk_len_, &window_step_);
-  d_random_row_ = memory::AllocShared(
-      place_,
-      (once_sample_startid_len_ * walk_degree_ * repeat_time_) * sizeof(int));
-  shuffle_seed_ = 0;
-  cudaStreamSynchronize(stream_);
-}
-
 class BufferedLineFileReader {
   typedef std::function<bool()> SampleFunc;
   static const int MAX_FILE_BUFF_SIZE = 4 * 1024 * 1024;
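For reference, the deleted `AllocResource` follows a common CUDA setup idiom: allocate the device buffers, enqueue the host-to-device key copy and buffer zeroing asynchronously on a single stream, and synchronize once at the end. Below is a minimal standalone sketch of that pattern using raw CUDA runtime calls instead of Paddle's `memory::AllocShared`; the function name and signature are illustrative, not part of the codebase:

```cpp
#include <cstdint>
#include <vector>

#include <cuda_runtime.h>

// Illustrative names only: this is not GraphDataGenerator itself, just the
// stream-ordered setup idiom the removed AllocResource used.
void AllocAndStageKeys(const std::vector<uint64_t>& h_keys,
                       size_t once_max_sample_keynum,
                       cudaStream_t stream,
                       uint64_t** d_keys_out,
                       int** d_prefix_sum_out) {
  // Device buffers: one for the keys, one prefix-sum array with an extra slot.
  cudaMalloc(reinterpret_cast<void**>(d_keys_out),
             h_keys.size() * sizeof(uint64_t));
  cudaMalloc(reinterpret_cast<void**>(d_prefix_sum_out),
             (once_max_sample_keynum + 1) * sizeof(int));
  // Async H2D copy: h_keys must outlive the stream synchronization below.
  cudaMemcpyAsync(*d_keys_out, h_keys.data(),
                  h_keys.size() * sizeof(uint64_t),
                  cudaMemcpyHostToDevice, stream);
  // Zero the prefix-sum buffer on the same stream, so ordering is implicit.
  cudaMemsetAsync(*d_prefix_sum_out, 0,
                  (once_max_sample_keynum + 1) * sizeof(int), stream);
  // A single synchronize covers everything queued on the stream, mirroring
  // the lone cudaStreamSynchronize(stream_) at the end of AllocResource.
  cudaStreamSynchronize(stream);
}
```

Because every operation is queued on the same stream, the copy and the memsets are ordered relative to each other without extra synchronization; only the final `cudaStreamSynchronize` is needed before other streams or the host touch the buffers.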
@@ -2643,8 +2590,7 @@ bool SlotRecordInMemoryDataFeed::Start() {
 #if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
   CHECK(paddle::platform::is_gpu_place(this->place_));
   pack_ = BatchGpuPackMgr().get(this->GetPlace(), used_slots_info_);
-  gpu_graph_data_generator_.AllocResource(this->place_, feed_vec_,
-                                          h_device_keys_);
+  gpu_graph_data_generator_.AllocResource(this->place_, feed_vec_);
 #endif
   return true;
 }
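One detail from the removed `AllocResource` body worth noting: the two `window_step_` loops build symmetric skip-gram-style offsets around a center walk position, deliberately skipping offset 0, and `buf_state_.Init` then consumes those offsets. A self-contained sketch of that construction (`BuildWindowSteps` is a hypothetical name introduced here for illustration):

```cpp
#include <vector>

// Hypothetical helper reproducing the window_step_ construction above:
// for window = 3 the result is {-3, -2, -1, 1, 2, 3}; offset 0 (the center
// node itself) is skipped.
std::vector<int> BuildWindowSteps(int window) {
  std::vector<int> steps;
  for (int i = -window; i < 0; ++i) {
    steps.push_back(i);         // left-hand offsets: -window .. -1
  }
  for (int i = 0; i < window; ++i) {
    steps.push_back(i + 1);     // right-hand offsets: 1 .. window
  }
  return steps;
}
```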