Merged
Changes from all commits (126 commits)
addd5fc
miss format (#34771)
b3602sss Aug 11, 2021
45af4f2
[NPU] add elementwise_min_grad_op_npu,test=develop (#34731)
andyjiang1116 Aug 11, 2021
b5ec65e
[NPU] Add exp and exp_grad npu op (#34612)
0x45f Aug 11, 2021
88f2f4a
[HybridParallel] Support save/load for PipeLineParallel (#34768)
ForFishes Aug 11, 2021
3f962e7
add the basic apis for auto_parallel (#33804)
Aug 11, 2021
4d7af37
[hybrid] pp+dp support fp16 allreduce (#34762)
wangxicoding Aug 11, 2021
3f011d8
Add ext_tensor.slice() API (#34227)
Aug 11, 2021
9ed5db2
[NPU] add batch_norm_op_npu and test (#34056)
ronny1996 Aug 11, 2021
f6fab55
[NPU] add reduce_mean_op_npu and test (#34053)
ronny1996 Aug 11, 2021
9e3e08f
[NPU] add momentum_op_npu and test (#34082)
ronny1996 Aug 11, 2021
d45d311
split_op for npu (#34699)
sljlp Aug 11, 2021
234c21a
[NPU] add while, read_from_array and write_to_array npu op (#34755)
pangyoki Aug 11, 2021
fc537d4
[NPU] Support npu op flatten_contiguous_range_grad (#34798)
WorgenZhang Aug 11, 2021
3429c04
[Paddle TRT]fix_fc_int8_convert; fix_reshape_convert (#34787)
Wangzheee Aug 11, 2021
9d02313
`set_value_grad` propagate gradients to `Input` and `TensorValue` (#…
hbwx24 Aug 11, 2021
99f8f5c
[AMP] add state_dict and load_state_dict and unittest for class GradS…
zhangbo9674 Aug 11, 2021
0a5c99e
[oneDNN] Fix to issue #34554 (#34623)
jczaja Aug 11, 2021
1c31d9d
add det_mv3_db & LeViT test case in pr-ci-inference (#34803)
OliverLPH Aug 12, 2021
bc543e3
[NPU] Support npu op expand_v2 and expand_v2_grad (#34764)
WorgenZhang Aug 12, 2021
cfa6913
[NPU] Support npu kernel for smooth_l1_loss op (#34674)
wuhuachaocoding Aug 12, 2021
589d13c
[HybridParallel]Add Recompute for PipeLineParallel (#34607)
ForFishes Aug 12, 2021
0e28c8b
Fix safety-bug of functional.linear (#34696)
Ray2020BD Aug 12, 2021
016cc56
transformer c files (#34706)
xingfeng01 Aug 12, 2021
6326c3e
[Inference] Inference python api support fp16 (#34676)
jiweibo Aug 12, 2021
dffb0b2
fix set_grad_ivar bug of Tensor.backward (#34819)
zhwesky2010 Aug 12, 2021
dc62a22
Revert "[oneDNN] Fix to issue #34554 (#34623)" (#34838)
chenwhql Aug 12, 2021
572adcc
Remove incorrect signal error stack trace (#34842)
chenwhql Aug 12, 2021
3f71e8d
[NPU] add meshgrid, test=develop (#34576)
qili93 Aug 12, 2021
2164ad6
[npu]add unsqueeze2_grad,test=develop (#34733)
andyjiang1116 Aug 13, 2021
e92f038
add retry for gethostbyname (#34855)
Baibaifan Aug 13, 2021
507ea06
[Bug-Fix]fix bug of py36 import utils (#34873)
ForFishes Aug 13, 2021
17a9976
fix npu_finalize (#34857)
ronny1996 Aug 13, 2021
5b86b99
[NPU] fix bce_loss_npu, test=develop (#34876)
qili93 Aug 13, 2021
8bc4d85
Support sccache distributed storage on windows (#34879)
zhwesky2010 Aug 13, 2021
ac56d54
Add EmptyGradOpMaker CI Approval (#34810)
Aug 13, 2021
f421741
fix generator thread safety bug (#34888)
sneaxiy Aug 13, 2021
fc6b4a5
Bug fix : Can't load multiple modules of custom c++ op (#34505)
zyfncg Aug 13, 2021
ff4bdac
fix a bug of slice by none index (#34877)
zyfncg Aug 13, 2021
8c8667f
New Einsum API (#33821)
tongxin Aug 13, 2021
2cd05d5
[hybrid] refine pipeline stage and mp send/recv check (#34870)
wangxicoding Aug 13, 2021
7316018
[NPU] add p_norm_op_npu (#34695)
ronny1996 Aug 16, 2021
8fb17fc
Op-benchmark CI cpu and gpu (#34631)
tianshuo78520a Aug 16, 2021
34d188b
Check whl size (#34767)
tianshuo78520a Aug 16, 2021
28279f6
[NPU] remove npu int64 kernel for increment op (#34909)
zhiqiu Aug 16, 2021
e84b2e9
Add bcast semantics checks at C++ level to BroadcastTensorsOp (#34874)
jim19930609 Aug 16, 2021
ad6c3b9
[dev] fix dice_loss bug (#34757)
ghostxsl Aug 16, 2021
fd92d94
Support npu op hard_swish and hard_swish_grad (#34608)
zyfncg Aug 16, 2021
dc439a1
Enhance tensor shape check for dist op. (#34915)
ZHUI Aug 16, 2021
b0cb414
support margin loss (arcface, cosface, sphereface) for single GPU and…
GuoxiaWang Aug 16, 2021
e29c2d1
[amp] dygraph amp support param_group (#34899)
zhiqiu Aug 16, 2021
875cfd5
add unique_consecutive_op (#34334)
firestonelib Aug 16, 2021
e4e8cc9
[NPU] Support NPU kernel for nearest_interp and nearest_interp_grad o…
From00 Aug 16, 2021
3b9f040
[NPU] add nearest_interp_v2 and nearest_interp_v2_grad, test=develop …
qili93 Aug 16, 2021
f6d8ab5
fix iscan bug in test file (#34912)
lelelelelez Aug 16, 2021
9cb6565
[oneDNN] Fix to 34554 (same as previous PR but should build with GPU)…
jczaja Aug 16, 2021
ae80df9
Fix elementwise_add quantization (#34820)
wozna Aug 16, 2021
d028214
[CPU-PSLIB] Add config for scale_sparse_grad in config_fleet.py,test=…
WorgenZhang Aug 16, 2021
2e30134
Change the invoking method of settiem by Ellipsis and None index from…
zyfncg Aug 16, 2021
4981894
[NPU] Add size npu op (#34636)
0x45f Aug 16, 2021
6b4b9fe
hccl init sync (#34918)
Baibaifan Aug 16, 2021
2a4ed08
Jetson nano bilinear (#34751)
Aug 16, 2021
b1cc4a4
[NPU] Support npu op:(1)arg_min (2)arg_max (#34867)
veyron95 Aug 16, 2021
35ef418
Fix typos in English docs for diag and diagflat. (#34869)
limin2021 Aug 16, 2021
ed6624a
concurrent (#34908)
lelelelelez Aug 16, 2021
181f7ce
fix a bug in nlp: text_matching/sentence_transformers when last dim i…
AnnaTrainingG Aug 17, 2021
1614608
fix drop_last not work on IterableDataset (#34801)
heavengate Aug 17, 2021
5de576b
add api fill_diagonal_inplace (#34460)
zhiboniu Aug 17, 2021
8046e33
Add some passes which can be applied to Program (#34730)
sneaxiy Aug 17, 2021
10f9644
Align CTC grad scale same with ESPNet (#34729)
zh794390558 Aug 17, 2021
9d4f00b
add mkl multi-thread test cases in PR-CI-INFERENCE (#34946)
OliverLPH Aug 17, 2021
8ef1bf8
[bug fix] fix unfold negative_size_param (#34943)
ghostxsl Aug 17, 2021
01a3a2e
Modify the name of class in unittest with the same name (#34952)
hbwx24 Aug 17, 2021
7b3295a
add exclude rules of pre-commit for paddle/utils and third_party (#34…
MingMingShangTian Aug 17, 2021
f1c1d9e
[oneDNN ] disabling more ops caching (#34830)
jczaja Aug 17, 2021
9be4144
Copy boost optional to Paddle (#34780)
MingMingShangTian Aug 17, 2021
690f583
Update op-benchmark CI (#34962)
tianshuo78520a Aug 17, 2021
1ef2185
[NPU] add where_index op and tests (#34951)
Aganlengzi Aug 17, 2021
b4474fb
[NPU]Adamw skip update for npu (#34897)
sljlp Aug 17, 2021
c7070cb
[Paddle-TRT] unitest_quant_dequant (#34929)
Wangzheee Aug 18, 2021
100db44
support class center sample of PartialFC (#34106)
GuoxiaWang Aug 18, 2021
52a7b0c
[NPU] add retry on HcclGetRootInfo to fix "bind fail" (#34977)
zhiqiu Aug 18, 2021
12bf046
add the safe check for the some ops (#34978)
wawltor Aug 18, 2021
a137371
NPU use squared_l2_norm in GradientClipByGlobalNorm (#34836)
wangxicoding Aug 18, 2021
2e9a31e
Fix bug in alltoall (#34975)
Aug 18, 2021
dd533dd
Add function to disable paddle signal handler (#34577)
jim19930609 Aug 18, 2021
4d88cdb
[CustomOp] Fix ext_tensor.cast failed bug (#34884)
chenwhql Aug 18, 2021
8967a66
support quantization of conv2d_transpose (#34547)
XGZhang11 Aug 18, 2021
209075a
[CPU-PSLIB] Add consistency insepection of use_var_list and data_gene…
WorgenZhang Aug 18, 2021
51939c8
Fix the parameter name for atan2 API (#34812)
ronny1996 Aug 18, 2021
a9673b4
[Hybrid Performance] Move the cast op of AMP which cast fp32 param to…
wangxicoding Aug 18, 2021
40f6273
[NPU] Add leaky Relu (#34894)
Jackwaterveg Aug 18, 2021
1b71a71
[NPU] Add square grad (#34889)
Jackwaterveg Aug 18, 2021
1b747de
add paddle detection model in pr-ci-inference (#34986)
OliverLPH Aug 18, 2021
40d4d83
code refactoring for new executor (#34970)
wanghuancoder Aug 18, 2021
248e27b
fix pad outliers err (#34979)
littletomatodonkey Aug 18, 2021
755c8a1
Add NPU kernel for norm Op: float16 and float32 (#34609)
2742195759 Aug 18, 2021
9cbba97
[NPU]add rmsprop op (#34864)
lzzyzlbb Aug 18, 2021
22da190
Abstract DeviceEvent to manage cross-platform Event implementation (#…
Aurelius84 Aug 19, 2021
c4e05e1
Fix op-benchmark cpu/gpu error (#34997)
tianshuo78520a Aug 19, 2021
26213a7
Fix Inference CI CPU/GPU (#34931)
tianshuo78520a Aug 19, 2021
255fc7d
add the auto scan test for TensorRT convert,test=develop (#34980)
winter-wang Aug 19, 2021
ca7f520
fix batch_norm and instance norm when input is [] (#34107)
ceci3 Aug 19, 2021
a2e0865
Add dimension check for inverse to avoid dividing by 0 error when inp…
Xreki Aug 19, 2021
97cae5e
add resnet50_quant model in PR-CI-INFERENCE (#35012)
OliverLPH Aug 19, 2021
ef024c8
remove unused statements in test_dist_base.py (#35017)
Aug 19, 2021
ed9a14e
Fix op-benchmark cpu/gpu; test=document_fix (#35027)
tianshuo78520a Aug 19, 2021
866c1ea
fix reshape when is a number (#35016)
parap1uie-s Aug 19, 2021
4641e8f
[NPU] Support npu kernel for sin op (#34844)
JZZ-NOTE Aug 19, 2021
096b0f2
Add op benchmark run function log (#35034)
tianshuo78520a Aug 20, 2021
1aa2bde
[bug fix] fix spectral_norm bug (#35005)
ghostxsl Aug 20, 2021
4637151
add (N,C,*) input support for GroupNorm (#34773)
zoooo0820 Aug 20, 2021
f927b65
temporary disable resnet50-quant multi-thread test (#35035)
OliverLPH Aug 20, 2021
d082955
[NPU] Support npu op where and where grad (#34587)
zhaoyinglia Aug 20, 2021
4c115a8
[NPU] Support npu op depthwise_conv2d (#34853)
zhaoyinglia Aug 20, 2021
4416c79
fix set_lod in data_feed (#35000)
esythan Aug 20, 2021
6bacfb0
use spin lock in auto growth allocator (#34910)
wanghuancoder Aug 20, 2021
ef517a5
[NPU] Support npu kernel for pad3d op (#34815)
betterpig Aug 20, 2021
99ffeff
[npu]Add argsort op (#34865)
lzzyzlbb Aug 20, 2021
f6015d0
fix model-benchmark build error (#35041)
tianshuo78520a Aug 20, 2021
4d9b2d6
[hybrid performance] Grad fuse for gradient merge under pipeline mode…
FeixLiu Aug 20, 2021
e2241a4
Add paddle.linalg.matrix_power OP (#34667)
Aug 20, 2021
56c5e21
implementation of broadcast add backward by reduce (#34143)
ZzSean Aug 22, 2021
cf99c0d
Add cuda.device_count api (#34811)
linjieccc Aug 23, 2021
77a8a39
add adamw cuda kernel (#35020)
zhaoyinglia Aug 23, 2021
c3efabe
set node feature (#34994)
seemingwang Aug 23, 2021
aefec22
Fix a bug of strided_slice op, about the axes parameter access memory…
TeslaZhao Aug 23, 2021
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
@@ -49,4 +49,7 @@ repos:
entry: python ./tools/codestyle/copyright.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py|sh)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
exclude: |
(?x)^(
paddle/utils/.*
)$
1 change: 1 addition & 0 deletions cmake/cupti.cmake
@@ -9,6 +9,7 @@ find_path(CUPTI_INCLUDE_DIR cupti.h
$ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/include
${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include
NO_DEFAULT_PATH
)

96 changes: 96 additions & 0 deletions paddle/fluid/distributed/service/graph_brpc_client.cc
@@ -479,6 +479,102 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list(
closure);
return fut;
}

std::future<int32_t> GraphBrpcClient::set_node_feat(
const uint32_t &table_id, const std::vector<uint64_t> &node_ids,
const std::vector<std::string> &feature_names,
const std::vector<std::vector<std::string>> &features) {
std::vector<int> request2server;
std::vector<int> server2request(server_size, -1);
for (int query_idx = 0; query_idx < node_ids.size(); ++query_idx) {
int server_index = get_server_index_by_id(node_ids[query_idx]);
if (server2request[server_index] == -1) {
server2request[server_index] = request2server.size();
request2server.push_back(server_index);
}
}
size_t request_call_num = request2server.size();
std::vector<std::vector<uint64_t>> node_id_buckets(request_call_num);
std::vector<std::vector<int>> query_idx_buckets(request_call_num);
std::vector<std::vector<std::vector<std::string>>> features_idx_buckets(
request_call_num);
for (int query_idx = 0; query_idx < node_ids.size(); ++query_idx) {
int server_index = get_server_index_by_id(node_ids[query_idx]);
int request_idx = server2request[server_index];
node_id_buckets[request_idx].push_back(node_ids[query_idx]);
query_idx_buckets[request_idx].push_back(query_idx);
if (features_idx_buckets[request_idx].size() == 0) {
features_idx_buckets[request_idx].resize(feature_names.size());
}
for (int feat_idx = 0; feat_idx < feature_names.size(); ++feat_idx) {
features_idx_buckets[request_idx][feat_idx].push_back(
features[feat_idx][query_idx]);
}
}

DownpourBrpcClosure *closure = new DownpourBrpcClosure(
request_call_num,
[&, node_id_buckets, query_idx_buckets, request_call_num](void *done) {
int ret = 0;
auto *closure = (DownpourBrpcClosure *)done;
size_t fail_num = 0;
for (int request_idx = 0; request_idx < request_call_num;
++request_idx) {
if (closure->check_response(request_idx, PS_GRAPH_SET_NODE_FEAT) !=
0) {
++fail_num;
}
if (fail_num == request_call_num) {
ret = -1;
}
}
closure->set_promise_value(ret);
});

auto promise = std::make_shared<std::promise<int32_t>>();
closure->add_promise(promise);
std::future<int> fut = promise->get_future();

for (int request_idx = 0; request_idx < request_call_num; ++request_idx) {
int server_index = request2server[request_idx];
closure->request(request_idx)->set_cmd_id(PS_GRAPH_SET_NODE_FEAT);
closure->request(request_idx)->set_table_id(table_id);
closure->request(request_idx)->set_client_id(_client_id);
size_t node_num = node_id_buckets[request_idx].size();

closure->request(request_idx)
->add_params((char *)node_id_buckets[request_idx].data(),
sizeof(uint64_t) * node_num);
std::string joint_feature_name =
paddle::string::join_strings(feature_names, '\t');
closure->request(request_idx)
->add_params(joint_feature_name.c_str(), joint_feature_name.size());

// set features
std::string set_feature = "";
for (size_t feat_idx = 0; feat_idx < feature_names.size(); ++feat_idx) {
for (size_t node_idx = 0; node_idx < node_num; ++node_idx) {
size_t feat_len =
features_idx_buckets[request_idx][feat_idx][node_idx].size();
set_feature.append((char *)&feat_len, sizeof(size_t));
set_feature.append(
features_idx_buckets[request_idx][feat_idx][node_idx].data(),
feat_len);
}
}
closure->request(request_idx)
->add_params(set_feature.c_str(), set_feature.size());

GraphPsService_Stub rpc_stub =
getServiceStub(get_cmd_channel(server_index));
closure->cntl(request_idx)->set_log_id(butil::gettimeofday_ms());
rpc_stub.service(closure->cntl(request_idx), closure->request(request_idx),
closure->response(request_idx), closure);
}

return fut;
}

int32_t GraphBrpcClient::initialize() {
// set_shard_num(_config.shard_num());
BrpcPsClient::initialize();
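A caller-side sketch of the new client API above (the `client` object, table id, and values are illustrative, not taken from this PR). The returned future resolves to 0 on success and to -1 only when every per-server request failed:

// Illustrative only: `client` is assumed to be a connected GraphBrpcClient.
std::vector<uint64_t> node_ids = {37, 96};
std::vector<std::string> feature_names = {"c"};
// features is indexed [feature][node], matching get_node_feat's layout.
std::vector<std::vector<std::string>> features = {{"hello", "world"}};
std::future<int32_t> fut =
    client.set_node_feat(/*table_id=*/0, node_ids, feature_names, features);
int32_t ret = fut.get();  // 0 on success; -1 if every per-server RPC failed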
5 changes: 5 additions & 0 deletions paddle/fluid/distributed/service/graph_brpc_client.h
@@ -79,6 +79,11 @@ class GraphBrpcClient : public BrpcPsClient {
const std::vector<std::string>& feature_names,
std::vector<std::vector<std::string>>& res);

virtual std::future<int32_t> set_node_feat(
const uint32_t& table_id, const std::vector<uint64_t>& node_ids,
const std::vector<std::string>& feature_names,
const std::vector<std::vector<std::string>>& features);

virtual std::future<int32_t> clear_nodes(uint32_t table_id);
virtual std::future<int32_t> add_graph_node(
uint32_t table_id, std::vector<uint64_t>& node_id_list,
42 changes: 42 additions & 0 deletions paddle/fluid/distributed/service/graph_brpc_server.cc
@@ -16,6 +16,7 @@
#include "paddle/fluid/distributed/service/brpc_ps_server.h"

#include <thread> // NOLINT
#include <utility>
#include "butil/endpoint.h"
#include "iomanip"
#include "paddle/fluid/distributed/service/brpc_ps_client.h"
@@ -157,6 +158,8 @@ int32_t GraphBrpcService::initialize() {
&GraphBrpcService::add_graph_node;
_service_handler_map[PS_GRAPH_REMOVE_GRAPH_NODE] =
&GraphBrpcService::remove_graph_node;
_service_handler_map[PS_GRAPH_SET_NODE_FEAT] =
&GraphBrpcService::graph_set_node_feat;
// Shard initialization; the shard info of server_list is only available from env after the server starts
initialize_shard_info();

@@ -400,5 +403,44 @@ int32_t GraphBrpcService::graph_get_node_feat(Table *table,

return 0;
}

int32_t GraphBrpcService::graph_set_node_feat(Table *table,
const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl) {
CHECK_TABLE_EXIST(table, request, response)
if (request.params_size() < 3) {
set_response_code(
response, -1,
"graph_set_node_feat request requires at least 2 arguments");
return 0;
}
size_t node_num = request.params(0).size() / sizeof(uint64_t);
uint64_t *node_data = (uint64_t *)(request.params(0).c_str());
std::vector<uint64_t> node_ids(node_data, node_data + node_num);

std::vector<std::string> feature_names =
paddle::string::split_string<std::string>(request.params(1), "\t");

std::vector<std::vector<std::string>> features(
feature_names.size(), std::vector<std::string>(node_num));

const char *buffer = request.params(2).c_str();

for (size_t feat_idx = 0; feat_idx < feature_names.size(); ++feat_idx) {
for (size_t node_idx = 0; node_idx < node_num; ++node_idx) {
size_t feat_len = *(size_t *)(buffer);
buffer += sizeof(size_t);
auto feat = std::string(buffer, feat_len);
features[feat_idx][node_idx] = feat;
buffer += feat_len;
}
}

((GraphTable *)table)->set_node_feat(node_ids, feature_names, features);

return 0;
}

} // namespace distributed
} // namespace paddle
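On the wire, the third request parameter packs all feature values into one byte string: feature-major, each value prefixed by its raw size_t length (so client and server must agree on word size and endianness). A self-contained sketch of that framing, mirroring the pack loop in graph_brpc_client.cc and the unpack loop above — function names here are illustrative, nothing beyond the standard library is assumed:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Pack features[feat_idx][node_idx] the way the client does:
// feature-major, each value prefixed with its size_t length.
std::string PackFeatures(const std::vector<std::vector<std::string>> &features) {
  std::string buf;
  for (const auto &per_feature : features) {
    for (const auto &value : per_feature) {
      size_t len = value.size();
      buf.append(reinterpret_cast<const char *>(&len), sizeof(size_t));
      buf.append(value.data(), len);
    }
  }
  return buf;
}

// Unpack mirrors GraphBrpcService::graph_set_node_feat.
std::vector<std::vector<std::string>> UnpackFeatures(const std::string &buf,
                                                     size_t num_features,
                                                     size_t num_nodes) {
  std::vector<std::vector<std::string>> out(
      num_features, std::vector<std::string>(num_nodes));
  const char *p = buf.data();
  for (size_t f = 0; f < num_features; ++f) {
    for (size_t n = 0; n < num_nodes; ++n) {
      size_t len;
      std::memcpy(&len, p, sizeof(size_t));  // memcpy avoids unaligned reads
      p += sizeof(size_t);
      out[f][n].assign(p, len);
      p += len;
    }
  }
  return out;
}

int main() {
  auto packed = PackFeatures({{"hello", "world"}});
  auto round_trip = UnpackFeatures(packed, /*num_features=*/1, /*num_nodes=*/2);
  std::cout << round_trip[0][0] << " " << round_trip[0][1] << "\n";  // hello world
}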
4 changes: 4 additions & 0 deletions paddle/fluid/distributed/service/graph_brpc_server.h
@@ -83,9 +83,13 @@ class GraphBrpcService : public PsBaseService {
const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl);

int32_t graph_get_node_feat(Table *table, const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl);
int32_t graph_set_node_feat(Table *table, const PsRequestMessage &request,
PsResponseMessage &response,
brpc::Controller *cntl);
int32_t clear_nodes(Table *table, const PsRequestMessage &request,
PsResponseMessage &response, brpc::Controller *cntl);
int32_t add_graph_node(Table *table, const PsRequestMessage &request,
13 changes: 13 additions & 0 deletions paddle/fluid/distributed/service/graph_py_service.cc
@@ -330,6 +330,19 @@ std::vector<std::vector<std::string>> GraphPyClient::get_node_feat(
return v;
}

void GraphPyClient::set_node_feat(
std::string node_type, std::vector<uint64_t> node_ids,
std::vector<std::string> feature_names,
const std::vector<std::vector<std::string>> features) {
if (this->table_id_map.count(node_type)) {
uint32_t table_id = this->table_id_map[node_type];
auto status =
worker_ptr->set_node_feat(table_id, node_ids, feature_names, features);
status.wait();
}
return;
}

std::vector<FeatureNode> GraphPyClient::pull_graph_list(std::string name,
int server_index,
int start, int size,
3 changes: 3 additions & 0 deletions paddle/fluid/distributed/service/graph_py_service.h
@@ -155,6 +155,9 @@ class GraphPyClient : public GraphPyService {
std::vector<std::vector<std::string>> get_node_feat(
std::string node_type, std::vector<uint64_t> node_ids,
std::vector<std::string> feature_names);
void set_node_feat(std::string node_type, std::vector<uint64_t> node_ids,
std::vector<std::string> feature_names,
const std::vector<std::vector<std::string>> features);
std::vector<FeatureNode> pull_graph_list(std::string name, int server_index,
int start, int size, int step = 1);
::paddle::distributed::PSParameter GetWorkerProto();
1 change: 1 addition & 0 deletions paddle/fluid/distributed/service/sendrecv.proto
@@ -55,6 +55,7 @@ enum PsCmdID {
PS_GRAPH_CLEAR = 34;
PS_GRAPH_ADD_GRAPH_NODE = 35;
PS_GRAPH_REMOVE_GRAPH_NODE = 36;
PS_GRAPH_SET_NODE_FEAT = 37;
}

message PsRequestMessage {
28 changes: 28 additions & 0 deletions paddle/fluid/distributed/table/common_graph_table.cc
@@ -469,6 +469,34 @@ int32_t GraphTable::get_node_feat(const std::vector<uint64_t> &node_ids,
return 0;
}

int32_t GraphTable::set_node_feat(
const std::vector<uint64_t> &node_ids,
const std::vector<std::string> &feature_names,
const std::vector<std::vector<std::string>> &res) {
size_t node_num = node_ids.size();
std::vector<std::future<int>> tasks;
for (size_t idx = 0; idx < node_num; ++idx) {
uint64_t node_id = node_ids[idx];
tasks.push_back(_shards_task_pool[get_thread_pool_index(node_id)]->enqueue(
[&, idx, node_id]() -> int {
size_t index = node_id % this->shard_num - this->shard_start;
auto node = shards[index].add_feature_node(node_id);
node->set_feature_size(this->feat_name.size());
for (int feat_idx = 0; feat_idx < feature_names.size(); ++feat_idx) {
const std::string &feature_name = feature_names[feat_idx];
if (feat_id_map.find(feature_name) != feat_id_map.end()) {
node->set_feature(feat_id_map[feature_name], res[feat_idx][idx]);
}
}
return 0;
}));
}
for (size_t idx = 0; idx < node_num; ++idx) {
tasks[idx].get();
}
return 0;
}

std::pair<int32_t, std::string> GraphTable::parse_feature(
std::string feat_str) {
// Return (feat_id, bytes) if name is in this->feat_name, else return (-1,
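In GraphTable::set_node_feat above, each write is enqueued on the thread pool that owns the node's shard, so updates to a given node are serialized. A standalone sketch of that routing rule (the shard counts are made-up example values):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Mirrors the shard lookup in GraphTable::set_node_feat:
// global shard = node_id % shard_num; local index = global shard - shard_start.
size_t LocalShardIndex(uint64_t node_id, size_t shard_num, size_t shard_start) {
  return node_id % shard_num - shard_start;
}

int main() {
  // Example: 8 shards total, this server owns shards [4, 8).
  const size_t shard_num = 8, shard_start = 4;
  std::cout << LocalShardIndex(37, shard_num, shard_start) << "\n";  // 37 % 8 - 4 = 1
}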
6 changes: 6 additions & 0 deletions paddle/fluid/distributed/table/common_graph_table.h
@@ -46,6 +46,7 @@ class GraphShard {
}
return res;
}

GraphNode *add_graph_node(uint64_t id);
FeatureNode *add_feature_node(uint64_t id);
Node *find_node(uint64_t id);
@@ -122,6 +123,11 @@ class GraphTable : public SparseTable {
const std::vector<std::string> &feature_names,
std::vector<std::vector<std::string>> &res);

virtual int32_t set_node_feat(
const std::vector<uint64_t> &node_ids,
const std::vector<std::string> &feature_names,
const std::vector<std::vector<std::string>> &res);

protected:
std::vector<GraphShard> shards;
size_t shard_start, shard_end, server_num, shard_num_per_table, shard_num;
11 changes: 11 additions & 0 deletions paddle/fluid/distributed/test/graph_node_test.cc
@@ -558,6 +558,17 @@ void RunBrpcPushSparse() {
VLOG(0) << "get_node_feat: " << node_feat[1][0];
VLOG(0) << "get_node_feat: " << node_feat[1][1];

node_feat[1][0] = "helloworld";

client1.set_node_feat(std::string("user"), node_ids, feature_names,
node_feat);

// sleep(5);
node_feat =
client1.get_node_feat(std::string("user"), node_ids, feature_names);
VLOG(0) << "get_node_feat: " << node_feat[1][0];
ASSERT_TRUE(node_feat[1][0] == "helloworld");

// Test string
node_ids.clear();
node_ids.push_back(37);
12 changes: 11 additions & 1 deletion paddle/fluid/extension/include/ext_tensor.h
@@ -88,10 +88,20 @@ class PD_DLL_DECL Tensor {
/// It's usually used to set the input tensor data.
/// \param PlaceType of target place, of which
/// the tensor will copy to.

template <typename T>
Tensor copy_to(const PlaceType& place) const;

/// \brief Return a sub-tensor of the given tensor.
/// It is usually used to extract a sub-tensor (which supports
/// modifying the data of the original tensor) to perform further
/// operations.
/// \param begin_idx The index of the start row (inclusive) to slice.
/// The index number begins from 0.
/// \param end_idx The index of the end row (exclusive) to slice.
/// The index number begins from begin_idx + 1.
/// \return The sliced tensor.
Tensor slice(const int64_t begin_idx, const int64_t end_idx) const;

/// \brief Return the shape of the Tensor.
std::vector<int64_t> shape() const;

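A hedged usage sketch for the new slice() API from inside a custom operator; the tensor `x`, its shape, and the include path are assumptions based on Paddle's custom-op docs, not taken from this PR:

#include "paddle/extension.h"  // assumed custom-op API header

// Sketch: take rows [1, 3) of a tensor `x` with shape {4, 16}.
paddle::Tensor rows = x.slice(/*begin_idx=*/1, /*end_idx=*/3);
// rows.shape() is {2, 16}; per the doc comment above, slice() shares
// storage with `x`, so writes through `rows` also mutate `x`.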
25 changes: 25 additions & 0 deletions paddle/fluid/extension/src/ext_tensor.cc
@@ -124,6 +124,21 @@ void DeviceCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
} \
auto *tensor = static_cast<framework::LoDTensor *>(tensor_.get());

#define GET_INNER_PLACE \
platform::Place place; \
switch (place_) { \
case PlaceType::kCPU: \
place = platform::CPUPlace(); \
break; \
case PlaceType::kGPU: \
place = platform::CUDAPlace(); \
break; \
default: \
PADDLE_THROW(platform::errors::Unavailable( \
"Custom operator unsupported place id(%d)", \
static_cast<int>(place_))); \
}

void Tensor::reshape(const std::vector<int64_t> &shape) {
GET_CASTED_TENSOR
auto new_dim = framework::make_ddim(shape);
@@ -257,6 +272,16 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
return target;
}

Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const {
GET_CASTED_TENSOR
GET_INNER_PLACE
framework::Tensor intermediate = tensor->Slice(begin_idx, end_idx);
Tensor target = Tensor(place_);
framework::CustomTensorUtils::ShareDataFrom(
static_cast<const void *>(&intermediate), target);
return target;
}

template PD_DLL_DECL Tensor
Tensor::copy_to<float>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
1 change: 1 addition & 0 deletions paddle/fluid/extension/src/ext_tensor.cu