PaddlePaddle · seiriosPlus · Jul 22, 2019 · Jul 15, 2019 · Jul 16, 2019 · Jul 16, 2019
diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""HDFS Utils"""
+"""hdfs_utils.py will move to fluid/incubate/fleet/utils/hdfs.py"""
 
 import os
 import sys

diff --git a/python/paddle/fluid/contrib/utils/lookup_table_utils.py b/python/paddle/fluid/contrib/utils/lookup_table_utils.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""lookup_table_utils.py will move to fluid/incubate/fleet/utils/lookup_table.py"""
 
 from __future__ import print_function
 

diff --git a/python/paddle/fluid/incubate/fleet/base/fleet_base.py b/python/paddle/fluid/incubate/fleet/base/fleet_base.py
@@ -157,19 +157,26 @@ def split_files(self, files):
         Returns:
             list: files belongs to this worker.
         """
-        file_num = len(files)
         trainer_id = self.worker_index()
-        trainer_num = self.worker_num()
-        if trainer_num > file_num:
-            raise ValueError("trainer_num should be <= file_num : "
-                             "%s > %s" % (trainer_num, file_num))
-        start = 0
-        end = 0
-        for i in range(0, trainer_id + 1):
-            length = file_num / trainer_num + (i < (file_num % trainer_num))
-            start = end
-            end += length
-        return files[start:end]
+        trainers = self.worker_num()
+
+        if len(files) < trainers:
+            raise ValueError(
+                "file number must be greater than or equal to trainer number")
+
+        # divmod floors the quotient; a plain "/" yields a float under
+        # Python 3, and floats are not valid slice indices.
+        blocksize, remainder = divmod(len(files), trainers)
+
+        # Every trainer gets `blocksize` files and the first `remainder`
+        # trainers get one extra, so this trainer's slice starts after
+        # trainer_id full blocks plus one extra file for each
+        # lower-indexed trainer that received an extra file.
+        begin = trainer_id * blocksize + min(trainer_id, remainder)
+        extra = 1 if trainer_id < remainder else 0
+        end = begin + blocksize + extra
+
+        return files[begin:end]
 
     def init(self, role_maker=None):
         """

diff --git a/python/paddle/fluid/incubate/fleet/base/role_maker.py b/python/paddle/fluid/incubate/fleet/base/role_maker.py
@@ -102,6 +102,11 @@ def get_pserver_endpoints(self):
         """
         return self._server_endpoints
 
+    def to_string(self):
+        """Return a one-line summary of the role, current id and endpoints."""
+        template = "role: {}, current_id: {}, worker_endpoints: {}, server_endpoints: {}"
+        return template.format(self._role, self._current_id, self._worker_endpoints, self._server_endpoints)
+
 
 class MPIRoleMaker(RoleMakerBase):
     """

diff --git a/python/paddle/fluid/incubate/fleet/utils/__init__.py b/python/paddle/fluid/incubate/fleet/utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.