introduce WeightBalancedLibra algorithm (vertex-cut graph partitioning) (#249)

suncanghuai · suncanghuai · web-flow · commit 0c41df752ac2 · 2022-11-25T07:52:17.000+01:00
* implement the WeightBalancedLibra algorithm based on the paper's pseudocode
* add weight-related APIs to class CoordinatedPartitionState
* adjust the set operations of class CoordinatedRecord
* append a test case for WB-Libra to PartitionTest.cpp

Co-authored-by: suncanghuai <suncanghuai@hesaitech.com>
diff --git a/include/Graph/Graph.hpp b/include/Graph/Graph.hpp
@@ -2486,10 +2486,10 @@ namespace CXXGRAPH
 	}
 
 	template <typename T>
-	PartitionMap<T> Graph<T>::partitionGraph(PARTITIONING::PartitionAlgorithm algorithm, unsigned int numberOfPartitions, double param1, double param2, double param3, unsigned int numberOfthreads) const
+	PartitionMap<T> Graph<T>::partitionGraph(PARTITIONING::PartitionAlgorithm algorithm, unsigned int numberOfPartitions, double param1, double param2, double param3, unsigned int numberOfThreads) const
 	{
 		PartitionMap<T> partitionMap;
-		PARTITIONING::Globals globals(numberOfPartitions, algorithm, param1, param2, param3, numberOfthreads);
+		PARTITIONING::Globals globals(numberOfPartitions, algorithm, param1, param2, param3, numberOfThreads);
 		const T_EdgeSet<T> & edgeSet = getEdgeSet();
 		globals.edgeCardinality = edgeSet.size();
 		globals.vertexCardinality = this->getNodeSet().size();
diff --git a/include/Partitioning/CoordinatedPartitionState.hpp b/include/Partitioning/CoordinatedPartitionState.hpp
@@ -42,12 +42,14 @@ namespace CXXGRAPH
         private:
             std::map<int, std::shared_ptr<CoordinatedRecord<T>>> record_map;
             std::vector<int> machines_load_edges;
+            std::vector<double> machines_weight_edges;
             std::vector<int> machines_load_vertices;
             PartitionMap<T> partition_map;
             Globals GLOBALS;
             int MAX_LOAD;
             std::shared_ptr<std::mutex> machines_load_edges_mutex = nullptr;
             std::shared_ptr<std::mutex> machines_load_vertices_mutex = nullptr;
+            std::shared_ptr<std::mutex> machines_weight_edges_mutex = nullptr;
             std::shared_ptr<std::mutex> record_map_mutex = nullptr;
             //DatWriter out; //to print the final partition of each edge
         public:
@@ -56,10 +58,14 @@ namespace CXXGRAPH
 
             std::shared_ptr<Record<T>> getRecord(int x);
             int getMachineLoad(int m);
+            int getMachineWeight(int m);
             int getMachineLoadVertices(int m);
             void incrementMachineLoad(int m, const Edge<T> *e);
+            void incrementMachineWeight(int m, const Edge<T> *e);
             int getMinLoad();
             int getMaxLoad();
+            int getMachineWithMinWeight();
+            int getMachineWithMinWeight(const std::set<int> &partitions);
             std::vector<int> getMachines_load();
             int getTotalReplicas();
             int getNumVertices();
@@ -70,12 +76,22 @@ namespace CXXGRAPH
             const PartitionMap<T> &getPartitionMap();
         };
         template <typename T>
-        CoordinatedPartitionState<T>::CoordinatedPartitionState(Globals &G) : record_map(), GLOBALS(G), machines_load_edges_mutex(std::make_shared<std::mutex>()), machines_load_vertices_mutex(std::make_shared<std::mutex>()), record_map_mutex(std::make_shared<std::mutex>())
+        CoordinatedPartitionState<T>::CoordinatedPartitionState(Globals &G) 
+            : record_map(), 
+              GLOBALS(G),
+              machines_load_edges_mutex(std::make_shared<std::mutex>()),
+              machines_load_vertices_mutex(std::make_shared<std::mutex>()),
+              machines_weight_edges_mutex(std::make_shared<std::mutex>()), 
+              record_map_mutex(std::make_shared<std::mutex>())
         {
+            machines_load_edges.reserve(GLOBALS.numberOfPartition);
+            machines_load_vertices.reserve(GLOBALS.numberOfPartition);
+            machines_weight_edges.reserve(GLOBALS.numberOfPartition);
             for (int i = 0; i < GLOBALS.numberOfPartition; ++i)
             {
                 machines_load_edges.push_back(0);
                 machines_load_vertices.push_back(0);
+                machines_weight_edges.push_back(0);
                 partition_map[i] = std::make_shared<PARTITIONING::Partition<T>>(i);
             }
             MAX_LOAD = 0;
@@ -102,6 +118,13 @@ namespace CXXGRAPH
             return machines_load_edges.at(m);
         }
 
+        template <typename T>
+        int CoordinatedPartitionState<T>::getMachineWeight(int m)
+        {
+            std::lock_guard<std::mutex> lock(*machines_weight_edges_mutex);
+            return machines_weight_edges.at(m);
+        }
+
         template <typename T>
         int CoordinatedPartitionState<T>::getMachineLoadVertices(int m)
         {
@@ -121,6 +144,23 @@ namespace CXXGRAPH
             partition_map[m]->addEdge(e);
         }
         template <typename T>
+        void CoordinatedPartitionState<T>::incrementMachineWeight(int m, const Edge<T> *e)
+        {
+            std::lock_guard<std::mutex> lock(*machines_weight_edges_mutex);
+            double edge_weight = CXXGRAPH::NEGLIGIBLE_WEIGHT;
+            if (e->isWeighted().has_value() && e->isWeighted().value())
+            {
+                edge_weight = (dynamic_cast<const Weighted *>(e))->getWeight();               
+            }
+            machines_weight_edges[m] = machines_weight_edges[m] + edge_weight;
+            //double new_value = machines_weight_edges[m];
+            //if (new_value > MAX_LOAD)
+            //{
+            //    MAX_LOAD = new_value;
+            //}
+            partition_map[m]->addEdge(e);
+        }
+        template <typename T>
         int CoordinatedPartitionState<T>::getMinLoad()
         {
             std::lock_guard<std::mutex> lock(*machines_load_edges_mutex);
@@ -141,6 +181,44 @@ namespace CXXGRAPH
             return MAX_LOAD;
         }
         template <typename T>
+        int CoordinatedPartitionState<T>::getMachineWithMinWeight()
+        {
+            std::lock_guard<std::mutex> lock(*machines_weight_edges_mutex);
+            
+            double MIN_LOAD = std::numeric_limits<double>::max();
+            int machine_id = 0;
+            for (int i = 0; i < machines_weight_edges.size(); ++i)
+            {
+                double loadi = machines_weight_edges[i];
+                if (loadi < MIN_LOAD)
+                {
+                    MIN_LOAD = loadi;
+                    machine_id = i;
+                }
+            }
+
+            return machine_id;
+        }
+        template <typename T>
+        int CoordinatedPartitionState<T>::getMachineWithMinWeight(const std::set<int> &partitions)
+        {
+            std::lock_guard<std::mutex> lock(*machines_weight_edges_mutex);
+            
+            double MIN_LOAD = std::numeric_limits<double>::max();
+            int machine_id = 0;
+            for (const auto &partition_id : partitions)
+            {
+                double loadi = machines_weight_edges.at(partition_id);
+                if (loadi < MIN_LOAD)
+                {
+                    MIN_LOAD = loadi;
+                    machine_id = partition_id;
+                }
+            }
+
+            return machine_id;
+        }
+        template <typename T>
         std::vector<int> CoordinatedPartitionState<T>::getMachines_load()
         {
             std::lock_guard<std::mutex> lock(*machines_load_edges_mutex);
diff --git a/include/Partitioning/CoordinatedRecord.hpp b/include/Partitioning/CoordinatedRecord.hpp
@@ -52,9 +52,35 @@ namespace CXXGRAPH
             void incrementDegree();
 
             void addAll(std::set<int> &set);
-            std::set<int> intersection(CoordinatedRecord &x, CoordinatedRecord &y);
+            std::set<int> partition_intersection(std::shared_ptr<CoordinatedRecord> other);
+            std::set<int> partition_union(std::shared_ptr<CoordinatedRecord> other);
+            std::set<int> partition_difference(std::shared_ptr<CoordinatedRecord> other);
         };
         template <typename T>
+        std::set<int> CoordinatedRecord<T>::partition_intersection(std::shared_ptr<CoordinatedRecord> other)
+        {
+            std::set<int> result;
+            set_intersection(this->partitions.begin(), this->partitions.end(), other->partitions.begin(), other->partitions.end(),
+                             std::inserter(result, result.begin()));
+            return result;
+        }
+        template <typename T>
+        std::set<int> CoordinatedRecord<T>::partition_union(std::shared_ptr<CoordinatedRecord> other)
+        {
+            std::set<int> result;
+            set_union(this->partitions.begin(), this->partitions.end(), other->partitions.begin(), other->partitions.end(),
+                             std::inserter(result, result.begin()));
+            return result;
+        }
+        template <typename T>
+        std::set<int> CoordinatedRecord<T>::partition_difference(std::shared_ptr<CoordinatedRecord> other)
+        {
+            std::set<int> result;
+            set_difference(this->partitions.begin(), this->partitions.end(), other->partitions.begin(), other->partitions.end(),
+                             std::inserter(result, result.begin()));
+            return result;
+        }
+        template <typename T>
         CoordinatedRecord<T>::CoordinatedRecord() : partitions()
         {
             lock = new std::mutex();
@@ -120,14 +146,6 @@ namespace CXXGRAPH
         {
             partitions.insert(set.begin(), set.end());
         }
-        template <typename T>
-        std::set<int> CoordinatedRecord<T>::intersection(CoordinatedRecord &x, CoordinatedRecord &y)
-        {
-            std::set<int> result;
-            set_intersection(x.partitions.begin(), x.partitions.end(), y.partitions.begin(), y.partitions.end(),
-                             std::inserter(result, result.begin()));
-            return result;
-        }
     }
 }
 
diff --git a/include/Partitioning/PartitionAlgorithm.hpp b/include/Partitioning/PartitionAlgorithm.hpp
@@ -33,7 +33,8 @@ namespace CXXGRAPH
             GREEDY_VC_ALG, ///< A Greedy Algorithm
             HDRF_ALG,      ///< High-Degree (are) Replicated First (HDRF) Algorithm (Stream-Based Vertex-Cut Partitioning)
             EBV_ALG,       ///< Edge-Balanced Vertex-Cut Offline Algorithm (EBV)
-            ALG_2
+            ALG_2,
+            WB_LIBRA,           ///< Weighted Balanced Libra
         };
         typedef E_PartitionAlgorithm PartitionAlgorithm;
     }
diff --git a/include/Partitioning/PartitionState.hpp b/include/Partitioning/PartitionState.hpp
@@ -34,10 +34,14 @@ namespace CXXGRAPH
         public:
             virtual std::shared_ptr<Record<T>> getRecord(int x) = 0;
             virtual int getMachineLoad(int m) = 0;
+            virtual int getMachineWeight(int m) = 0;
             virtual int getMachineLoadVertices(int m) = 0;
-            virtual void incrementMachineLoad(int m,const Edge<T>* e) = 0;
+            virtual void incrementMachineLoad(int m, const Edge<T>* e) = 0;
+            virtual void incrementMachineWeight(int m, const Edge<T>* e) = 0;
             virtual int getMinLoad() = 0;
             virtual int getMaxLoad() = 0;
+            virtual int getMachineWithMinWeight() = 0;
+            virtual int getMachineWithMinWeight(const std::set<int> &partitions) = 0;
             virtual std::vector<int> getMachines_load() = 0;
             virtual int getTotalReplicas() = 0;
             virtual int getNumVertices() = 0;
diff --git a/include/Partitioning/Partitioner.hpp b/include/Partitioning/Partitioner.hpp
@@ -33,6 +33,7 @@
 #include "EdgeBalancedVertexCut.hpp"
 #include "GreedyVertexCut.hpp"
 #include "EBV.hpp"
+#include "WeightBalancedLibra.hpp"
 
 namespace CXXGRAPH
 {
@@ -73,8 +74,32 @@ namespace CXXGRAPH
             } else if (GLOBALS.partitionStategy == PartitionAlgorithm::EBV_ALG)
             {
                 algorithm = new EBV<T>(GLOBALS);
-            }
+            } else if (GLOBALS.partitionStategy == PartitionAlgorithm::WB_LIBRA)
+            {
+                // precompute weight sum
+                double weight_sum = 0.0;
+                for (const auto &edge_it : *(this->dataset))
+                {
+                    weight_sum += (edge_it->isWeighted().has_value() && edge_it->isWeighted().value()) ? dynamic_cast<const Weighted *>(edge_it)->getWeight() : CXXGRAPH::NEGLIGIBLE_WEIGHT;
+                }
+                double lambda = std::max(1.0, GLOBALS.param1);
+                double P = static_cast<double>(GLOBALS.numberOfPartition);        
+                // avoid divide by zero when some parameters are invalid  
+                double weight_sum_bound = (GLOBALS.numberOfPartition == 0) ? 0.0 : lambda * weight_sum / P;
 
+                // precompute degrees of vertices
+                std::unordered_map<std::size_t, int> vertices_degrees; 
+                for (const auto &edge_it : *(this->dataset))
+                {
+                    auto nodePair = edge_it->getNodePair();
+                    std::size_t u = nodePair.first->getId();
+                    std::size_t v = nodePair.second->getId();
+                    vertices_degrees[u]++;
+                    vertices_degrees[v]++;
+                }
+
+                algorithm = new WeightBalancedLibra<T>(GLOBALS, weight_sum_bound, move(vertices_degrees));
+            }
         }
 
         template <typename T>
@@ -93,6 +118,31 @@ namespace CXXGRAPH
             } else if (GLOBALS.partitionStategy == PartitionAlgorithm::EBV_ALG)
             {
                 algorithm = new EBV<T>(GLOBALS);
+            } else if (GLOBALS.partitionStategy == PartitionAlgorithm::WB_LIBRA)
+            {
+                // precompute weight sum
+                double weight_sum = 0.0;
+                for (const auto &edge_it : *(this->dataset))
+                {
+                    weight_sum += (edge_it->isWeighted().has_value() && edge_it->isWeighted().value()) ? dynamic_cast<const Weighted *>(edge_it)->getWeight() : CXXGRAPH::NEGLIGIBLE_WEIGHT;
+                }
+                double lambda = GLOBALS.param1;
+                double P = static_cast<double>(GLOBALS.numberOfPartition);        
+                // avoid divide by zero when some parameters are invalid  
+                double weight_sum_bound = (GLOBALS.numberOfPartition == 0) ? 0.0 : lambda * weight_sum / P;
+
+                // precompute degrees of vertices
+                std::unordered_map<std::size_t, int> vertices_degrees; 
+                for (const auto &edge_it : *(this->dataset))
+                {
+                    auto nodePair = edge_it->getNodePair();
+                    std::size_t u = nodePair.first->getId();
+                    std::size_t v = nodePair.second->getId();
+                    vertices_degrees[u]++;
+                    vertices_degrees[v]++;
+                }
+
+                algorithm = new WeightBalancedLibra<T>(GLOBALS, weight_sum_bound, move(vertices_degrees));
             }
         }
 
diff --git a/include/Partitioning/Utility/Globals.hpp b/include/Partitioning/Utility/Globals.hpp
@@ -54,13 +54,18 @@ namespace CXXGRAPH {
 		};
 
 		inline Globals::Globals(int numberOfPartiton, PartitionAlgorithm algorithm,double param1, double param2, double param3, unsigned int threads)
-		{
+		{			
 			this->numberOfPartition = numberOfPartiton;
 			this->partitionStategy = algorithm;			
 			this->threads = threads;
 			this->param1 = param1;
 			this->param2 = param2;
 			this->param3 = param3;
+            if (this->numberOfPartition <= 0)
+			{
+				std::cout << "ERROR: numberOfPartition " << numberOfPartition << std::endl;
+				exit(-1);
+			}
 		}
 
 		inline Globals::~Globals()
diff --git a/include/Partitioning/WeightBalancedLibra.hpp b/include/Partitioning/WeightBalancedLibra.hpp
diff --git a/include/Utility/ConstValue.hpp b/include/Utility/ConstValue.hpp
diff --git a/test/PartitionTest.cpp b/test/PartitionTest.cpp

Original file line number	Diff line number	Diff line change
`@@ -2486,10 +2486,10 @@ namespace CXXGRAPH`
`2486`	`2486`	`}`
`2487`	`2487`
`2488`	`2488`	`template <typename T>`
`2489`		`- PartitionMap<T> Graph<T>::partitionGraph(PARTITIONING::PartitionAlgorithm algorithm, unsigned int numberOfPartitions, double param1, double param2, double param3, unsigned int numberOfthreads) const`
	`2489`	`+ PartitionMap<T> Graph<T>::partitionGraph(PARTITIONING::PartitionAlgorithm algorithm, unsigned int numberOfPartitions, double param1, double param2, double param3, unsigned int numberOfThreads) const`
`2490`	`2490`	`{`
`2491`	`2491`	`PartitionMap<T> partitionMap;`
`2492`		`- PARTITIONING::Globals globals(numberOfPartitions, algorithm, param1, param2, param3, numberOfthreads);`
	`2492`	`+ PARTITIONING::Globals globals(numberOfPartitions, algorithm, param1, param2, param3, numberOfThreads);`
`2493`	`2493`	`const T_EdgeSet<T> & edgeSet = getEdgeSet();`
`2494`	`2494`	`globals.edgeCardinality = edgeSet.size();`
`2495`	`2495`	`globals.vertexCardinality = this->getNodeSet().size();`