Skip to content

Commit 72a8846

Browse files
authored
HBASE-26309 Balancer tends to move regions to the server at the end of list (#3723)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent b9b7fec commit 72a8846

5 files changed

Lines changed: 122 additions & 21 deletions

File tree

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ public String getRack(ServerName server) {
179179
serversPerHostList.get(hostIndex).add(serverIndex);
180180

181181
String rack = this.rackManager.getRack(sn);
182+
182183
if (!racksToIndex.containsKey(rack)) {
183184
racksToIndex.put(rack, numRacks++);
184185
serversPerRackList.add(new ArrayList<>());
@@ -187,6 +188,7 @@ public String getRack(ServerName server) {
187188
serversPerRackList.get(rackIndex).add(serverIndex);
188189
}
189190

191+
LOG.debug("Hosts are {} racks are {}", hostsToIndex, racksToIndex);
190192
// Count how many regions there are.
191193
for (Map.Entry<ServerName, List<RegionInfo>> entry : clusterState.entrySet()) {
192194
numRegions += entry.getValue().size();
@@ -285,6 +287,7 @@ public String getRack(ServerName server) {
285287
serversPerHost[i] = new int[serversPerHostList.get(i).size()];
286288
for (int j = 0; j < serversPerHost[i].length; j++) {
287289
serversPerHost[i][j] = serversPerHostList.get(i).get(j);
290+
LOG.debug("server {} is on host {}",serversPerHostList.get(i).get(j), i);
288291
}
289292
if (serversPerHost[i].length > 1) {
290293
multiServersPerHost = true;
@@ -295,6 +298,7 @@ public String getRack(ServerName server) {
295298
serversPerRack[i] = new int[serversPerRackList.get(i).size()];
296299
for (int j = 0; j < serversPerRack[i].length; j++) {
297300
serversPerRack[i][j] = serversPerRackList.get(i).get(j);
301+
LOG.info("server {} is on rack {}",serversPerRackList.get(i).get(j), i);
298302
}
299303
}
300304

@@ -792,6 +796,10 @@ boolean contains(int[] arr, int val) {
792796

793797
private Comparator<Integer> numRegionsComparator = Comparator.comparingInt(this::getNumRegions);
794798

799+
public Comparator<Integer> getNumRegionsComparator() {
800+
return numRegionsComparator;
801+
}
802+
795803
int getLowestLocalityRegionOnServer(int serverIndex) {
796804
if (regionFinder != null) {
797805
float lowestLocality = 1.0f;

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.hbase.master.balancer;
2020

21+
import java.util.concurrent.ThreadLocalRandom;
2122
import org.apache.yetus.audience.InterfaceAudience;
2223

2324
@InterfaceAudience.Private
@@ -34,27 +35,53 @@ BalanceAction generate(BalancerClusterState cluster) {
3435
private int pickLeastLoadedServer(final BalancerClusterState cluster, int thisServer) {
3536
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
3637

37-
int index = 0;
38-
while (servers[index] == null || servers[index] == thisServer) {
39-
index++;
40-
if (index == servers.length) {
41-
return -1;
38+
int selectedIndex = -1;
39+
double currentLargestRandom = -1;
40+
for (int i = 0; i < servers.length; i++) {
41+
if (servers[i] == null || servers[i] == thisServer) {
42+
continue;
43+
}
44+
if (selectedIndex != -1
45+
&& cluster.getNumRegionsComparator().compare(servers[i], servers[selectedIndex]) != 0) {
46+
// Exhausted servers of the same region count
47+
break;
48+
}
49+
// we don't know how many servers have the same region count, we will randomly select one
50+
// using a simplified inline reservoir sampling by assignmening a random number to stream
51+
// data and choose the greatest one. (http://gregable.com/2007/10/reservoir-sampling.html)
52+
double currentRandom = ThreadLocalRandom.current().nextDouble();
53+
if (currentRandom > currentLargestRandom) {
54+
selectedIndex = i;
55+
currentLargestRandom = currentRandom;
4256
}
4357
}
44-
return servers[index];
58+
return selectedIndex == -1 ? -1 : servers[selectedIndex];
4559
}
4660

4761
private int pickMostLoadedServer(final BalancerClusterState cluster, int thisServer) {
4862
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
4963

50-
int index = servers.length - 1;
51-
while (servers[index] == null || servers[index] == thisServer) {
52-
index--;
53-
if (index < 0) {
54-
return -1;
64+
int selectedIndex = -1;
65+
double currentLargestRandom = -1;
66+
for (int i = servers.length - 1; i >= 0; i--) {
67+
if (servers[i] == null || servers[i] == thisServer) {
68+
continue;
69+
}
70+
if (selectedIndex != -1 && cluster.getNumRegionsComparator().compare(servers[i],
71+
servers[selectedIndex]) != 0) {
72+
// Exhausted servers of the same region count
73+
break;
74+
}
75+
// we don't know how many servers have the same region count, we will randomly select one
76+
// using a simplified inline reservoir sampling by assignmening a random number to stream
77+
// data and choose the greatest one. (http://gregable.com/2007/10/reservoir-sampling.html)
78+
double currentRandom = ThreadLocalRandom.current().nextDouble();
79+
if (currentRandom > currentLargestRandom) {
80+
selectedIndex = i;
81+
currentLargestRandom = currentRandom;
5582
}
5683
}
57-
return servers[index];
84+
return selectedIndex == -1? -1 : servers[selectedIndex];
5885
}
5986

6087
}

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,6 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) {
345345
}
346346

347347
if (idleRegionServerExist(cluster)){
348-
LOG.info("Running balancer because at least one server hosts replicas of the same region." +
349-
"regionReplicaRackCostFunction={}", regionReplicaRackCostFunction.cost());
350348
LOG.info("Running balancer because cluster has idle server(s)."+
351349
" function cost={}", functionCost());
352350
return true;
@@ -510,9 +508,9 @@ protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName,
510508
LOG.info("Finished computing new moving plan. Computation took {} ms" +
511509
" to try {} different iterations. Found a solution that moves " +
512510
"{} regions; Going from a computed imbalance of {}" +
513-
" to a new imbalance of {}. ",
511+
" to a new imbalance of {}. funtionCost={}",
514512
endTime - startTime, step, plans.size(),
515-
initCost / sumMultiplier, currentCost / sumMultiplier);
513+
initCost / sumMultiplier, currentCost / sumMultiplier, functionCost());
516514
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
517515
return plans;
518516
}

hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/StochasticBalancerTestBase.java

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ protected void testWithCluster(Map<ServerName, List<RegionInfo>> serverMap,
8080
List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
8181

8282
// Print out the cluster loads to make debugging easier.
83-
LOG.info("Mock Balance : " + printMock(balancedCluster));
83+
LOG.info("Mock after Balance : " + printMock(balancedCluster));
8484

8585
if (assertFullyBalanced) {
8686
assertClusterAsBalanced(balancedCluster);
@@ -95,4 +95,40 @@ protected void testWithCluster(Map<ServerName, List<RegionInfo>> serverMap,
9595
}
9696
}
9797
}
98+
99+
protected void testWithClusterWithIteration(Map<ServerName, List<RegionInfo>> serverMap,
100+
RackManager rackManager, boolean assertFullyBalanced, boolean assertFullyBalancedForReplicas) {
101+
List<ServerAndLoad> list = convertToList(serverMap);
102+
LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
103+
104+
loadBalancer.setRackManager(rackManager);
105+
// Run the balancer.
106+
Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap);
107+
List<RegionPlan> plans = loadBalancer.balanceCluster(LoadOfAllTable);
108+
assertNotNull("Initial cluster balance should produce plans.", plans);
109+
110+
List<ServerAndLoad> balancedCluster = null;
111+
// Run through iteration until done. Otherwise will be killed as test time out
112+
while (plans != null && (assertFullyBalanced || assertFullyBalancedForReplicas)) {
113+
// Apply the plan to the mock cluster.
114+
balancedCluster = reconcile(list, plans, serverMap);
115+
116+
// Print out the cluster loads to make debugging easier.
117+
LOG.info("Mock after balance: " + printMock(balancedCluster));
118+
119+
LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap);
120+
plans = loadBalancer.balanceCluster(LoadOfAllTable);
121+
}
122+
123+
// Print out the cluster loads to make debugging easier.
124+
LOG.info("Mock Final balance: " + printMock(balancedCluster));
125+
126+
if (assertFullyBalanced) {
127+
assertNull("Given a requirement to be fully balanced, second attempt at plans should " +
128+
"produce none.", plans);
129+
}
130+
if (assertFullyBalancedForReplicas) {
131+
assertRegionReplicaPlacement(serverMap, rackManager);
132+
}
133+
}
98134
}

hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.balancer;
1919

20+
import java.util.HashMap;
2021
import java.util.List;
2122
import java.util.Map;
2223
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -37,25 +38,36 @@ public class TestStochasticLoadBalancerRegionReplicaWithRacks extends Stochastic
3738
HBaseClassTestRule.forClass(TestStochasticLoadBalancerRegionReplicaWithRacks.class);
3839

3940
private static class ForTestRackManager extends RackManager {
41+
4042
int numRacks;
43+
Map<String, Integer> serverIndexes = new HashMap<String, Integer>();
44+
int numServers = 0;
4145

4246
public ForTestRackManager(int numRacks) {
4347
this.numRacks = numRacks;
4448
}
4549

50+
4651
@Override
4752
public String getRack(ServerName server) {
48-
return "rack_" + (server.hashCode() % numRacks);
53+
String key = server.getServerName();
54+
if (!serverIndexes.containsKey(key)) {
55+
serverIndexes.put(key, numServers++);
56+
}
57+
return "rack_" + serverIndexes.get(key) % numRacks;
4958
}
5059
}
5160

5261
@Test
5362
public void testRegionReplicationOnMidClusterWithRacks() {
54-
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 10000000L);
63+
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 100000000L);
64+
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
5565
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
5666
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
67+
// for full balance
68+
// conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.001f);
5769
loadBalancer.onConfigurationChange(conf);
58-
int numNodes = 4;
70+
int numNodes = 5;
5971
int numRegions = numNodes * 1;
6072
int replication = 3; // 3 replicas per region
6173
int numRegionsPerServer = 1;
@@ -65,6 +77,26 @@ public void testRegionReplicationOnMidClusterWithRacks() {
6577
createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
6678
RackManager rm = new ForTestRackManager(numRacks);
6779

68-
testWithCluster(serverMap, rm, false, true);
80+
testWithClusterWithIteration(serverMap, rm, true, true);
81+
}
82+
83+
@Test
84+
public void testRegionReplicationOnLargeClusterWithRacks() {
85+
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", false);
86+
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 5000L);
87+
conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
88+
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 10 * 1000); // 10 sec
89+
loadBalancer.onConfigurationChange(conf);
90+
int numNodes = 100;
91+
int numRegions = numNodes * 30;
92+
int replication = 3; // 3 replicas per region
93+
int numRegionsPerServer = 28;
94+
int numTables = 1;
95+
int numRacks = 4; // all replicas should be on a different rack
96+
Map<ServerName, List<RegionInfo>> serverMap =
97+
createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
98+
RackManager rm = new ForTestRackManager(numRacks);
99+
100+
testWithClusterWithIteration(serverMap, rm, true, true);
69101
}
70102
}

0 commit comments

Comments
 (0)