Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
@InterfaceAudience.Private
abstract class CostFunction {

public static final double COST_EPSILON = 0.0001;
/**
 * Returns the smallest meaningful difference for the given cost value: one unit
 * in the last place (ulp) of its double representation. Comparing against this
 * instead of a fixed constant keeps the tolerance proportional to the cost's
 * magnitude.
 */
public static double getCostEpsilon(double cost) {
  final double epsilon = Math.ulp(cost);
  return epsilon;
}

private float multiplier = 0;

Expand Down Expand Up @@ -91,13 +93,14 @@ protected void regionMoved(int region, int oldServer, int newServer) {
* @return The scaled value.
*/
protected static double scale(double min, double max, double value) {
double costEpsilon = getCostEpsilon(max);
if (
max <= min || value <= min || Math.abs(max - min) <= COST_EPSILON
|| Math.abs(value - min) <= COST_EPSILON
max <= min || value <= min || Math.abs(max - min) <= costEpsilon
|| Math.abs(value - min) <= costEpsilon
) {
return 0;
}
if (max <= min || Math.abs(max - min) <= COST_EPSILON) {
if (max <= min || Math.abs(max - min) <= costEpsilon) {
return 0;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,8 @@ void updateMetricsSize(int size) {

/**
 * Returns true if any region replicas are colocated (share a host) in the given
 * cluster state. The replica-host cost is strictly positive exactly when some
 * replica pair shares a host; the comparison uses an epsilon scaled to the
 * cost's magnitude (one ulp) so floating-point residue is not misread as
 * colocation.
 */
private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
  regionReplicaHostCostFunction.prepare(c);
  double cost = Math.abs(regionReplicaHostCostFunction.cost());
  // NOTE: the pasted diff left both the pre-change and post-change return
  // statements in this method; only the epsilon-scaled comparison is kept.
  return cost > CostFunction.getCostEpsilon(cost);
}

@RestrictedApi(explanation = "Should only be called in tests", link = "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,18 @@
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableSet;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ThreadLocalRandom;
Expand Down Expand Up @@ -286,6 +287,11 @@ public void assertRegionReplicaPlacement(Map<ServerName, List<RegionInfo>> serve
}
}

/**
 * Sets the stochastic balancer's maximum running time and reloads the balancer
 * configuration so the new limit takes effect for subsequent balance runs.
 */
protected void setMaxRunTime(Duration maxRunTime) {
  final long maxRunMillis = maxRunTime.toMillis();
  conf.setLong(StochasticLoadBalancer.MAX_RUNNING_TIME_KEY, maxRunMillis);
  loadBalancer.loadConf(conf);
}

protected String printStats(List<ServerAndLoad> servers) {
int numServers = servers.size();
int totalRegions = 0;
Expand All @@ -308,7 +314,10 @@ protected List<ServerAndLoad> convertToList(final Map<ServerName, List<RegionInf
}

protected String printMock(List<ServerAndLoad> balancedCluster) {
SortedSet<ServerAndLoad> sorted = new TreeSet<>(balancedCluster);
if (balancedCluster == null) {
return "null";
}
NavigableSet<ServerAndLoad> sorted = new TreeSet<>(balancedCluster);
ServerAndLoad[] arr = sorted.toArray(new ServerAndLoad[sorted.size()]);
StringBuilder sb = new StringBuilder(sorted.size() * 4 + 4);
sb.append("{ ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static org.junit.Assert.assertNull;

import java.time.Duration;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.HBaseClassTestRule;
Expand Down Expand Up @@ -51,8 +52,8 @@ public class TestStochasticLoadBalancerBalanceCluster extends BalancerTestBase {
*/
@Test
public void testBalanceCluster() throws Exception {
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 2 * 60 * 1000); // 2 min
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
setMaxRunTime(Duration.ofMillis(1500));
loadBalancer.onConfigurationChange(conf);

for (int[] mockCluster : clusterStateMocks) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import static junit.framework.TestCase.assertNotNull;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertNull;
import static org.mockito.Mockito.mock;
Expand All @@ -41,26 +40,20 @@
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ MasterTests.class, MediumTests.class })
public class TestStochasticLoadBalancerHeterogeneousCost extends BalancerTestBase {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestStochasticLoadBalancerHeterogeneousCost.class);

private static final Logger LOG =
LoggerFactory.getLogger(TestStochasticLoadBalancerHeterogeneousCost.class);
private static final double ALLOWED_WINDOW = 1.20;
private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
private static String RULES_FILE;

Expand Down Expand Up @@ -172,65 +165,7 @@ private void testHeterogeneousWithCluster(final int numNodes, final int numRegio
TestStochasticLoadBalancerHeterogeneousCostRules.createRulesFile(RULES_FILE, rules);
final Map<ServerName, List<RegionInfo>> serverMap =
this.createServerMap(numNodes, numRegions, numRegionsPerServer, 1, 1);
this.testWithCluster(serverMap, null, true, false);
}

protected void testWithCluster(final Map<ServerName, List<RegionInfo>> serverMap,
final RackManager rackManager, final boolean assertFullyBalanced,
final boolean assertFullyBalancedForReplicas) {
final List<ServerAndLoad> list = this.convertToList(serverMap);
LOG.info("Mock Cluster : " + this.printMock(list) + " " + this.printStats(list));

BalancerTestBase.loadBalancer.setRackManager(rackManager);

// Run the balancer.
final List<RegionPlan> plans =
BalancerTestBase.loadBalancer.balanceTable(HConstants.ENSEMBLE_TABLE_NAME, serverMap);
assertNotNull(plans);

// Check to see that this actually got to a stable place.
if (assertFullyBalanced || assertFullyBalancedForReplicas) {
// Apply the plan to the mock cluster.
final List<ServerAndLoad> balancedCluster = this.reconcile(list, plans, serverMap);

// Print out the cluster loads to make debugging easier.
LOG.info("Mock Balanced cluster : " + this.printMock(balancedCluster));

if (assertFullyBalanced) {
final List<RegionPlan> secondPlans =
BalancerTestBase.loadBalancer.balanceTable(HConstants.ENSEMBLE_TABLE_NAME, serverMap);
assertNull(secondPlans);

// create external cost function to retrieve limit
// for each RS
final HeterogeneousRegionCountCostFunction cf =
new HeterogeneousRegionCountCostFunction(conf);
assertNotNull(cf);
BalancerClusterState cluster = new BalancerClusterState(serverMap, null, null, null);
cf.prepare(cluster);

// checking that we all hosts have a number of regions below their limit
for (final ServerAndLoad serverAndLoad : balancedCluster) {
final ServerName sn = serverAndLoad.getServerName();
final int numberRegions = serverAndLoad.getLoad();
final int limit = cf.findLimitForRS(sn);

double usage = (double) numberRegions / (double) limit;
LOG.debug(
sn.getHostname() + ":" + numberRegions + "/" + limit + "(" + (usage * 100) + "%)");

// as the balancer is stochastic, we cannot check exactly the result of the balancing,
// hence the allowedWindow parameter
assertTrue("Host " + sn.getHostname() + " should be below "
+ cf.overallUsage * ALLOWED_WINDOW * 100 + "%; " + cf.overallUsage + ", " + usage + ", "
+ numberRegions + ", " + limit, usage <= cf.overallUsage * ALLOWED_WINDOW);
}
}

if (assertFullyBalancedForReplicas) {
this.assertRegionReplicaPlacement(serverMap, rackManager);
}
}
this.testWithClusterWithIteration(serverMap, null, true, false);
}

@Override
Expand Down Expand Up @@ -313,6 +248,10 @@ static class StochasticLoadTestBalancer extends StochasticLoadBalancer {
private FairRandomCandidateGenerator fairRandomCandidateGenerator =
new FairRandomCandidateGenerator();

// Constructs the test balancer with DummyMetricsStochasticBalancer — presumably
// a no-op metrics stub so tests run without real metrics wiring; TODO confirm.
StochasticLoadTestBalancer() {
super(new DummyMetricsStochasticBalancer());
}

@Override
protected CandidateGenerator getRandomGenerator() {
return fairRandomCandidateGenerator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import java.time.Duration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
Expand All @@ -38,8 +39,8 @@ public void testLargeCluster() {
int numRegionsPerServer = 80; // all servers except one
int numTables = 100;
int replication = 1;
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 6 * 60 * 1000);
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
setMaxRunTime(Duration.ofSeconds(30));
loadBalancer.onConfigurationChange(conf);
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import java.time.Duration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
Expand All @@ -38,7 +39,9 @@ public void testMidCluster() {
int numRegionsPerServer = 60; // all servers except one
int replication = 1;
int numTables = 40;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
setMaxRunTime(Duration.ofMillis(10_000));
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
}

@Test
Expand All @@ -50,7 +53,8 @@ public void testMidCluster2() {
int numTables = 400;
// num large num regions means may not always get to best balance with one run
boolean assertFullyBalanced = false;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables,
setMaxRunTime(Duration.ofMillis(10_000));
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
assertFullyBalanced, false);
}

Expand All @@ -61,7 +65,9 @@ public void testMidCluster3() {
int numRegionsPerServer = 9; // all servers except one
int replication = 1;
int numTables = 110;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
setMaxRunTime(Duration.ofMillis(10_000));
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
// TODO(eclark): Make sure that the tables are well distributed.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ public void testRegionReplicasOnSmallCluster() {
int replication = 3; // 3 replicas per region
int numRegionsPerServer = 80; // all regions are mostly balanced
int numTables = 10;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
}

private static class ForTestRackManagerOne extends RackManager {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import java.time.Duration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
Expand All @@ -34,13 +35,14 @@ public class TestStochasticLoadBalancerRegionReplicaHighReplication extends Bala
@Test
public void testRegionReplicasOnMidClusterHighReplication() {
  conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);
  // NOTE: the pasted diff merged the old maxRunningTime/testWithCluster lines
  // with their replacements; only the post-change statements are kept here.
  setMaxRunTime(Duration.ofSeconds(5));
  loadBalancer.onConfigurationChange(conf);
  int numNodes = 40;
  int numRegions = 6 * numNodes;
  int replication = 40; // 40 replicas per region, one for each server
  int numRegionsPerServer = 5;
  int numTables = 10;
  testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
    false, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import java.time.Duration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
Expand All @@ -38,12 +39,15 @@ public void testRegionReplicasOnLargeCluster() {
// ignore these two cost functions to allow us to make any move that helps other functions.
conf.setFloat("hbase.master.balancer.stochastic.moveCost", 0f);
conf.setFloat("hbase.master.balancer.stochastic.tableSkewCost", 0f);
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
setMaxRunTime(Duration.ofSeconds(15));
loadBalancer.onConfigurationChange(conf);
int numNodes = 1000;
int numRegions = 20 * numNodes; // 20 * replication regions per RS
int numRegionsPerServer = 19; // all servers except one
int numTables = 100;
int replication = 3;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public void testRegionReplicasOnMidCluster() {
int replication = 3; // 3 replicas per region
int numRegionsPerServer = 30; // all regions are mostly balanced
int numTables = 10;
testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
conf.setLong(StochasticLoadBalancer.MAX_RUNNING_TIME_KEY, 10_000);
testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
true, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.master.balancer;

import java.time.Duration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
Expand All @@ -34,13 +35,14 @@ public class TestStochasticLoadBalancerRegionReplicaReplicationGreaterThanNumNod

@Test
public void testRegionReplicationOnMidClusterReplicationGreaterThanNumNodes() {
  // NOTE: the pasted diff merged the removed maxRunningTime/onConfigurationChange
  // lines and the old testWithCluster call with their replacements; only the
  // post-change statements are kept. setMaxRunTime reloads the configuration.
  setMaxRunTime(Duration.ofSeconds(5));
  int numNodes = 40;
  int numRegions = 6 * 50;
  int replication = 50; // 50 replicas per region, more than numNodes
  int numRegionsPerServer = 6;
  int numTables = 10;
  testWithClusterWithIteration(numNodes, numRegions, numRegionsPerServer, replication, numTables,
    true, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ public class TestStochasticLoadBalancerRegionReplicaSameHosts extends BalancerTe
@Test
public void testRegionReplicationOnMidClusterSameHosts() {
conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
loadBalancer.onConfigurationChange(conf);
int numHosts = 30;
int numRegions = 30 * 30;
Expand All @@ -62,6 +61,6 @@ public void testRegionReplicationOnMidClusterSameHosts() {
}
}

testWithCluster(newServerMap, null, true, true);
testWithClusterWithIteration(newServerMap, null, true, true);
}
}
Loading