Skip to content

Commit beafd33

Browse files
authored
HBASE-28419 Allow Action and Policies of ServerKillingMonkey to be configurable. (#5743)
Signed-off-by: Nick Dimiduk <[email protected]>
1 parent 34b738d commit beafd33

3 files changed

Lines changed: 77 additions & 16 deletions

File tree

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import java.util.Arrays;
2121
import java.util.HashSet;
2222
import java.util.Set;
23+
import java.util.concurrent.TimeUnit;
2324

2425
public interface MonkeyConstants {
2526

@@ -45,6 +46,11 @@ public interface MonkeyConstants {
4546
String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period";
4647
String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs";
4748
String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time";
49+
String RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = "restart.random.rs.exception.sleep.time";
50+
String RESTART_ACTIVE_NAMENODE_SLEEP_TIME = "restart.active.namenode.sleep.time";
51+
String RESTART_RANDOM_DATANODE_SLEEP_TIME = "restart.random.datanode.sleep.time";
52+
String RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = "restart.random.journalnode.sleep.time";
53+
String RESTART_RANDOM_ZKNODE_SLEEP_TIME = "restart.random.zknode.sleep.time";
4854
String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
4955
String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time";
5056
String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
@@ -92,6 +98,13 @@ public interface MonkeyConstants {
9298
long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
9399
boolean DEFAULT_UNBALANCE_KILL_META_RS = true;
94100
long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000;
101+
102+
long DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
103+
long DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
104+
long DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
105+
long DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
106+
long DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
107+
95108
long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
96109
long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
97110
float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java

Lines changed: 42 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,17 @@
4242
*/
4343
public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
4444

45+
private long restartRandomRsExceptMetaSleepTime;
46+
private long restartActiveMasterSleepTime;
47+
private long rollingBatchRestartRSSleepTime;
48+
private long restartActiveNameNodeSleepTime;
49+
private long restartRandomDataNodeSleepTime;
50+
private long restartRandomJournalNodeSleepTime;
51+
private long restartRandomZKNodeSleepTime;
4552
private long gracefulRollingRestartTSSLeepTime;
4653
private long rollingBatchSuspendRSSleepTime;
4754
private float rollingBatchSuspendtRSRatio;
55+
private long action1Period;
4856

4957
@Override
5058
public ChaosMonkey build() {
@@ -53,15 +61,15 @@ public ChaosMonkey build() {
5361
// Destructive actions to mess things around. Cannot run batch restart.
5462
// @formatter:off
5563
Action[] actions1 = new Action[] {
56-
new RestartRandomRsExceptMetaAction(60000),
57-
new RestartActiveMasterAction(5000),
64+
new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
65+
new RestartActiveMasterAction(restartActiveMasterSleepTime),
5866
// only allow 2 servers to be dead.
59-
new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
67+
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
6068
new ForceBalancerAction(),
61-
new RestartActiveNameNodeAction(60000),
62-
new RestartRandomDataNodeAction(60000),
63-
new RestartRandomJournalNodeAction(60000),
64-
new RestartRandomZKNodeAction(60000),
69+
new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
70+
new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
71+
new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
72+
new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
6573
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
6674
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
6775
rollingBatchSuspendtRSRatio)
@@ -73,12 +81,33 @@ public ChaosMonkey build() {
7381
new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() };
7482

7583
return new PolicyBasedChaosMonkey(properties, util,
76-
new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
77-
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
78-
new PeriodicRandomActionPolicy(60 * 1000, actions2));
84+
new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
85+
new PeriodicRandomActionPolicy(action1Period, actions1)),
86+
new PeriodicRandomActionPolicy(action1Period, actions2));
7987
}
8088

8189
private void loadProperties() {
90+
restartRandomRsExceptMetaSleepTime = Long
91+
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
92+
MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
93+
restartActiveMasterSleepTime =
94+
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
95+
MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
96+
rollingBatchRestartRSSleepTime = Long
97+
.parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
98+
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
99+
restartActiveNameNodeSleepTime =
100+
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
101+
MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
102+
restartRandomDataNodeSleepTime =
103+
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
104+
MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
105+
restartRandomJournalNodeSleepTime = Long
106+
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
107+
MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
108+
restartRandomZKNodeSleepTime =
109+
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
110+
MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
82111
gracefulRollingRestartTSSLeepTime =
83112
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
84113
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -88,5 +117,8 @@ private void loadProperties() {
88117
rollingBatchSuspendtRSRatio =
89118
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
90119
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
120+
action1Period =
121+
Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
122+
MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
91123
}
92124
}

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,13 @@
3737
*/
3838
public class ServerKillingMonkeyFactory extends MonkeyFactory {
3939

40+
private long restartRandomRsExceptMetaSleepTime;
41+
private long restartActiveMasterSleepTime;
42+
private long rollingBatchRestartRSSleepTime;
4043
private long gracefulRollingRestartTSSLeepTime;
4144
private long rollingBatchSuspendRSSleepTime;
4245
private float rollingBatchSuspendtRSRatio;
46+
private long action1Period;
4347

4448
@Override
4549
public ChaosMonkey build() {
@@ -48,10 +52,10 @@ public ChaosMonkey build() {
4852
// Destructive actions to mess things around. Cannot run batch restart
4953
// @formatter:off
5054
Action[] actions1 = new Action[] {
51-
new RestartRandomRsExceptMetaAction(60000),
52-
new RestartActiveMasterAction(5000),
55+
new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
56+
new RestartActiveMasterAction(restartActiveMasterSleepTime),
5357
// only allow 2 servers to be dead
54-
new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
58+
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
5559
new ForceBalancerAction(),
5660
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
5761
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
@@ -63,12 +67,21 @@ public ChaosMonkey build() {
6367
Action[] actions2 = new Action[] { new DumpClusterStatusAction() };
6468

6569
return new PolicyBasedChaosMonkey(properties, util,
66-
new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
67-
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
68-
new PeriodicRandomActionPolicy(60 * 1000, actions2));
70+
new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
71+
new PeriodicRandomActionPolicy(action1Period, actions1)),
72+
new PeriodicRandomActionPolicy(action1Period, actions2));
6973
}
7074

7175
private void loadProperties() {
76+
restartRandomRsExceptMetaSleepTime = Long
77+
.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
78+
MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
79+
restartActiveMasterSleepTime =
80+
Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
81+
MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
82+
rollingBatchRestartRSSleepTime = Long
83+
.parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
84+
MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
7285
gracefulRollingRestartTSSLeepTime =
7386
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
7487
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -78,5 +91,8 @@ private void loadProperties() {
7891
rollingBatchSuspendtRSRatio =
7992
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
8093
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
94+
action1Period =
95+
Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
96+
MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
8197
}
8298
}

0 commit comments

Comments
 (0)