|
9 | 9 | -- Let FTS detect/declare failure sooner |
10 | 10 | -- start_ignore |
11 | 11 | alter system set gp_fts_probe_interval to 10; |
| 12 | +-- After RESET, the primary still takes a little while to restart, and |
| 13 | +-- during that time FTS may think it is in the "recovery not in |
| 14 | +-- progress" stage and promote the mirror, so we need FTS to make |
| 15 | +-- that decision a bit less frequently. |
| 16 | +alter system set gp_fts_probe_retries to 15; |
12 | 17 | select pg_reload_conf(); |
13 | 18 | -- end_ignore |
14 | 19 |
|
15 | | --- Let the background writer sleep 27 seconds to delay the resetting. |
16 | | --- This number is selected because there's a slight chance that FTS senses |
17 | | --- "recovery not in progress" after its 5-second retry window and promotes |
18 | | --- the mirror. So just put the end of the sleep period away from the end |
19 | | --- of the retry windows. |
20 | | -select gp_inject_fault('fault_in_background_writer_quickdie', 'sleep', '', '', '', 1, 1, 27, dbid) |
| 20 | +-- Let the background writer sleep 17 seconds to delay the resetting. |
| 21 | +-- This number is chosen to be larger than the 15-second retry window, |
| 22 | +-- which makes the test meaningful, while reducing as much as possible the |
| 23 | +-- chance that FTS sees a "recovery not in progress" primary. |
| 24 | +select gp_inject_fault('fault_in_background_writer_quickdie', 'sleep', '', '', '', 1, 1, 17, dbid) |
21 | 25 | from gp_segment_configuration where role = 'p' and content = 0; |
22 | 26 |
|
23 | 27 | -- Do not let the postmaster send SIGKILL to the bgwriter |
@@ -54,6 +58,7 @@ select pg_sleep(30); |
54 | 58 | -- start_ignore |
55 | 59 | -- restore parameters |
56 | 60 | alter system reset gp_fts_probe_interval; |
| 61 | +alter system reset gp_fts_probe_retries; |
57 | 62 | select pg_reload_conf(); |
58 | 63 | -- end_ignore |
59 | 64 |
|
|
0 commit comments