Skip to content

Commit b426142

Browse files
authored
HBASE-22617 Recovered WAL directories not getting cleaned up (#330)
Signed-off-by: Guanghao Zhang <zghao@apache.org>
Signed-off-by: Andrew Purtell <apurtell@apache.org>
1 parent 15ac781 commit b426142

18 files changed

Lines changed: 236 additions & 170 deletions

File tree

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
import org.apache.hadoop.hbase.client.Connection;
5353
import org.apache.hadoop.hbase.client.RegionInfo;
5454
import org.apache.hadoop.hbase.client.TableDescriptor;
55-
import org.apache.hadoop.hbase.regionserver.HRegion;
5655
import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
5756
import org.apache.hadoop.hbase.util.CommonFSUtils;
5857
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -149,8 +148,8 @@ public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo, C
149148
// For each region, write the region info to disk
150149
LOG.debug("Starting to write region info for table " + table);
151150
for (RegionInfo regionInfo : regions) {
152-
Path regionDir =
153-
HRegion.getRegionDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
151+
Path regionDir = FSUtils
152+
.getRegionDirFromTableDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
154153
regionDir = new Path(backupInfo.getTableBackupDir(table), regionDir.getName());
155154
writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
156155
}

hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import java.util.Locale;
2929
import java.util.Map;
3030
import java.util.concurrent.ConcurrentHashMap;
31-
3231
import org.apache.hadoop.HadoopIllegalArgumentException;
3332
import org.apache.hadoop.conf.Configuration;
3433
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -428,11 +427,9 @@ private static boolean isValidWALRootDir(Path walDir, final Configuration c) thr
428427
* @return the region directory used to store WALs under the WALRootDir
429428
* @throws IOException if there is an exception determining the WALRootDir
430429
*/
431-
public static Path getWALRegionDir(final Configuration conf,
432-
final TableName tableName, final String encodedRegionName)
433-
throws IOException {
434-
return new Path(getWALTableDir(conf, tableName),
435-
encodedRegionName);
430+
public static Path getWALRegionDir(final Configuration conf, final TableName tableName,
431+
final String encodedRegionName) throws IOException {
432+
return new Path(getWALTableDir(conf, tableName), encodedRegionName);
436433
}
437434

438435
/**
@@ -444,8 +441,22 @@ public static Path getWALRegionDir(final Configuration conf,
444441
*/
445442
public static Path getWALTableDir(final Configuration conf, final TableName tableName)
446443
throws IOException {
447-
return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
448-
tableName.getQualifierAsString());
444+
Path baseDir = new Path(getWALRootDir(conf), HConstants.BASE_NAMESPACE_DIR);
445+
return new Path(new Path(baseDir, tableName.getNamespaceAsString()),
446+
tableName.getQualifierAsString());
447+
}
448+
449+
/**
450+
* For backward compatibility with HBASE-20734, where we store recovered edits in a wrong
451+
* directory without BASE_NAMESPACE_DIR. See HBASE-22617 for more details.
452+
* @deprecated For compatibility, will be removed in 4.0.0.
453+
*/
454+
@Deprecated
455+
public static Path getWrongWALRegionDir(final Configuration conf, final TableName tableName,
456+
final String encodedRegionName) throws IOException {
457+
Path wrongTableDir = new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
458+
tableName.getQualifierAsString());
459+
return new Path(wrongTableDir, encodedRegionName);
449460
}
450461

451462
/**
@@ -1058,5 +1069,4 @@ public StreamLacksCapabilityException(String message) {
10581069
super(message);
10591070
}
10601071
}
1061-
10621072
}

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
import org.apache.hadoop.fs.Path;
4040
import org.apache.hadoop.fs.PathFilter;
4141
import org.apache.hadoop.hbase.client.RegionInfo;
42-
import org.apache.hadoop.hbase.regionserver.HRegion;
4342
import org.apache.hadoop.hbase.regionserver.HStoreFile;
4443
import org.apache.hadoop.hbase.util.Bytes;
4544
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -86,23 +85,21 @@ private HFileArchiver() {
8685
public static boolean exists(Configuration conf, FileSystem fs, RegionInfo info)
8786
throws IOException {
8887
Path rootDir = FSUtils.getRootDir(conf);
89-
Path regionDir = HRegion.getRegionDir(rootDir, info);
88+
Path regionDir = FSUtils.getRegionDirFromRootDir(rootDir, info);
9089
return fs.exists(regionDir);
9190
}
9291

9392
/**
94-
* Cleans up all the files for a HRegion by archiving the HFiles to the
95-
* archive directory
93+
* Cleans up all the files for a HRegion by archiving the HFiles to the archive directory
9694
* @param conf the configuration to use
9795
* @param fs the file system object
9896
* @param info RegionInfo for region to be deleted
99-
* @throws IOException
10097
*/
10198
public static void archiveRegion(Configuration conf, FileSystem fs, RegionInfo info)
10299
throws IOException {
103100
Path rootDir = FSUtils.getRootDir(conf);
104101
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
105-
HRegion.getRegionDir(rootDir, info));
102+
FSUtils.getRegionDirFromRootDir(rootDir, info));
106103
}
107104

108105
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ public Path getWALRootDir() {
221221
* @return the directory for a give {@code region}.
222222
*/
223223
public Path getRegionDir(RegionInfo region) {
224-
return FSUtils.getRegionDir(FSUtils.getTableDir(getRootDir(), region.getTable()), region);
224+
return FSUtils.getRegionDirFromRootDir(getRootDir(), region);
225225
}
226226

227227
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,26 @@
1818
package org.apache.hadoop.hbase.master.assignment;
1919

2020
import java.io.IOException;
21-
2221
import org.apache.hadoop.fs.FileSystem;
22+
import org.apache.hadoop.fs.Path;
2323
import org.apache.hadoop.hbase.MetaTableAccessor;
2424
import org.apache.hadoop.hbase.backup.HFileArchiver;
2525
import org.apache.hadoop.hbase.client.RegionInfo;
2626
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
27+
import org.apache.hadoop.hbase.master.MasterFileSystem;
2728
import org.apache.hadoop.hbase.master.MasterServices;
2829
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
2930
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
3031
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
3132
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
3233
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
34+
import org.apache.hadoop.hbase.util.FSUtils;
3335
import org.apache.yetus.audience.InterfaceAudience;
3436
import org.slf4j.Logger;
3537
import org.slf4j.LoggerFactory;
38+
3639
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
40+
3741
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
3842
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
3943
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.GCRegionState;
@@ -64,46 +68,65 @@ public TableOperationType getTableOperationType() {
6468

6569
@Override
6670
protected Flow executeFromState(MasterProcedureEnv env, GCRegionState state)
67-
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
71+
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
6872
if (LOG.isTraceEnabled()) {
6973
LOG.trace(this + " execute state=" + state);
7074
}
7175
MasterServices masterServices = env.getMasterServices();
7276
try {
7377
switch (state) {
74-
case GC_REGION_PREPARE:
75-
// Nothing to do to prepare.
76-
setNextState(GCRegionState.GC_REGION_ARCHIVE);
77-
break;
78-
case GC_REGION_ARCHIVE:
79-
FileSystem fs = masterServices.getMasterFileSystem().getFileSystem();
80-
if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
81-
if (LOG.isDebugEnabled()) LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
82-
HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
83-
}
84-
setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
85-
break;
86-
case GC_REGION_PURGE_METADATA:
87-
// TODO: Purge metadata before removing from HDFS? This ordering is copied
88-
// from CatalogJanitor.
89-
AssignmentManager am = masterServices.getAssignmentManager();
90-
if (am != null) {
91-
if (am.getRegionStates() != null) {
92-
am.getRegionStates().deleteRegion(getRegion());
78+
case GC_REGION_PREPARE:
79+
// Nothing to do to prepare.
80+
setNextState(GCRegionState.GC_REGION_ARCHIVE);
81+
break;
82+
case GC_REGION_ARCHIVE:
83+
MasterFileSystem mfs = masterServices.getMasterFileSystem();
84+
FileSystem fs = mfs.getFileSystem();
85+
if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
86+
if (LOG.isDebugEnabled()) {
87+
LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
88+
}
89+
HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
90+
}
91+
FileSystem walFs = mfs.getWALFileSystem();
92+
// Cleanup the directories on WAL filesystem also
93+
Path regionWALDir = FSUtils.getWALRegionDir(env.getMasterConfiguration(),
94+
getRegion().getTable(), getRegion().getEncodedName());
95+
if (walFs.exists(regionWALDir)) {
96+
if (!walFs.delete(regionWALDir, true)) {
97+
LOG.debug("Failed to delete {}", regionWALDir);
98+
}
99+
}
100+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(env.getMasterConfiguration(),
101+
getRegion().getTable(), getRegion().getEncodedName());
102+
if (walFs.exists(wrongRegionWALDir)) {
103+
if (!walFs.delete(wrongRegionWALDir, true)) {
104+
LOG.debug("Failed to delete {}", wrongRegionWALDir);
105+
}
106+
}
107+
setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
108+
break;
109+
case GC_REGION_PURGE_METADATA:
110+
// TODO: Purge metadata before removing from HDFS? This ordering is copied
111+
// from CatalogJanitor.
112+
AssignmentManager am = masterServices.getAssignmentManager();
113+
if (am != null) {
114+
if (am.getRegionStates() != null) {
115+
am.getRegionStates().deleteRegion(getRegion());
116+
}
117+
}
118+
MetaTableAccessor.deleteRegion(masterServices.getConnection(), getRegion());
119+
masterServices.getServerManager().removeRegion(getRegion());
120+
FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
121+
if (fnm != null) {
122+
fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
93123
}
94-
}
95-
MetaTableAccessor.deleteRegion(masterServices.getConnection(), getRegion());
96-
masterServices.getServerManager().removeRegion(getRegion());
97-
FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
98-
if (fnm != null) {
99-
fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
100-
}
101-
return Flow.NO_MORE_STATE;
102-
default:
103-
throw new UnsupportedOperationException(this + " unhandled state=" + state);
124+
return Flow.NO_MORE_STATE;
125+
default:
126+
throw new UnsupportedOperationException(this + " unhandled state=" + state);
104127
}
105128
} catch (IOException ioe) {
106-
// TODO: This is going to spew log?
129+
// TODO: This is going to spew log? Add retry backoff
107130
LOG.warn("Error trying to GC " + getRegion().getShortNameToLog() + "; retrying...", ioe);
108131
}
109132
return Flow.HAS_MORE_STATE;

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -773,16 +773,16 @@ private ServerName getServerName(final MasterProcedureEnv env) {
773773
}
774774

775775
private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
776-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
776+
MasterFileSystem fs = env.getMasterFileSystem();
777777
long maxSequenceId = -1L;
778778
for (RegionInfo region : regionsToMerge) {
779779
maxSequenceId =
780-
Math.max(maxSequenceId, WALSplitUtil.getMaxRegionSequenceId(
781-
walFS, getWALRegionDir(env, region)));
780+
Math.max(maxSequenceId, WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
781+
region, fs::getFileSystem, fs::getWALFileSystem));
782782
}
783783
if (maxSequenceId > 0) {
784-
WALSplitUtil.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, mergedRegion),
785-
maxSequenceId);
784+
WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
785+
getWALRegionDir(env, mergedRegion), maxSequenceId);
786786
}
787787
}
788788

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import java.io.IOException;
2121
import java.util.Collections;
2222
import java.util.List;
23-
import org.apache.hadoop.fs.FileSystem;
2423
import org.apache.hadoop.hbase.Cell;
2524
import org.apache.hadoop.hbase.CellBuilderFactory;
2625
import org.apache.hadoop.hbase.CellBuilderType;
@@ -35,13 +34,13 @@
3534
import org.apache.hadoop.hbase.client.Result;
3635
import org.apache.hadoop.hbase.client.Table;
3736
import org.apache.hadoop.hbase.client.TableDescriptor;
37+
import org.apache.hadoop.hbase.master.MasterFileSystem;
3838
import org.apache.hadoop.hbase.master.MasterServices;
3939
import org.apache.hadoop.hbase.master.RegionState.State;
4040
import org.apache.hadoop.hbase.procedure2.Procedure;
4141
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
4242
import org.apache.hadoop.hbase.util.Bytes;
4343
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
44-
import org.apache.hadoop.hbase.util.FSUtils;
4544
import org.apache.hadoop.hbase.wal.WALSplitUtil;
4645
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
4746
import org.apache.yetus.audience.InterfaceAudience;
@@ -217,10 +216,9 @@ private void updateRegionLocation(RegionInfo regionInfo, State state, Put put)
217216
}
218217

219218
private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException {
220-
FileSystem walFS = master.getMasterWalManager().getFileSystem();
221-
long maxSeqId =
222-
WALSplitUtil.getMaxRegionSequenceId(walFS, FSUtils.getWALRegionDir(
223-
master.getConfiguration(), region.getTable(), region.getEncodedName()));
219+
MasterFileSystem fs = master.getMasterFileSystem();
220+
long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region,
221+
fs::getFileSystem, fs::getWALFileSystem);
224222
return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM;
225223
}
226224

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ public void createDaughterRegions(final MasterProcedureEnv env) throws IOExcepti
600600
final FileSystem fs = mfs.getFileSystem();
601601
HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
602602
env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
603-
regionFs.createSplitsDir();
603+
regionFs.createSplitsDir(daughterOneRI, daughterTwoRI);
604604

605605
Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
606606

@@ -874,14 +874,14 @@ private int getRegionReplication(final MasterProcedureEnv env) throws IOExceptio
874874
}
875875

876876
private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
877-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
878-
long maxSequenceId =
879-
WALSplitUtil.getMaxRegionSequenceId(walFS, getWALRegionDir(env, getParentRegion()));
877+
MasterFileSystem fs = env.getMasterFileSystem();
878+
long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(),
879+
getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
880880
if (maxSequenceId > 0) {
881-
WALSplitUtil.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughterOneRI),
882-
maxSequenceId);
883-
WALSplitUtil.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughterTwoRI),
884-
maxSequenceId);
881+
WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
882+
getWALRegionDir(env, daughterOneRI), maxSequenceId);
883+
WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(),
884+
getWALRegionDir(env, daughterTwoRI), maxSequenceId);
885885
}
886886
}
887887

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -319,12 +319,11 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
319319

320320
// Archive regions from FS (temp directory)
321321
if (archive) {
322-
List<Path> regionDirList = regions.stream()
323-
.filter(RegionReplicaUtil::isDefaultReplica)
324-
.map(region -> FSUtils.getRegionDir(tempTableDir, region))
322+
List<Path> regionDirList = regions.stream().filter(RegionReplicaUtil::isDefaultReplica)
323+
.map(region -> FSUtils.getRegionDirFromTableDir(tempTableDir, region))
325324
.collect(Collectors.toList());
326-
HFileArchiver.archiveRegions(env.getMasterConfiguration(), fs, mfs.getRootDir(),
327-
tempTableDir, regionDirList);
325+
HFileArchiver.archiveRegions(env.getMasterConfiguration(), fs, mfs.getRootDir(), tempTableDir,
326+
regionDirList);
328327
LOG.debug("Table '{}' archived!", tableName);
329328
}
330329

@@ -348,6 +347,13 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
348347
throw new IOException("Couldn't delete mob dir " + mobTableDir);
349348
}
350349
}
350+
351+
// Delete the directory on wal filesystem
352+
FileSystem walFs = mfs.getWALFileSystem();
353+
Path tableWALDir = FSUtils.getWALTableDir(env.getMasterConfiguration(), tableName);
354+
if (walFs.exists(tableWALDir) && !walFs.delete(tableWALDir, true)) {
355+
throw new IOException("Couldn't delete table dir on wal filesystem " + tableWALDir);
356+
}
351357
}
352358

353359
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
package org.apache.hadoop.hbase.master.procedure;
2020

2121
import java.io.IOException;
22-
import org.apache.hadoop.fs.FileSystem;
2322
import org.apache.hadoop.hbase.HBaseIOException;
2423
import org.apache.hadoop.hbase.HConstants;
2524
import org.apache.hadoop.hbase.MetaTableAccessor;
@@ -31,6 +30,7 @@
3130
import org.apache.hadoop.hbase.client.TableState;
3231
import org.apache.hadoop.hbase.constraint.ConstraintException;
3332
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
33+
import org.apache.hadoop.hbase.master.MasterFileSystem;
3434
import org.apache.hadoop.hbase.master.TableStateManager;
3535
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
3636
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -111,13 +111,13 @@ protected Flow executeFromState(final MasterProcedureEnv env, final DisableTable
111111
case DISABLE_TABLE_ADD_REPLICATION_BARRIER:
112112
if (env.getMasterServices().getTableDescriptors().get(tableName)
113113
.hasGlobalReplicationScope()) {
114-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
114+
MasterFileSystem fs = env.getMasterFileSystem();
115115
try (BufferedMutator mutator = env.getMasterServices().getConnection()
116116
.getBufferedMutator(TableName.META_TABLE_NAME)) {
117117
for (RegionInfo region : env.getAssignmentManager().getRegionStates()
118118
.getRegionsOfTable(tableName)) {
119-
long maxSequenceId =
120-
WALSplitUtil.getMaxRegionSequenceId(walFS, getWALRegionDir(env, region));
119+
long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(
120+
env.getMasterConfiguration(), region, fs::getFileSystem, fs::getWALFileSystem);
121121
long openSeqNum = maxSequenceId > 0 ? maxSequenceId + 1 : HConstants.NO_SEQNUM;
122122
mutator.mutate(MetaTableAccessor.makePutForReplicationBarrier(region, openSeqNum,
123123
EnvironmentEdgeManager.currentTime()));

0 commit comments

Comments (0)