diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index 85ef9dc574a1..37a6a94e017a 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -26,8 +26,10 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -656,6 +658,7 @@ private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configu
 
     // Get snapshot files
     LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
+    Set<String> addedFiles = new HashSet<>();
     SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
       new SnapshotReferenceUtil.SnapshotVisitor() {
         @Override
@@ -675,7 +678,13 @@ public void storeFile(final RegionInfo regionInfo, final String family,
             snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table, referencedRegion, family,
               referencedHFile, storeFile.hasFileSize() ? storeFile.getFileSize() : -1);
           }
-          files.add(snapshotFileAndSize);
+          String fileToExport = snapshotFileAndSize.getFirst().getHfile();
+          if (!addedFiles.contains(fileToExport)) {
+            files.add(snapshotFileAndSize);
+            addedFiles.add(fileToExport);
+          } else {
+            LOG.debug("Skip the existing file: {}.", fileToExport);
+          }
         }
       });
 
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index cfb671a707bb..30a9973320c9 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -51,10 +51,12 @@
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
 import org.apache.hadoop.hbase.util.AbstractHBaseTool;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
 import org.apache.hadoop.hbase.util.Pair;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -206,6 +208,51 @@ public void testExportFileSystemStateWithMergeRegion() throws Exception {
     TEST_UTIL.deleteTable(tableName0);
   }
 
+  @Test
+  public void testExportFileSystemStateWithSplitRegion() throws Exception {
+    // disable compaction
+    admin.compactionSwitch(false,
+      admin.getRegionServers().stream().map(a -> a.getServerName()).collect(Collectors.toList()));
+    // create table
+    TableName splitTableName = TableName.valueOf(testName.getMethodName());
+    String splitTableSnap = "snapshot-" + testName.getMethodName();
+    admin.createTable(TableDescriptorBuilder.newBuilder(splitTableName).setColumnFamilies(
+      Lists.newArrayList(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build())).build());
+
+    Path output = TEST_UTIL.getDataTestDir("output/cf");
+    TEST_UTIL.getTestFileSystem().mkdirs(output);
+    // Create and load a large hfile so the export MR job runs long enough.
+    HFileTestUtil.createHFile(TEST_UTIL.getConfiguration(), TEST_UTIL.getTestFileSystem(),
+      new Path(output, "test_file"), FAMILY, Bytes.toBytes("q"), Bytes.toBytes("1"),
+      Bytes.toBytes("9"), 9999999);
+    BulkLoadHFilesTool tool = new BulkLoadHFilesTool(TEST_UTIL.getConfiguration());
+    tool.run(new String[] { output.getParent().toString(), splitTableName.getNameAsString() });
+
+    List<RegionInfo> regions = admin.getRegions(splitTableName);
+    assertEquals(1, regions.size());
+    tableNumFiles = regions.size();
+
+    // split region
+    admin.splitRegionAsync(regions.get(0).getEncodedNameAsBytes(), Bytes.toBytes("5")).get();
+    regions = admin.getRegions(splitTableName);
+    assertEquals(2, regions.size());
+
+    // take a snapshot
+    admin.snapshot(splitTableSnap, splitTableName);
+    // export snapshot and verify
+    Configuration tmpConf = TEST_UTIL.getConfiguration();
+    // Decrease the copy buffer size so the export tasks do not finish too quickly.
+    tmpConf.setInt("snapshot.export.buffer.size", 1);
+    // Decrease the maximum number of files per mapper so the three files (1 hfile + 2 reference
+    // files) are copied by different mappers concurrently.
+    tmpConf.setInt("snapshot.export.default.map.group", 1);
+    testExportFileSystemState(tmpConf, splitTableName, Bytes.toBytes(splitTableSnap),
+      Bytes.toBytes(splitTableSnap), tableNumFiles, TEST_UTIL.getDefaultRootDirPath(),
+      getHdfsDestinationDir(), false, false, getBypassRegionPredicate(), true, false);
+    // delete table
+    TEST_UTIL.deleteTable(splitTableName);
+  }
+
   @Test
   public void testExportFileSystemStateWithSkipTmp() throws Exception {
     TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP, true);