diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupMasterObserver.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupMasterObserver.java
new file mode 100644
index 000000000000..3e95e7bbcbc5
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupMasterObserver.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupManager;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.BulkLoad;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.MasterObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
+import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
+
+/**
+ * An Observer to facilitate backup operations
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
+public class BackupMasterObserver implements MasterCoprocessor, MasterObserver {
+  private static final Logger LOG = LoggerFactory.getLogger(BackupMasterObserver.class);
+
+  @Override
+  public Optional<MasterObserver> getMasterObserver() {
+    return Optional.of(this);
+  }
+
+  @Override
+  public void postDeleteTable(ObserverContext<MasterCoprocessorEnvironment> ctx,
+    TableName tableName) throws IOException {
+    Configuration cfg = ctx.getEnvironment().getConfiguration();
+    if (!BackupManager.isBackupEnabled(cfg)) {
+      LOG.debug("Skipping postDeleteTable hook since backup is disabled");
+      return;
+    }
+    deleteBulkLoads(cfg, tableName, (ignored) -> true);
+  }
+
+  @Override
+  public void postTruncateTable(ObserverContext<MasterCoprocessorEnvironment> ctx,
+    TableName tableName) throws IOException {
+    Configuration cfg = ctx.getEnvironment().getConfiguration();
+    if (!BackupManager.isBackupEnabled(cfg)) {
+      LOG.debug("Skipping postTruncateTable hook since backup is disabled");
+      return;
+    }
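+    // After a truncate the table no longer contains any data, so every bulk load entry
+    // tracked for this table in the backup system table can be removed.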
+    deleteBulkLoads(cfg, tableName, (ignored) -> true);
+  }
+
+  @Override
+  public void postModifyTable(final ObserverContext<MasterCoprocessorEnvironment> ctx,
+    final TableName tableName, TableDescriptor oldDescriptor, TableDescriptor currentDescriptor)
+    throws IOException {
+    Configuration cfg = ctx.getEnvironment().getConfiguration();
+    if (!BackupManager.isBackupEnabled(cfg)) {
+      LOG.debug("Skipping postModifyTable hook since backup is disabled");
+      return;
+    }
+
+    Set<String> oldFamilies = Arrays.stream(oldDescriptor.getColumnFamilies())
+      .map(ColumnFamilyDescriptor::getNameAsString).collect(Collectors.toSet());
+    Set<String> newFamilies = Arrays.stream(currentDescriptor.getColumnFamilies())
+      .map(ColumnFamilyDescriptor::getNameAsString).collect(Collectors.toSet());
+
+    Set<String> removedFamilies = Sets.difference(oldFamilies, newFamilies);
+    if (!removedFamilies.isEmpty()) {
+      Predicate<BulkLoad> filter = bulkload -> removedFamilies.contains(bulkload.getColumnFamily());
+      deleteBulkLoads(cfg, tableName, filter);
+    }
+  }
+
+  /**
+   * Deletes all bulk load entries for the given table, matching the provided predicate.
+   */
+  private void deleteBulkLoads(Configuration config, TableName tableName,
+    Predicate<BulkLoad> filter) throws IOException {
+    try (Connection connection = ConnectionFactory.createConnection(config);
+      BackupSystemTable tbl = new BackupSystemTable(connection)) {
+      List<BulkLoad> bulkLoads = tbl.readBulkloadRows(ImmutableList.of(tableName));
+      List<byte[]> rowsToDelete =
+        bulkLoads.stream().filter(filter).map(BulkLoad::getRowKey).collect(Collectors.toList());
+      tbl.deleteBulkLoadedRows(rowsToDelete);
+    }
+  }
+}
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
index 30a5674eb021..d05a421a3956 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.backup;
 
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
@@ -98,16 +99,17 @@
   String JOB_NAME_CONF_KEY = "mapreduce.job.name";
 
-  String BACKUP_CONFIG_STRING =
-    BackupRestoreConstants.BACKUP_ENABLE_KEY + "=true\n" + "hbase.master.logcleaner.plugins="
-      + "YOUR_PLUGINS,org.apache.hadoop.hbase.backup.master.BackupLogCleaner\n"
-      + "hbase.procedure.master.classes=YOUR_CLASSES,"
-      + "org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager\n"
-      + "hbase.procedure.regionserver.classes=YOUR_CLASSES,"
-      + "org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager\n"
-      + "hbase.coprocessor.region.classes=YOUR_CLASSES,"
-      + "org.apache.hadoop.hbase.backup.BackupObserver\n" + "and restart the cluster\n"
-      + "For more information please see http://hbase.apache.org/book.html#backuprestore\n";
+  String BACKUP_CONFIG_STRING = BackupRestoreConstants.BACKUP_ENABLE_KEY + "=true\n"
+    + "hbase.master.logcleaner.plugins="
+    + "YOUR_PLUGINS,org.apache.hadoop.hbase.backup.master.BackupLogCleaner\n"
+    + "hbase.procedure.master.classes=YOUR_CLASSES,"
+    + "org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager\n"
+    + "hbase.procedure.regionserver.classes=YOUR_CLASSES,"
+    + "org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager\n"
+    + CoprocessorHost.REGION_COPROCESSOR_CONF_KEY + "=YOUR_CLASSES,"
+    + BackupObserver.class.getSimpleName() + "\n" + CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY
+    + "=YOUR_CLASSES," + BackupMasterObserver.class.getSimpleName() + "\nand restart the cluster\n"
+    + "For more information please see http://hbase.apache.org/book.html#backuprestore\n";
 
   String ENABLE_BACKUP = "Backup is not enabled. To enable backup, "
     + "in hbase-site.xml, set:\n " + BACKUP_CONFIG_STRING;
 
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index 5afd580a649d..e294cb887c18 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -119,11 +119,17 @@ public static void decorateMasterConfiguration(Configuration conf) {
     plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
     conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
       (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName());
+
+    String observerClass = BackupObserver.class.getName();
+    String masterCoProc = conf.get(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+      (masterCoProc == null ? "" : masterCoProc + ",") + observerClass);
+
     if (LOG.isDebugEnabled()) {
       LOG.debug(
         "Added log cleaner: {}. Added master procedure manager: {}."
-          + "Added master procedure manager: {}",
-        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName());
+          + " Added master procedure manager: {}. Added master observer: {}",
+        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName(), observerClass);
     }
   }
 
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 203f3f61b0fb..705cac02542c 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -411,25 +411,24 @@ public void registerBulkLoad(TableName tableName, byte[] region,
     try (BufferedMutator bufferedMutator = connection.getBufferedMutator(bulkLoadTableName)) {
       List<Put> puts = BackupSystemTable.createPutForBulkLoad(tableName, region, cfToHfilePath);
       bufferedMutator.mutate(puts);
-      LOG.debug("Written {} rows for bulk load of {}", puts.size(), tableName);
+      LOG.debug("Written {} rows for bulk load of table {}", puts.size(), tableName);
     }
   }
 
-  /*
-   * Removes rows recording bulk loaded hfiles from backup table
-   * @param lst list of table names
-   * @param rows the rows to be deleted
+  /**
+   * Removes entries from the table that tracks all bulk loaded hfiles.
+   * @param rows the row keys of the entries to be deleted
    */
   public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
     try (BufferedMutator bufferedMutator = connection.getBufferedMutator(bulkLoadTableName)) {
-      List<Delete> lstDels = new ArrayList<>();
+      List<Delete> deletes = new ArrayList<>();
       for (byte[] row : rows) {
         Delete del = new Delete(row);
-        lstDels.add(del);
-        LOG.debug("orig deleting the row: " + Bytes.toString(row));
+        deletes.add(del);
+        LOG.debug("Deleting bulk load entry with key: {}", Bytes.toString(row));
       }
-      bufferedMutator.mutate(lstDels);
-      LOG.debug("deleted " + rows.size() + " original bulkload rows");
+      bufferedMutator.mutate(deletes);
+      LOG.debug("Deleted {} bulk load entries.", rows.size());
     }
   }
@@ -1522,16 +1521,6 @@ public static void deleteSnapshot(Connection conn) throws IOException {
     }
   }
 
-  public static List<Delete> createDeleteForOrigBulkLoad(List<TableName> lst) {
-    List<Delete> lstDels = new ArrayList<>(lst.size());
-    for (TableName table : lst) {
-      Delete del = new Delete(rowkey(BULK_LOAD_PREFIX, table.toString(), BLK_LD_DELIM));
-      del.addFamily(BackupSystemTable.META_FAMILY);
-      lstDels.add(del);
-    }
-    return lstDels;
-  }
-
   private Put createPutForDeleteOperation(String[] backupIdList) {
     byte[] value = Bytes.toBytes(StringUtils.join(backupIdList, ","));
     Put put = new Put(DELETE_OP_ROW);
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
index c4017e8c1a1a..7fb7a5768805 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
@@ -26,7 +26,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupCopyJob;
 import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -152,6 +154,11 @@ public void execute() throws IOException {
       // the snapshot.
       LOG.info("Execute roll log procedure for full backup ...");
 
+      // Gather the bulk loads being tracked by the system, which can be deleted (since their data
+      // will be part of the snapshot being taken). We gather this list before taking the actual
+      // snapshots for the same reason as the log rolls.
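+      // The gathered entries are only deleted further below, once the backup data has been
+      // captured and the new backup start code has been written.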
+      List<BulkLoad> bulkLoadsToDelete = backupManager.readBulkloadRows(tableList);
+
       Map<String, String> props = new HashMap<>();
       props.put("backupRoot", backupInfo.getBackupRootDir());
       admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
@@ -192,6 +199,9 @@ public void execute() throws IOException {
         BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
       backupManager.writeBackupStartCode(newStartCode);
 
+      backupManager.deleteBulkLoadedRows(
+        bulkLoadsToDelete.stream().map(BulkLoad::getRowKey).collect(Collectors.toList()));
+
       // backup complete
       completeBackup(conn, backupInfo, BackupType.FULL, conf);
     } catch (Exception e) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
index 833ce35edd0a..5c8d4b96c53d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
@@ -293,14 +293,6 @@ private void runTest(String testName, TableName tableName, BloomType bloomType,
     runTest(testName, htd, preCreateTable, tableSplitKeys, hfileRanges, useMap, false, depth);
   }
 
-  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
-    byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
-    byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles, int initRowCount,
-    int factor) throws Exception {
-    return loadHFiles(testName, htd, util, fam, qual, preCreateTable, tableSplitKeys, hfileRanges,
-      useMap, deleteFile, copyFiles, initRowCount, factor, 2);
-  }
-
   public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
     byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
     byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles, int initRowCount,