-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-25899 Improve efficiency of SnapshotHFileCleaner #3280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,10 +20,8 @@ | |
| import java.io.IOException; | ||
| import java.util.Collection; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.Timer; | ||
| import java.util.TimerTask; | ||
| import java.util.concurrent.locks.Lock; | ||
|
|
@@ -36,6 +34,8 @@ | |
| import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; | ||
| import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; | ||
| import org.apache.hadoop.hbase.util.CommonFSUtils; | ||
| import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; | ||
| import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; | ||
| import org.apache.yetus.audience.InterfaceAudience; | ||
| import org.apache.yetus.audience.InterfaceStability; | ||
| import org.slf4j.Logger; | ||
|
|
@@ -91,12 +91,12 @@ Collection<String> filesUnderSnapshot(final FileSystem fs, final Path snapshotDi | |
| private final FileSystem fs, workingFs; | ||
| private final SnapshotFileInspector fileInspector; | ||
| private final Path snapshotDir, workingSnapshotDir; | ||
| private final Set<String> cache = new HashSet<>(); | ||
| private volatile ImmutableSet<String> cache = ImmutableSet.of(); | ||
| /** | ||
| * This is a helper map of information about the snapshot directories so we don't need to rescan | ||
| * them if they haven't changed since the last time we looked. | ||
| */ | ||
| private final Map<String, SnapshotDirectoryInfo> snapshots = new HashMap<>(); | ||
| private ImmutableMap<String, SnapshotDirectoryInfo> snapshots = ImmutableMap.of(); | ||
| private final Timer refreshTimer; | ||
|
|
||
| /** | ||
|
|
@@ -184,7 +184,7 @@ public synchronized void triggerCacheRefreshForTesting() { | |
| // XXX this is inefficient to synchronize on the method, when what we really need to guard against | ||
| // is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the | ||
| // cache, but that seems overkill at the moment and isn't necessarily a bottleneck. | ||
| public synchronized Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files, | ||
| public Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files, | ||
| final SnapshotManager snapshotManager) throws IOException { | ||
| List<FileStatus> unReferencedFiles = Lists.newArrayList(); | ||
| List<String> snapshotsInProgress = null; | ||
|
|
@@ -200,13 +200,17 @@ public synchronized Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatu | |
| "skip to clean the HFiles this time"); | ||
| return unReferencedFiles; | ||
| } | ||
| ImmutableSet<String> currentCache = cache; | ||
| for (FileStatus file : files) { | ||
| String fileName = file.getPath().getName(); | ||
| if (!refreshed && !cache.contains(fileName)) { | ||
| refreshCache(); | ||
| refreshed = true; | ||
| if (!refreshed && !currentCache.contains(fileName)) { | ||
| synchronized (this) { | ||
| refreshCache(); | ||
| currentCache = cache; | ||
| refreshed = true; | ||
| } | ||
| } | ||
|
Comment on lines
+207
to
212
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this line is still blocking the delete tasks... basically I LOG.info/print the lock information before the line of maybe I'm wondered, why do we need a write lock for refreshing snapshot file cache ? |
||
| if (cache.contains(fileName)) { | ||
| if (currentCache.contains(fileName)) { | ||
| continue; | ||
| } | ||
| if (snapshotsInProgress == null) { | ||
|
|
@@ -235,22 +239,23 @@ private void refreshCache() throws IOException { | |
|
|
||
| // clear the cache, as in the below code, either we will also clear the snapshots, or we will | ||
| // refill the file name cache again. | ||
| this.cache.clear(); | ||
| if (ArrayUtils.isEmpty(snapshotDirs)) { | ||
| // remove all the remembered snapshots because we don't have any left | ||
| if (LOG.isDebugEnabled() && this.snapshots.size() > 0) { | ||
| LOG.debug("No snapshots on-disk, clear cache"); | ||
| } | ||
| this.snapshots.clear(); | ||
| this.snapshots = ImmutableMap.of(); | ||
| this.cache = ImmutableSet.of(); | ||
| return; | ||
| } | ||
|
|
||
| ImmutableSet.Builder<String> cacheBuilder = ImmutableSet.builder(); | ||
| ImmutableMap.Builder<String, SnapshotDirectoryInfo> snapshotsBuilder = ImmutableMap.builder(); | ||
| // iterate over all the cached snapshots and see if we need to update some, it is not an | ||
| // expensive operation if we do not reload the manifest of snapshots. | ||
| Map<String, SnapshotDirectoryInfo> newSnapshots = new HashMap<>(); | ||
| for (FileStatus snapshotDir : snapshotDirs) { | ||
| String name = snapshotDir.getPath().getName(); | ||
| SnapshotDirectoryInfo files = this.snapshots.remove(name); | ||
| SnapshotDirectoryInfo files = snapshots.get(name); | ||
| // if we don't know about the snapshot or its been modified, we need to update the | ||
| // files the latter could occur where I create a snapshot, then delete it, and then make a | ||
| // new snapshot with the same name. We will need to update the cache the information from | ||
|
|
@@ -262,19 +267,20 @@ private void refreshCache() throws IOException { | |
| files = new SnapshotDirectoryInfo(snapshotDir.getModificationTime(), storedFiles); | ||
| } | ||
| // add all the files to cache | ||
| this.cache.addAll(files.getFiles()); | ||
| newSnapshots.put(name, files); | ||
| cacheBuilder.addAll(files.getFiles()); | ||
| snapshotsBuilder.put(name, files); | ||
| } | ||
| // set the snapshots we are tracking | ||
| this.snapshots.clear(); | ||
| this.snapshots.putAll(newSnapshots); | ||
| this.snapshots = snapshotsBuilder.build(); | ||
| this.cache = cacheBuilder.build(); | ||
| } | ||
|
|
||
| List<String> getSnapshotsInProgress() throws IOException { | ||
| List<String> snapshotInProgress = Lists.newArrayList(); | ||
| // only add those files to the cache, but not to the known snapshots | ||
|
|
||
| FileStatus[] snapshotsInProgress = CommonFSUtils.listStatus(this.workingFs, this.workingSnapshotDir); | ||
| FileStatus[] snapshotsInProgress = CommonFSUtils.listStatus(this.workingFs, | ||
| this.workingSnapshotDir); | ||
|
|
||
| if (!ArrayUtils.isEmpty(snapshotsInProgress)) { | ||
| for (FileStatus snapshot : snapshotsInProgress) { | ||
|
|
@@ -301,11 +307,10 @@ public void run() { | |
| } catch (IOException e) { | ||
| LOG.warn("Failed to refresh snapshot hfile cache!", e); | ||
| // clear all the cached entries if we meet an error | ||
| cache.clear(); | ||
| snapshots.clear(); | ||
| cache = ImmutableSet.of(); | ||
| snapshots = ImmutableMap.of(); | ||
| } | ||
| } | ||
|
|
||
| } | ||
| } | ||
|
|
||
|
|
@@ -315,7 +320,6 @@ public void stop(String why) { | |
| this.stop = true; | ||
| this.refreshTimer.cancel(); | ||
| } | ||
|
|
||
| } | ||
|
|
||
| @Override | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.