Skip to content

Commit 8e4a508

Browse files
authored
HDDS-11668. Recon List Keys API: Reuse key prefix if parentID is the same (#7410)
1 parent ee63232 commit 8e4a508

2 files changed

Lines changed: 78 additions & 9 deletions

File tree

hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,37 @@ public static String constructFullPath(OmKeyInfo omKeyInfo,
317317
public static String constructFullPath(String keyName, long initialParentId, String volumeName, String bucketName,
318318
ReconNamespaceSummaryManager reconNamespaceSummaryManager,
319319
ReconOMMetadataManager omMetadataManager) throws IOException {
320-
StringBuilder fullPath = new StringBuilder(keyName);
320+
StringBuilder fullPath = constructFullPathPrefix(initialParentId, volumeName, bucketName,
321+
reconNamespaceSummaryManager, omMetadataManager);
322+
if (fullPath.length() == 0) {
323+
return "";
324+
}
325+
fullPath.append(keyName);
326+
return fullPath.toString();
327+
}
328+
329+
330+
/**
331+
* Constructs the prefix path to a key from its key name and parent ID using a bottom-up approach, starting from the
332+
* leaf node.
333+
*
334+
* The method begins with the leaf node (the key itself) and recursively prepends parent directory names, fetched
335+
* via NSSummary objects, until reaching the parent bucket (parentId is -1). It effectively builds the path from
336+
* bottom to top, finally prepending the volume and bucket names to complete the full path. If the directory structure
337+
* is currently being rebuilt (indicated by the rebuildTriggered flag), this method returns an empty string to signify
338+
* that path construction is temporarily unavailable.
339+
*
340+
* @param initialParentId The parent ID of the key
341+
* @param volumeName The name of the volume
342+
* @param bucketName The name of the bucket
343+
* @return A StringBuilder containing the constructed prefix path of the key, or an empty string builder if a rebuild
344+
* is in progress.
345+
* @throws IOException
346+
*/
347+
public static StringBuilder constructFullPathPrefix(long initialParentId, String volumeName,
348+
String bucketName, ReconNamespaceSummaryManager reconNamespaceSummaryManager,
349+
ReconOMMetadataManager omMetadataManager) throws IOException {
350+
StringBuilder fullPath = new StringBuilder();
321351
long parentId = initialParentId;
322352
boolean isDirectoryPresent = false;
323353

@@ -326,16 +356,21 @@ public static String constructFullPath(String keyName, long initialParentId, Str
326356
if (nsSummary == null) {
327357
log.warn("NSSummary tree is currently being rebuilt or the directory could be in the progress of " +
328358
"deletion, returning empty string for path construction.");
329-
return "";
359+
fullPath.setLength(0);
360+
return fullPath;
330361
}
331362
if (nsSummary.getParentId() == -1) {
332363
if (rebuildTriggered.compareAndSet(false, true)) {
333364
triggerRebuild(reconNamespaceSummaryManager, omMetadataManager);
334365
}
335366
log.warn("NSSummary tree is currently being rebuilt, returning empty string for path construction.");
336-
return "";
367+
fullPath.setLength(0);
368+
return fullPath;
369+
}
370+
// On the last pass, dir-name will be empty and parent will be zero, indicating the loop should end.
371+
if (!nsSummary.getDirName().isEmpty()) {
372+
fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX);
337373
}
338-
fullPath.insert(0, nsSummary.getDirName() + OM_KEY_PREFIX);
339374

340375
// Move to the parent ID of the current directory
341376
parentId = nsSummary.getParentId();
@@ -344,10 +379,18 @@ public static String constructFullPath(String keyName, long initialParentId, Str
344379

345380
// Prepend the volume and bucket to the constructed path
346381
fullPath.insert(0, volumeName + OM_KEY_PREFIX + bucketName + OM_KEY_PREFIX);
382+
// TODO - why is this needed? It seems lke it should handle double slashes in the path name,
383+
// but its not clear how they get there. This normalize call is quite expensive as it
384+
// creates several objects (URI, PATH, back to string). There was a bug fixed above
385+
// where the last parent dirName was empty, which always caused a double // after the
386+
// bucket name, but with that fixed, it seems like this should not be needed. All tests
387+
// pass without it for key listing.
347388
if (isDirectoryPresent) {
348-
return OmUtils.normalizeKey(fullPath.toString(), true);
389+
String path = fullPath.toString();
390+
fullPath.setLength(0);
391+
fullPath.append(OmUtils.normalizeKey(path, true));
349392
}
350-
return fullPath.toString();
393+
return fullPath;
351394
}
352395

353396
private static void triggerRebuild(ReconNamespaceSummaryManager reconNamespaceSummaryManager,

hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,9 @@ private void retrieveKeysFromTable(
11761176
keyIter.seek(paramInfo.getStartPrefix());
11771177
}
11781178

1179+
long prevParentID = -1;
1180+
StringBuilder keyPrefix = null;
1181+
int keyPrefixLength = 0;
11791182
while (keyIter.hasNext()) {
11801183
Table.KeyValue<String, KeyEntityInfoProtoWrapper> entry = keyIter.next();
11811184
String dbKey = entry.getKey();
@@ -1189,9 +1192,32 @@ private void retrieveKeysFromTable(
11891192
if (applyFilters(entry, paramInfo)) {
11901193
KeyEntityInfoProtoWrapper keyEntityInfo = entry.getValue();
11911194
keyEntityInfo.setKey(dbKey);
1192-
keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(),
1193-
keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager,
1194-
omMetadataManager));
1195+
if (keyEntityInfo.getParentId() == 0) {
1196+
// Legacy bucket keys have a parentID of zero. OBS bucket keys have a parentID of the bucketID.
1197+
// FSO keys have a parent of the immediate parent directory.
1198+
// Legacy buckets are obsolete, so this code path is not optimized. We don't expect to see many Legacy
1199+
// buckets in practice.
1200+
prevParentID = -1;
1201+
keyEntityInfo.setPath(ReconUtils.constructFullPath(keyEntityInfo.getKeyName(), keyEntityInfo.getParentId(),
1202+
keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager,
1203+
omMetadataManager));
1204+
} else {
1205+
// As we iterate keys in sorted order, its highly likely that keys have the same prefix for many keys in a
1206+
// row. Especially for FSO buckets, its expensive to construct the path for each key. So, we construct the
1207+
// prefix once and reuse it for each identical parent. Only if the parent changes do we need to construct
1208+
// a new prefix path.
1209+
if (prevParentID != keyEntityInfo.getParentId()) {
1210+
prevParentID = keyEntityInfo.getParentId();
1211+
keyPrefix = ReconUtils.constructFullPathPrefix(keyEntityInfo.getParentId(),
1212+
keyEntityInfo.getVolumeName(), keyEntityInfo.getBucketName(), reconNamespaceSummaryManager,
1213+
omMetadataManager);
1214+
keyPrefixLength = keyPrefix.length();
1215+
}
1216+
keyPrefix.setLength(keyPrefixLength);
1217+
keyPrefix.append(keyEntityInfo.getKeyName());
1218+
keyEntityInfo.setPath(keyPrefix.toString());
1219+
}
1220+
11951221
results.add(keyEntityInfo);
11961222
paramInfo.setLastKey(dbKey);
11971223
if (results.size() >= paramInfo.getLimit()) {

0 commit comments

Comments
 (0)