Skip to content

Commit 9a60e53

Browse files
committed
Add namespace key for INode. (shv)
1 parent 1f1a0c4 commit 9a60e53

File tree

4 files changed

+176
-18
lines changed

4 files changed

+176
-18
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PartitionedGSet.java

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.Iterator;
2323
import java.util.Map.Entry;
2424
import java.util.NavigableMap;
25+
import java.util.Set;
2526
import java.util.TreeMap;
2627

2728
import org.apache.hadoop.HadoopIllegalArgumentException;
@@ -44,7 +45,8 @@
4445
@InterfaceAudience.Private
4546
public class PartitionedGSet<K, E extends K> implements GSet<K, E> {
4647

47-
private static final int DEFAULT_PARTITION_CAPACITY = 2027;
48+
private static final int DEFAULT_PARTITION_CAPACITY = 65536; // 4096; // 5120; // 2048; // 1027;
49+
private static final float DEFAULT_PARTITION_OVERFLOW = 1.8f;
4850

4951
/**
5052
* An ordered map of contiguous segments of elements.
@@ -81,25 +83,31 @@ public PartitionedGSet(final int capacity,
8183
final E rootKey) {
8284
this.partitions = new TreeMap<K, PartitionEntry>(comparator);
8385
this.latchLock = latchLock;
84-
addNewPartition(rootKey).put(rootKey);
85-
this.size = 1;
86+
// addNewPartition(rootKey).put(rootKey);
87+
// this.size = 1;
88+
this.size = 0;
89+
LOG.info("Partition capacity = {}", DEFAULT_PARTITION_CAPACITY);
90+
LOG.info("Partition overflow factor = {}", DEFAULT_PARTITION_OVERFLOW);
8691
}
8792

8893
/**
8994
* Creates new empty partition.
9095
* @param key
9196
* @return
9297
*/
93-
private PartitionEntry addNewPartition(final K key) {
98+
public PartitionEntry addNewPartition(final K key) {
99+
Entry<K, PartitionEntry> lastEntry = partitions.lastEntry();
94100
PartitionEntry lastPart = null;
95-
if(size > 0)
96-
lastPart = partitions.lastEntry().getValue();
101+
if(lastEntry != null)
102+
lastPart = lastEntry.getValue();
97103

98104
PartitionEntry newPart =
99105
new PartitionEntry(DEFAULT_PARTITION_CAPACITY);
100106
// assert size == 0 || newPart.partLock.isWriteTopLocked() :
101107
// "Must hold write Lock: key = " + key;
102-
partitions.put(key, newPart);
108+
PartitionEntry oldPart = partitions.put(key, newPart);
109+
assert oldPart == null :
110+
"RangeMap already has a partition associated with " + key;
103111

104112
LOG.debug("Total GSet size = {}", size);
105113
LOG.debug("Number of partitions = {}", partitions.size());
@@ -173,7 +181,7 @@ public E put(final E element) {
173181

174182
private PartitionEntry addNewPartitionIfNeeded(
175183
PartitionEntry curPart, K key) {
176-
if(curPart.size() < DEFAULT_PARTITION_CAPACITY * 1.1
184+
if(curPart.size() < DEFAULT_PARTITION_CAPACITY * DEFAULT_PARTITION_OVERFLOW
177185
|| curPart.contains(key)) {
178186
return curPart;
179187
}
@@ -197,12 +205,56 @@ public E remove(final K key) {
197205
/**
 * Remove all elements and all partitions from the set.
 * Logs final size statistics before clearing so that the distribution
 * of elements across partitions can be inspected at teardown.
 */
public void clear() {
  // NOTE(review): ERROR level is used for visibility of the stats,
  // not because anything failed — consider INFO.
  LOG.error("Total GSet size = {}", size);
  LOG.error("Number of partitions = {}", partitions.size());
  printStats();
  // assert latchLock.hasWriteTopLock() : "Must hold write topLock";
  // SHV May need to clear all partitions?
  partitions.clear();
  size = 0;
}
205214

215+
private void printStats() {
216+
int partSizeMin = Integer.MAX_VALUE, partSizeAvg = 0, partSizeMax = 0;
217+
long totalSize = 0;
218+
int numEmptyPartitions = 0, numFullPartitions = 0;
219+
Collection<PartitionEntry> parts = partitions.values();
220+
Set<Entry<K, PartitionEntry>> entries = partitions.entrySet();
221+
int i = 0;
222+
for(Entry<K, PartitionEntry> e : entries) {
223+
PartitionEntry part = e.getValue();
224+
int s = part.size;
225+
if(s == 0) numEmptyPartitions++;
226+
if(s > DEFAULT_PARTITION_CAPACITY) numFullPartitions++;
227+
totalSize += s;
228+
partSizeMin = (s < partSizeMin ? s : partSizeMin);
229+
partSizeMax = (partSizeMax < s ? s : partSizeMax);
230+
Class<?> inodeClass = e.getKey().getClass();
231+
try {
232+
long[] key = (long[]) inodeClass.
233+
getMethod("getNamespaceKey", int.class).invoke(e.getKey(), 2);
234+
long[] firstKey = new long[0];
235+
if(part.iterator().hasNext()) {
236+
Object first = part.iterator().next();
237+
firstKey = (long[]) inodeClass.getMethod(
238+
"getNamespaceKey", int.class).invoke(first, 2);
239+
Object parent = inodeClass.
240+
getMethod("getParent").invoke(first);
241+
long parentId = (parent == null ? 0L :
242+
(long) inodeClass.getMethod("getId").invoke(parent));
243+
firstKey[0] = parentId;
244+
}
245+
LOG.error("Partition #{}\t key: {}\t size: {}\t first: {}",
246+
i++, key, s, firstKey); // SHV should be info
247+
} catch (Exception ex) {
248+
LOG.error("Cannot find Method getNamespaceKey() in {}", inodeClass);
249+
}
250+
}
251+
partSizeAvg = (int) (totalSize / parts.size());
252+
LOG.error("Partition sizes: min = {}, avg = {}, max = {}, sum = {}",
253+
partSizeMin, partSizeAvg, partSizeMax, totalSize);
254+
LOG.error("Number of partitions: empty = {}, full = {}",
255+
numEmptyPartitions, numFullPartitions);
256+
}
257+
206258
@Override
207259
public Collection<E> values() {
208260
// TODO Auto-generated method stub
@@ -234,15 +286,19 @@ public EntryIterator() {
234286

235287
/**
 * Returns whether another element remains, advancing past empty
 * partitions as needed.
 * Note: this method mutates iterator state (keyIterator and
 * partitionIterator), but repeated calls are harmless — once positioned
 * on a non-empty partition the loop body is skipped.
 */
@Override
public boolean hasNext() {
  // Skip partitions that have no remaining elements.
  while(!partitionIterator.hasNext()) {
    if(!keyIterator.hasNext()) {
      return false; // all partitions exhausted
    }
    K curKey = keyIterator.next();
    partitionIterator = getPartition(curKey).iterator();
  }
  return partitionIterator.hasNext(); // loop exit guarantees true
}
242298

243299
@Override
244300
public E next() {
245-
if(!partitionIterator.hasNext()) {
301+
while(!partitionIterator.hasNext()) {
246302
K curKey = keyIterator.next();
247303
partitionIterator = getPartition(curKey).iterator();
248304
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirMkdirOp.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,10 +265,13 @@ private static INode[] createPathDirectories(FSDirectory fsd,
265265
// create the missing directories along the path
266266
INode[] missing = new INode[numMissing];
267267
final int last = iip.length();
268+
INode parent = existing.getLastINode();
268269
for (int i = existing.length(); i < last; i++) {
269270
byte[] component = iip.getPathComponent(i);
270271
missing[i - existing.length()] =
271272
createDirectoryINode(fsd, existing, component, perm);
273+
missing[i - existing.length()].setParent(parent.asDirectory());
274+
parent = missing[i - existing.length()];
272275
}
273276
return missing;
274277
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.io.PrintStream;
4848
import java.io.PrintWriter;
4949
import java.io.StringWriter;
50+
import java.util.Arrays;
5051
import java.util.List;
5152
import java.util.Map;
5253

@@ -577,6 +578,43 @@ public final String getLocalName() {
577578
return name == null? null: DFSUtil.bytes2String(name);
578579
}
579580

581+
private long[] namespaceKey;
582+
583+
/**
584+
* Key of an INode.
585+
* Defines partitioning of INodes in the INodeMap.
586+
*
587+
* @param level how many levels to be included in the key
588+
* @return
589+
*/
590+
public long[] getNamespaceKey(int level) {
591+
if(namespaceKey == null) { // generate the namespace key
592+
long[] buf = new long[level];
593+
INode cur = this;
594+
for(int l = 0; l < level; l++) {
595+
long curId = (cur == null) ? 0L : cur.getId();
596+
buf[level - l - 1] = curId;
597+
cur = (cur == null) ? null : cur.parent;
598+
}
599+
buf[0] = indexOf(buf);
600+
namespaceKey = buf;
601+
}
602+
return namespaceKey;
603+
}
604+
605+
private final static long LARGE_PRIME = 512927357;
606+
public static long indexOf(long[] key) {
607+
if(key[key.length-1] == INodeId.ROOT_INODE_ID) {
608+
return key[0];
609+
}
610+
long idx = LARGE_PRIME * key[0];
611+
idx = (idx ^ (idx >> 32)) & (INodeMap.NUM_RANGES_STATIC -1);
612+
return idx;
613+
}
614+
615+
/**
 * Key of a snapshot diff element.
 * (Presumably this refers to {@code Diff.Element#getKey()}, which the
 * method below implements by returning the local name bytes — confirm.)
 */
580618
@Override
581619
public final byte[] getKey() {
582620
return getLocalNameBytes();
@@ -636,7 +674,7 @@ public byte[][] getPathComponents() {
636674

637675
/**
 * {@inheritDoc}
 * Appends the cached namespace key to aid partition debugging; prints
 * "null" for the key until getNamespaceKey() has been called on this
 * inode (Arrays.toString(null) yields "null").
 */
@Override
public String toString() {
  return getLocalName() + ": " + Arrays.toString(namespaceKey);
}
641679

642680
@VisibleForTesting

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,63 @@
2929
import org.apache.hadoop.util.LatchLock;
3030
import org.apache.hadoop.util.LightWeightGSet;
3131
import org.apache.hadoop.util.PartitionedGSet;
32-
import org.slf4j.Logger;
33-
import org.slf4j.LoggerFactory;
3432

3533
/**
3634
* Storing all the {@link INode}s and maintaining the mapping between INode ID
3735
* and INode.
3836
*/
3937
public class INodeMap {
38+
// Number of ancestor levels included in an INode's namespace key.
// NOTE(review): "DEBTH" is a misspelling of "DEPTH"; renaming would
// touch every reference, so the name is left unchanged here.
static final int NAMESPACE_KEY_DEBTH = 2;
// Number of statically pre-created partitions; must remain a power of 2
// because INode.indexOf() masks with (NUM_RANGES_STATIC - 1).
static final int NUM_RANGES_STATIC = 256; // power of 2
40+
41+
public static class INodeKeyComparator implements Comparator<INode> {
42+
INodeKeyComparator() {
43+
FSDirectory.LOG.info("Namespace key debth = {}", NAMESPACE_KEY_DEBTH);
44+
}
45+
46+
@Override
47+
public int compare(INode i1, INode i2) {
48+
if (i1 == null || i2 == null) {
49+
throw new NullPointerException("Cannot compare null INodes");
50+
}
51+
long[] key1 = i1.getNamespaceKey(NAMESPACE_KEY_DEBTH);
52+
long[] key2 = i2.getNamespaceKey(NAMESPACE_KEY_DEBTH);
53+
for(int l = 0; l < NAMESPACE_KEY_DEBTH; l++) {
54+
if(key1[l] == key2[l]) continue;
55+
return (key1[l] < key2[l] ? -1 : 1);
56+
}
57+
return 0;
58+
}
59+
}
60+
61+
/**
62+
* INodeKeyComparator with Hashed Parent
63+
*
64+
*/
65+
public static class HPINodeKeyComparator implements Comparator<INode> {
66+
HPINodeKeyComparator() {
67+
FSDirectory.LOG.info("Namespace key debth = {}", NAMESPACE_KEY_DEBTH);
68+
}
69+
70+
@Override
71+
public int compare(INode i1, INode i2) {
72+
if (i1 == null || i2 == null) {
73+
throw new NullPointerException("Cannot compare null INodes");
74+
}
75+
long[] key1 = i1.getNamespaceKey(NAMESPACE_KEY_DEBTH);
76+
long[] key2 = i2.getNamespaceKey(NAMESPACE_KEY_DEBTH);
77+
long key1_0 = INode.indexOf(key1);
78+
long key2_0 = INode.indexOf(key2);
79+
if(key1_0 != key2_0)
80+
return (key1_0 < key2_0 ? -1 : 1);
81+
for(int l = 1; l < NAMESPACE_KEY_DEBTH; l++) {
82+
if(key1[l] == key2[l]) continue;
83+
return (key1[l] < key2[l] ? -1 : 1);
84+
}
85+
return 0;
86+
}
87+
}
88+
4089
public static class INodeIdComparator implements Comparator<INode> {
4190
@Override
4291
public int compare(INode i1, INode i2) {
@@ -50,8 +99,6 @@ public int compare(INode i1, INode i2) {
5099
}
51100

52101
public class INodeMapLock extends LatchLock<ReentrantReadWriteLock> {
53-
Logger LOG = LoggerFactory.getLogger(INodeMapLock.class);
54-
55102
private ReentrantReadWriteLock childLock;
56103

57104
INodeMapLock() {
@@ -146,8 +193,22 @@ private INodeMap(INodeDirectory rootDir, FSNamesystem ns) {
146193
this.namesystem = ns;
147194
// Compute the map capacity by allocating 1% of total memory
148195
int capacity = LightWeightGSet.computeCapacity(1, "INodeMap");
149-
this.map = new PartitionedGSet<>(capacity, new INodeIdComparator(),
196+
this.map = new PartitionedGSet<>(capacity, new INodeKeyComparator(),
150197
new INodeMapLock(), rootDir);
198+
199+
// Pre-populate initial empty partitions
200+
PartitionedGSet<INode, INodeWithAdditionalFields> pgs =
201+
(PartitionedGSet<INode, INodeWithAdditionalFields>) map;
202+
PermissionStatus perm = new PermissionStatus(
203+
"", "", new FsPermission((short) 0));
204+
for(int p = 0; p < NUM_RANGES_STATIC; p++) {
205+
INodeDirectory key = new INodeDirectory(
206+
INodeId.ROOT_INODE_ID, "range key".getBytes(), perm, 0);
207+
key.setParent(new INodeDirectory((long)p, null, perm, 0));
208+
pgs.addNewPartition(key);
209+
}
210+
211+
map.put(rootDir);
151212
}
152213

153214
/**

0 commit comments

Comments
 (0)