Skip to content

Commit aba5b8a

Browse files
authored
Merge pull request #1498 from mpeyrotc/dev/mapeyrot/final_changes
Use new SegmentedList and SegmentedDictionary to process large heap dumps
2 parents 2afa78e + 7e15f43 commit aba5b8a

File tree

7 files changed

+113
-37
lines changed

7 files changed

+113
-37
lines changed

src/FastSerialization/SegmentedDictionary/SegmentedDictionary.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,8 @@ public int EnsureCapacity(int capacity)
781781
throw new ArgumentOutOfRangeException(nameof(capacity));
782782
}
783783

784-
var currentCapacity = _entries.Capacity;
784+
// Normal usage of a dictionary should never ask for a capacity that exceeds int32.MaxValue.
785+
var currentCapacity = (int)_entries.Capacity;
785786
if (currentCapacity >= capacity)
786787
{
787788
return currentCapacity;

src/FastSerialization/SegmentedList.cs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public long Count
100100
}
101101
}
102102

103-
public int Capacity => this.capacity;
103+
internal long Capacity => this.capacity;
104104

105105
/// <summary>
106106
/// Copy to Array
@@ -177,7 +177,7 @@ public T this[long index]
177177
}
178178
}
179179

180-
public ref T GetElementByReference(int index) =>
180+
internal ref T GetElementByReference(int index) =>
181181
ref this.items[index >> this.segmentShift][index & this.offsetMask];
182182

183183
/// <summary>
@@ -190,6 +190,18 @@ public bool IsValidIndex(long index)
190190
return this.items[index >> this.segmentShift] != null;
191191
}
192192

193+
/// <summary>
194+
/// Get slot of an element
195+
/// </summary>
196+
/// <param name="index"></param>
197+
/// <param name="slot"></param>
198+
/// <returns></returns>
199+
public T[] GetSlot(int index, out int slot)
200+
{
201+
slot = index & this.offsetMask;
202+
return this.items[index >> this.segmentShift];
203+
}
204+
193205
/// <summary>
194206
/// Adds new element at the end of the list.
195207
/// </summary>
@@ -443,12 +455,12 @@ public void CopyTo(T[] array, int arrayIndex)
443455
"Destination array is not long enough to copy all the items in the collection. Check array index and length.");
444456
}
445457

446-
int remain = (int)this.count;
458+
long remain = this.count;
447459

448-
for (int i = 0; (remain > 0) && (i < this.items.Length); i++)
460+
for (long i = 0; (remain > 0) && (i < this.items.Length); i++)
449461
{
450462
// We can safely cast to int, since that is the max value that items[i].Length can have.
451-
int len = Math.Min(remain, this.items[i].Length);
463+
int len = (int)Math.Min(remain, this.items[i].Length);
452464

453465
Array.Copy(this.items[i], 0, array, arrayIndex, len);
454466

src/FastSerialization/SegmentedMemoryStreamReader.cs

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Diagnostics;
4-
using System.IO;
54
using System.Text;
65

76
namespace FastSerialization
@@ -13,12 +12,31 @@ public class SegmentedMemoryStreamReader
1312
/// <summary>
1413
/// Create an IStreamReader (reads binary data) from a given byte buffer
1514
/// </summary>
16-
public SegmentedMemoryStreamReader(SegmentedList<byte> data) : this(data, 0, (int)data.Count) { }
15+
public SegmentedMemoryStreamReader(SegmentedList<byte> data, SerializationConfiguration config = null) : this(data, 0, data.Count, config) { }
1716
/// <summary>
1817
/// Create an IStreamReader (reads binary data) from a given subregion of a byte buffer
1918
/// </summary>
20-
public SegmentedMemoryStreamReader(SegmentedList<byte> data, int start, int length)
19+
public SegmentedMemoryStreamReader(SegmentedList<byte> data, long start, long length, SerializationConfiguration config = null)
2120
{
21+
SerializationConfiguration = config ?? new SerializationConfiguration();
22+
23+
if (SerializationConfiguration.StreamLabelWidth == StreamLabelWidth.FourBytes)
24+
{
25+
readLabel = () =>
26+
{
27+
return (StreamLabel)(uint)ReadInt32();
28+
};
29+
sizeOfSerializedStreamLabel = 4;
30+
}
31+
else
32+
{
33+
readLabel = () =>
34+
{
35+
return (StreamLabel)(ulong)ReadInt64();
36+
};
37+
sizeOfSerializedStreamLabel = 8;
38+
}
39+
2240
bytes = new SegmentedList<byte>(65_536, length);
2341
bytes.AppendFrom(data, start, length);
2442
position = start;
@@ -130,18 +148,24 @@ public string ReadString()
130148
/// <summary>
131149
/// Implementation of IStreamReader
132150
/// </summary>
133-
public StreamLabel ReadLabel()
134-
{
135-
return (StreamLabel)(uint)ReadInt32();
136-
}
151+
public StreamLabel ReadLabel() => readLabel();
152+
137153
/// <summary>
138154
/// Implementation of IStreamReader
139155
/// </summary>
140156
public virtual void Goto(StreamLabel label)
141157
{
142158
Debug.Assert(label != StreamLabel.Invalid);
143-
Debug.Assert((long)label <= int.MaxValue);
144-
position = (int)label;
159+
160+
if (SerializationConfiguration.StreamLabelWidth == StreamLabelWidth.FourBytes)
161+
{
162+
Debug.Assert((long)label <= int.MaxValue);
163+
position = (uint)label;
164+
}
165+
else
166+
{
167+
position = (long)label;
168+
}
145169
}
146170
/// <summary>
147171
/// Implementation of IStreamReader
@@ -150,16 +174,22 @@ public virtual StreamLabel Current
150174
{
151175
get
152176
{
153-
return (StreamLabel)(uint)position;
177+
if (SerializationConfiguration.StreamLabelWidth == StreamLabelWidth.FourBytes)
178+
{
179+
return (StreamLabel)(uint)position;
180+
}
181+
else
182+
{
183+
return (StreamLabel)position;
184+
}
154185
}
155186
}
156187
/// <summary>
157188
/// Implementation of IStreamReader
158189
/// </summary>
159190
public virtual void GotoSuffixLabel()
160191
{
161-
const int serializedStreamLabelSize = 4;
162-
Goto((StreamLabel)(Length - serializedStreamLabelSize));
192+
Goto((StreamLabel)(Length - sizeOfSerializedStreamLabel));
163193
Goto(ReadLabel());
164194
}
165195
/// <summary>
@@ -174,6 +204,11 @@ public void Dispose()
174204
/// Dispose pattern
175205
/// </summary>
176206
protected virtual void Dispose(bool disposing) { }
207+
208+
/// <summary>
209+
/// Returns the SerializationConfiguration for this stream reader.
210+
/// </summary>
211+
internal SerializationConfiguration SerializationConfiguration { get; private set; }
177212
#endregion
178213

179214
#region private
@@ -182,9 +217,11 @@ protected virtual void Dispose(bool disposing) { }
182217
throw new Exception("Streamreader read past end of buffer");
183218
}
184219
internal /*protected*/ SegmentedList<byte> bytes;
185-
internal /*protected*/ int position;
186-
internal /*protected*/ int endPosition;
220+
internal /*protected*/ long position;
221+
internal /*protected*/ long endPosition;
187222
private StringBuilder sb;
223+
private Func<StreamLabel> readLabel;
224+
private readonly int sizeOfSerializedStreamLabel;
188225
#endregion
189226
}
190227
}

src/FastSerialization/SegmentedMemoryStreamWriter.cs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,32 @@
22
using System.Collections.Generic;
33
using System.Diagnostics;
44
using System.IO;
5-
using System.Text;
65

76
namespace FastSerialization
87
{
98
public class SegmentedMemoryStreamWriter
109
{
11-
public SegmentedMemoryStreamWriter() : this(64) { }
12-
public SegmentedMemoryStreamWriter(int initialSize)
10+
public SegmentedMemoryStreamWriter(SerializationConfiguration config = null) : this(64, config) { }
11+
public SegmentedMemoryStreamWriter(long initialSize, SerializationConfiguration config = null)
1312
{
13+
SerializationConfiguration = config ?? new SerializationConfiguration();
14+
15+
if (SerializationConfiguration.StreamLabelWidth == StreamLabelWidth.FourBytes)
16+
{
17+
writeLabel = (value) =>
18+
{
19+
Debug.Assert((long)value <= int.MaxValue, "The StreamLabel overflowed, it should not be treated as a 32bit value.");
20+
Write((int)value);
21+
};
22+
}
23+
else
24+
{
25+
writeLabel = (value) =>
26+
{
27+
Write((long)value);
28+
};
29+
}
30+
1431
bytes = new SegmentedList<byte>(65_536, initialSize);
1532
}
1633

@@ -45,10 +62,8 @@ public void Write(long value)
4562
bytes.Add((byte)value); value = value >> 8;
4663
bytes.Add((byte)value); value = value >> 8;
4764
}
48-
public void Write(StreamLabel value)
49-
{
50-
Write((int)value);
51-
}
65+
public void Write(StreamLabel value) => writeLabel(value);
66+
5267
public void Write(string value)
5368
{
5469
if (value == null)
@@ -85,10 +100,15 @@ public void WriteToStream(Stream outputStream)
85100
public SegmentedMemoryStreamReader GetReader()
86101
{
87102
var readerBytes = bytes;
88-
return new SegmentedMemoryStreamReader(readerBytes, 0, (int)readerBytes.Count);
103+
return new SegmentedMemoryStreamReader(readerBytes, 0, readerBytes.Count, SerializationConfiguration);
89104
}
90105
public void Dispose() { }
91106

107+
/// <summary>
108+
/// Returns the SerializationConfiguration for this stream writer.
109+
/// </summary>
110+
internal SerializationConfiguration SerializationConfiguration { get; private set; }
111+
92112
#region private
93113
protected virtual void MakeSpace()
94114
{
@@ -101,6 +121,7 @@ public byte[] GetBytes()
101121
}
102122

103123
protected SegmentedList<byte> bytes;
124+
private Action<StreamLabel> writeLabel;
104125
#endregion
105126
}
106127
}

src/HeapDump/GCHeapDump.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ void IFastSerializable.FromStream(Deserializer deserializer)
709709
/// </graph>
710710
///
711711
/// </summary>
712-
public class XmlGcHeapDump
712+
internal class XmlGcHeapDump
713713
{
714714
public static GCHeapDump ReadGCHeapDumpFromXml(string fileName)
715715
{
@@ -840,7 +840,7 @@ public static MemoryGraph ReadMemoryGraphFromXml(XmlReader reader)
840840
return graph;
841841
}
842842

843-
public static void WriteGCDumpToXml(GCHeapDump gcDump, StreamWriter writer)
843+
internal static void WriteGCDumpToXml(GCHeapDump gcDump, StreamWriter writer)
844844
{
845845
writer.WriteLine("<GCHeapDump>");
846846

src/MemoryGraph/MemoryGraph.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public MemoryGraph(int expectedSize, bool isVeryLargeGraph = false)
1212
{
1313
// If we have too many addresses we will reach the Dictionary's internal array's size limit and throw.
1414
// Therefore use a new implementation of it that is similar in performance but that can handle the extra load.
15-
if (expectedSize > 200_000)
15+
if (isVeryLargeGraph)
1616
{
1717
m_addressToNodeIndex = new SegmentedDictionary<Address, NodeIndex>(expectedSize);
1818
}

src/MemoryGraph/graph.cs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
using System.Collections.Generic;
66
using System.Diagnostics;
77
using System.IO;
8-
using System.Text;
98
using System.Text.RegularExpressions;
109
using Address = System.UInt64;
1110

@@ -120,9 +119,9 @@ public virtual NodeType AllocTypeNodeStorage()
120119
/// </summary>
121120
public NodeIndex NodeIndexLimit { get { return (NodeIndex)m_nodes.Count; } }
122121
/// <summary>
123-
/// Same as NodeIndexLimit, just cast to an integer.
122+
/// Same as NodeIndexLimit.
124123
/// </summary>
125-
public int NodeCount { get { return (int)m_nodes.Count; } }
124+
public long NodeCount { get { return m_nodes.Count; } }
126125
/// <summary>
127126
/// It is expected that users will want additional information associated with TYPES of the nodes of the graph. They can
128127
/// do this by allocating an array of code:NodeTypeIndexLimit and then indexing this by code:NodeTypeIndex
@@ -161,8 +160,11 @@ public virtual NodeType AllocTypeNodeStorage()
161160
///
162161
/// TODO I can eliminate the need for AllowReading.
163162
/// </summary>
163+
/// <remarks>if isVeryLargeGraph argument is true, then StreamLabels will be serialized as longs
164+
/// to accommodate the extra size of the graph's stream representation.</remarks>
164165
public Graph(int expectedNodeCount, bool isVeryLargeGraph = false)
165166
{
167+
m_isVeryLargeGraph = isVeryLargeGraph;
166168
m_expectedNodeCount = expectedNodeCount;
167169
m_types = new GrowableArray<TypeInfo>(Math.Max(expectedNodeCount / 100, 2000));
168170
m_nodes = new SegmentedList<StreamLabel>(SegmentSize, m_expectedNodeCount);
@@ -462,7 +464,8 @@ private void ClearWorker()
462464
RootIndex = NodeIndex.Invalid;
463465
if (m_writer == null)
464466
{
465-
m_writer = new SegmentedMemoryStreamWriter(m_expectedNodeCount * 8);
467+
m_writer = new SegmentedMemoryStreamWriter(m_expectedNodeCount * 8,
468+
m_isVeryLargeGraph ? new SerializationConfiguration() { StreamLabelWidth = StreamLabelWidth.EightBytes } : null);
466469
}
467470

468471
m_totalSize = 0;
@@ -590,7 +593,9 @@ public void FromStream(Deserializer deserializer)
590593
// Read in the Blob stream.
591594
// TODO be lazy about reading in the blobs.
592595
int blobCount = deserializer.ReadInt();
593-
SegmentedMemoryStreamWriter writer = new SegmentedMemoryStreamWriter(blobCount);
596+
SegmentedMemoryStreamWriter writer = new SegmentedMemoryStreamWriter(blobCount,
597+
m_isVeryLargeGraph ? new SerializationConfiguration() { StreamLabelWidth = StreamLabelWidth.EightBytes } : null);
598+
594599
while (8 <= blobCount)
595600
{
596601
writer.Write(deserializer.ReadInt64());
@@ -649,7 +654,7 @@ public void FromStream(Deserializer deserializer)
649654
}
650655
}
651656

652-
private int m_expectedNodeCount; // Initial guess at graph Size.
657+
private long m_expectedNodeCount; // Initial guess at graph Size.
653658
private long m_totalSize; // Total Size of all the nodes in the graph.
654659
internal int m_totalRefs; // Total Number of references in the graph
655660
internal GrowableArray<TypeInfo> m_types; // We expect only thousands of these

0 commit comments

Comments
 (0)