/// <summary>
/// Builds a snapshot by parsing the text of a heap dump line by line.
/// </summary>
/// <remarks>
/// Parsing moves through these phases:
/// 1. Skip lines until one contains "Loaded modules" (parsing stops entirely
///    if "Start of data for heap" is seen first).
/// 2. Add every line matching the module pattern to <c>Modules</c>, until a line
///    containing "Process modules enumerated" is seen.
/// 3. Look for heap headers; a line starting with "// Memory=" is parsed into
///    the memory statistics and ends parsing.
/// 4. While inside a heap, record allocations (and any tracebacks not seen
///    before) until the "End of data for heap" marker.
/// Afterwards each heap's allocations are sorted by address and its statistics
/// are computed.
/// </remarks>
/// <param name="index">Snapshot index; passed to every <c>Heap</c> and to the snapshot info.</param>
/// <param name="when">Timestamp stored in the snapshot info.</param>
/// <param name="filename">Filename stored in the snapshot info.</param>
/// <param name="text">Full text of the snapshot to parse.</param>
public HeapSnapshot(int index, DateTime when, string filename, string text) {
    MemoryStatistics memory = new MemoryStatistics();
    bool scanningForStart = true, scanningForMemory = false;
    Heap scanningHeap = null;

    var regexes = new Regexes();

    // Resolve named groups to indices once; lookups by index are then used per line.
    int groupModule = regexes.SnapshotModule.GroupNumberFromName("module");
    int groupModuleOffset = regexes.SnapshotModule.GroupNumberFromName("offset");
    int groupModuleSize = regexes.SnapshotModule.GroupNumberFromName("size");
    int groupHeaderId = regexes.HeapHeader.GroupNumberFromName("id");
    int groupAllocOffset = regexes.Allocation.GroupNumberFromName("offset");
    int groupAllocSize = regexes.Allocation.GroupNumberFromName("size");
    int groupAllocOverhead = regexes.Allocation.GroupNumberFromName("overhead");
    int groupAllocId = regexes.Allocation.GroupNumberFromName("id");

    Match m;

    // Instead of allocating a tiny new UInt32[] for every traceback we read in,
    // we store groups of tracebacks into fixed-size buffers so that the GC has
    // less work to do when performing collections. Tracebacks are read-only after
    // being constructed, and all the tracebacks from a snapshot have the same
    // lifetime, so this works out well.
    var frameBuffer = new UInt32[FrameBufferSize];
    int frameBufferCount = 0;

    var lr = new LineReader(text);
    LineReader.Line line;

    while (lr.ReadLine(out line)) {
        if (scanningHeap != null) {
            if (line.StartsWith("*-") && line.Contains("End of data for heap")) {
                scanningHeap.Allocations.TrimExcess();
                scanningHeap = null;
            } else if (regexes.Allocation.TryMatch(ref line, out m)) {
                var tracebackId = UInt32.Parse(m.Groups[groupAllocId].Value, NumberStyles.HexNumber);

                Traceback traceback;
                if (!Tracebacks.TryGetValue(tracebackId, out traceback)) {
                    // If the frame buffer could fill up while we're building our traceback,
                    // let's allocate a new one.
                    if (frameBufferCount >= frameBuffer.Length - MaxTracebackLength) {
                        frameBuffer = new UInt32[FrameBufferSize];
                        frameBufferCount = 0;
                    }

                    int firstFrame = frameBufferCount;

                    // This is only valid if every allocation is followed by an empty line
                    while (lr.ReadLine(out line)) {
                        if (!line.StartsWith("\t")) {
                            // Not a frame line: hand it back to the outer loop.
                            lr.Rewind(ref line);
                            break;
                        }

                        // A traceback longer than MaxTracebackLength can exhaust the
                        // buffer mid-traceback. Move the frames read so far into a fresh,
                        // larger buffer instead of indexing past the end. Earlier
                        // tracebacks keep referencing the old buffer, which stays valid.
                        if (frameBufferCount == frameBuffer.Length) {
                            int copiedFrames = frameBufferCount - firstFrame;
                            var largerBuffer = new UInt32[Math.Max(FrameBufferSize, (copiedFrames + 1) * 2)];
                            Array.Copy(frameBuffer, firstFrame, largerBuffer, 0, copiedFrames);
                            frameBuffer = largerBuffer;
                            firstFrame = 0;
                            frameBufferCount = copiedFrames;
                        }

                        // NumberStyles.HexNumber already permits leading/trailing whitespace,
                        // so the leading tab does not need an extra flag.
                        frameBuffer[frameBufferCount++] = UInt32.Parse(
                            line.ToString(), NumberStyles.HexNumber
                        );
                    }

                    Tracebacks.Add(traceback = new Traceback(
                        tracebackId,
                        new ArraySegment<UInt32>(frameBuffer, firstFrame, frameBufferCount - firstFrame)
                    ));
                }

                scanningHeap.Allocations.Add(new Allocation(
                    UInt32.Parse(m.Groups[groupAllocOffset].Value, NumberStyles.HexNumber),
                    UInt32.Parse(m.Groups[groupAllocSize].Value, NumberStyles.HexNumber),
                    UInt32.Parse(m.Groups[groupAllocOverhead].Value, NumberStyles.HexNumber),
                    traceback.ID
                ));
            }
        } else if (scanningForMemory) {
            if (regexes.HeapHeader.TryMatch(ref line, out m)) {
                scanningHeap = new Heap(index, UInt32.Parse(m.Groups[groupHeaderId].Value, NumberStyles.HexNumber));
                Heaps.Add(scanningHeap);
            } else if (line.StartsWith("// Memory=")) {
                // The memory statistics line is the last thing we read.
                memory = new MemoryStatistics(line.ToString());
                break;
            }
        } else if (scanningForStart) {
            if (line.Contains("Loaded modules")) {
                scanningForStart = false;
            } else if (line.Contains("Start of data for heap")) {
                // Heap data showed up before the module list; stop parsing.
                break;
            }
        } else {
            // Reading the module list.
            if (regexes.SnapshotModule.TryMatch(ref line, out m)) {
                var modulePath = Path.GetFullPath(m.Groups[groupModule].Value).ToLowerInvariant();
                Modules.Add(new Module(
                    modulePath,
                    UInt32.Parse(m.Groups[groupModuleOffset].Value, NumberStyles.HexNumber),
                    UInt32.Parse(m.Groups[groupModuleSize].Value, NumberStyles.HexNumber)
                ));
            } else if (line.Contains("Process modules enumerated")) {
                scanningForMemory = true;
            }
        }
    }

    foreach (var heap in Heaps) {
        heap.Allocations.Sort(
            (lhs, rhs) => lhs.Address.CompareTo(rhs.Address)
        );
        heap.ComputeStatistics();
    }

    Info = new HeapSnapshotInfo(index, when, filename, memory, this);
}
/// <summary>
/// Loads a heap diff from <paramref name="filename"/> and parses it into a
/// <c>HeapDiff</c>.
/// </summary>
/// <remarks>
/// This is an iterator: it yields the pending file-read and text-decode futures,
/// a short <c>Sleep</c> every <c>ProgressInterval</c> lines (after updating
/// <paramref name="progress"/>), and finally a <c>Result</c> wrapping the parsed
/// <c>HeapDiff</c>. Unrecognized lines and duplicate traceback ids are reported
/// through <c>Program.ErrorList</c> rather than thrown.
/// </remarks>
/// <param name="filename">Path of the diff file to read.</param>
/// <param name="progress">Receives status text and progress updates.</param>
/// <returns>An enumerator whose last yielded item is a <c>Result</c> holding the diff.</returns>
public static IEnumerator<object> FromFile(string filename, IProgressListener progress) {
    progress.Status = "Loading diff...";

    Future<string> fText;

    // We could stream the lines in from the IO thread while we parse them, but this
    // part of the load is usually pretty quick even on a regular hard disk, and
    // loading the whole diff at once eliminates some context switches
    using (var fda = new FileDataAdapter(
        filename, FileMode.Open,
        FileAccess.Read, FileShare.Read, 1024 * 128
    )) {
        var fBytes = fda.ReadToEnd();
        yield return fBytes;

        fText = Future.RunInThread(
            () => Encoding.ASCII.GetString(fBytes.Result)
        );
        yield return fText;
    }

    var lr = new LineReader(fText.Result);
    LineReader.Line line;

    progress.Status = "Parsing diff...";

    var frames = new List<TracebackFrame>();
    var moduleNames = new NameTable(StringComparer.Ordinal);
    var functionNames = new NameTable(StringComparer.Ordinal);
    var deltas = new List<DeltaInfo>();
    var tracebacks = new Dictionary<UInt32, TracebackInfo>();

    var regexes = new Regexes();

    // Regex.Groups[string] does an inefficient lookup, so we do that lookup once here
    int groupModule = regexes.DiffModule.GroupNumberFromName("module");
    int groupTraceId = regexes.BytesDelta.GroupNumberFromName("trace_id");
    int groupType = regexes.BytesDelta.GroupNumberFromName("type");
    int groupDeltaBytes = regexes.BytesDelta.GroupNumberFromName("delta_bytes");
    int groupNewBytes = regexes.BytesDelta.GroupNumberFromName("new_bytes");
    int groupOldBytes = regexes.BytesDelta.GroupNumberFromName("old_bytes");
    int groupNewCount = regexes.BytesDelta.GroupNumberFromName("new_count");
    int groupOldCount = regexes.CountDelta.GroupNumberFromName("old_count");
    int groupCountDelta = regexes.CountDelta.GroupNumberFromName("delta_count");
    int groupTracebackModule = regexes.TracebackFrame.GroupNumberFromName("module");
    int groupTracebackFunction = regexes.TracebackFrame.GroupNumberFromName("function");
    int groupTracebackOffset = regexes.TracebackFrame.GroupNumberFromName("offset");
    int groupTracebackOffset2 = regexes.TracebackFrame.GroupNumberFromName("offset2");
    int groupTracebackPath = regexes.TracebackFrame.GroupNumberFromName("path");
    int groupTracebackLine = regexes.TracebackFrame.GroupNumberFromName("line");

    var delay = new Sleep(0.01);
    int i = 0;

    while (lr.ReadLine(out line)) {
    // Re-entry point for a line that the traceback loop below already read
    // but could not consume.
    retryFromHere:

        i += 1;
        if (i % ProgressInterval == 0) {
            progress.Maximum = lr.Length;
            progress.Progress = lr.Position;

            // Suspend processing for a bit
            yield return delay;
        }

        Match m;
        if (regexes.DiffModule.TryMatch(ref line, out m)) {
            moduleNames.Add(m.Groups[groupModule].Value);
        } else if (regexes.BytesDelta.TryMatch(ref line, out m)) {
            var added = (m.Groups[groupType].Value == "+");
            // Deltas are stored signed: positive for "+", negative otherwise.
            int sign = added ? 1 : -1;
            var traceId = UInt32.Parse(m.Groups[groupTraceId].Value, NumberStyles.HexNumber);

            var info = new DeltaInfo {
                BytesDelta = int.Parse(m.Groups[groupDeltaBytes].Value, NumberStyles.HexNumber) * sign,
                NewBytes = int.Parse(m.Groups[groupNewBytes].Value, NumberStyles.HexNumber),
                OldBytes = int.Parse(m.Groups[groupOldBytes].Value, NumberStyles.HexNumber),
                NewCount = int.Parse(m.Groups[groupNewCount].Value, NumberStyles.HexNumber),
            };

            // The line after the bytes delta is expected to be the count delta.
            // NOTE(review): if it does not match, the line is silently discarded.
            if (lr.ReadLine(out line)) {
                if (regexes.CountDelta.TryMatch(ref line, out m)) {
                    info.OldCount = int.Parse(m.Groups[groupOldCount].Value, NumberStyles.HexNumber);
                    info.CountDelta = int.Parse(m.Groups[groupCountDelta].Value, NumberStyles.HexNumber) * sign;
                }
            }

            bool readingLeadingWhitespace = true, doRetry = false;

            frames.Clear();
            var itemModules = new NameTable(StringComparer.Ordinal);
            var itemFunctions = new NameTable(StringComparer.Ordinal);

            while (lr.ReadLine(out line)) {
                if (line.ToString().Trim().Length == 0) {
                    // Blank lines before the first frame are skipped;
                    // a blank line after at least one frame ends the traceback.
                    if (readingLeadingWhitespace) {
                        continue;
                    } else {
                        break;
                    }
                } else if (regexes.TracebackFrame.TryMatch(ref line, out m)) {
                    readingLeadingWhitespace = false;

                    var moduleName = moduleNames[m.Groups[groupTracebackModule].Value];
                    itemModules.Add(moduleName);

                    var functionName = functionNames[m.Groups[groupTracebackFunction].Value];
                    itemFunctions.Add(functionName);

                    var frame = new TracebackFrame {
                        Module = moduleName,
                        Function = functionName,
                        Offset = UInt32.Parse(m.Groups[groupTracebackOffset].Value, NumberStyles.HexNumber)
                    };

                    if (m.Groups[groupTracebackOffset2].Success) {
                        frame.Offset2 = UInt32.Parse(m.Groups[groupTracebackOffset2].Value, NumberStyles.HexNumber);
                    }

                    if (m.Groups[groupTracebackPath].Success) {
                        frame.SourceFile = m.Groups[groupTracebackPath].Value;
                    }

                    if (m.Groups[groupTracebackLine].Success) {
                        frame.SourceLine = int.Parse(m.Groups[groupTracebackLine].Value);
                    }

                    frames.Add(frame);
                } else {
                    // We hit the beginning of a new allocation, so make sure it gets parsed
                    doRetry = true;
                    break;
                }
            }

            // Single lookup instead of ContainsKey followed by the indexer.
            TracebackInfo existingTraceback;
            if (tracebacks.TryGetValue(traceId, out existingTraceback)) {
                info.Traceback = existingTraceback;
                Program.ErrorList.ReportError("Duplicate traceback for id {0}!", traceId);
            } else {
                var frameArray = ImmutableArrayPool<TracebackFrame>.Allocate(frames.Count);
                frames.CopyTo(frameArray.Array, frameArray.Offset);

                info.Traceback = tracebacks[traceId] = new TracebackInfo {
                    TraceId = traceId,
                    Frames = frameArray,
                    Modules = itemModules,
                    Functions = itemFunctions
                };
            }

            deltas.Add(info);

            if (doRetry) {
                goto retryFromHere;
            }
        } else if (line.StartsWith("//")) {
            // Comment, ignore it
        } else if (line.StartsWith("Total increase") || line.StartsWith("Total decrease")) {
            // Ignore this too
        } else if (line.StartsWith(" ") && (line.EndsWith(".pdb"))) {
            // Symbol path for a module, ignore it
        } else {
            Program.ErrorList.ReportError("Unrecognized diff content: {0}", line.ToString());
        }
    }

    var result = new HeapDiff(
        filename, moduleNames, functionNames, deltas, tracebacks
    );
    yield return new Result(result);
}