/// <summary>
/// Writes <paramref name="hgBundle"/> to <paramref name="stream"/> in the requested bundle
/// format and compression. Emits the compression header (unless pre-1.0 format), then the
/// changelog group, manifest group, and one group per file, each terminated appropriately.
/// </summary>
/// <param name="hgBundle">Bundle to serialize (changelog, manifest, files).</param>
/// <param name="stream">Destination stream; flushed but not closed by this method.</param>
/// <param name="format">Bundle format; pre-1.0 bundles carry no compression header.</param>
/// <param name="compression">Compression to apply to the bundle payload.</param>
public void WriteBundle(HgBundle hgBundle, Stream stream, HgBundleFormat format, HgBundleCompression compression)
{
    //
    // First things first -- we need to write out bundle header
    var header = format == HgBundleFormat.BundlePre10 ? null : GetCompressionHeader(compression);
    if(header != null)
    {
        var headerBuffer = Encoding.ASCII.GetBytes(header);
        stream.Write(headerBuffer, 0, headerBuffer.Length);
    } // if

    using(var compressedStream = GetCompressedStream(stream, compression))
    using(var binaryWriter = new BigEndianBinaryWriter(new BufferedStream(compressedStream, 1024 * 128)))
    {
        log.Debug("writing changesets");
        WriteBundleGroup(hgBundle.Changelog, binaryWriter);
        binaryWriter.Flush();

        log.Debug("writing manifests");
        WriteBundleGroup(hgBundle.Manifest, binaryWriter);
        binaryWriter.Flush();

        //
        // Sometimes HgBundleFile.File has no chunks and Mercurial chokes on that.
        // The filename record is therefore written lazily, on the first chunk, so
        // empty files emit nothing at all.
        log.Debug("writing files");
        foreach(var file in hgBundle.Files)
        {
            string filePath = null;
            foreach(var chunk in file.File)
            {
                if(filePath == null)
                {
                    filePath = file.Path.FullPath.TrimStart('/');

                    //
                    // FIX: the length prefix must count the *encoded* bytes plus the
                    // 4-byte length field itself. Using filePath.Length (char count)
                    // breaks framing when EncodeAsLocal yields a different byte count
                    // (non-ASCII paths).
                    var encodedFilePath = hgEncoder.EncodeAsLocal(filePath);
                    binaryWriter.Write((uint)(encodedFilePath.Length + 4));
                    binaryWriter.Write(encodedFilePath);

                    log.Debug("writing file '{0}'", filePath);
                } // if

                WriteChunk(binaryWriter, chunk);
            } // foreach

            // Only terminate the group if we actually opened one.
            if(filePath != null)
                WriteZeroChunk(binaryWriter);

            binaryWriter.Flush();
        } // foreach

        // Zero uint terminates the file list.
        binaryWriter.Write((uint)0);
        binaryWriter.Flush();

        compressedStream.Flush();
        stream.Flush();
    } // using
}
/// <summary>
/// Produces a Mercurial bdiff-style binary delta transforming <paramref name="source"/>
/// into <paramref name="destination"/>. The delta is a sequence of hunks, each a pair of
/// big-endian uint32 byte offsets into the source (start, end) plus a uint32 replacement
/// length, followed by the replacement bytes taken from the destination.
/// </summary>
/// <param name="source">Original bytes; null or empty yields a single whole-buffer hunk.</param>
/// <param name="destination">Target bytes the delta reconstructs.</param>
/// <returns>The encoded delta as a byte array.</returns>
public static byte[] Diff(byte[] source, byte[] destination)
{
    var ms = new MemoryStream();
    var bw = new BigEndianBinaryWriter(ms);

    //
    // Empty/absent source: one hunk replacing the empty range [0, 0) with all of destination.
    if(source == null || source.Length == 0)
    {
        bw.Write((uint)0);
        bw.Write((uint)0);
        bw.Write((uint)destination.Length);
        bw.Write(destination);
        bw.Flush();

        return ms.ToArray();
    } // if

    // Line-based diff: split both buffers on '\n' into segments.
    var a = source.Split((byte)'\n');
    var b = destination.Split((byte)'\n');

    // p[i] = byte offset of line i within source (running prefix sum of line lengths),
    // used to translate line indices back into byte offsets for the hunk header.
    var p = new List<int> { 0 };
    Array.ForEach(a, s => p.Add(p[p.Count - 1] + s.Length));

    var d = new SequenceMatcher<Segment>(a, b, (l, r) => l.Equals(r)).GetMatchingBlocks();

    // la/lb track the first line not yet emitted in source/destination respectively.
    int la = 0, lb = 0;
    foreach(var x in d)
    {
        int am = x.SourceIndex, bm = x.DestinationIndex, size = x.Length;

        // sz = total byte length of destination lines [lb, bm) that replace source lines [la, am).
        var sz = (lb == bm && lb == 0) ? 0 : Enumerable.Range(lb, bm - lb).Select(i => b[i]).Sum(w => w.Length);
        if(am > la || sz > 0)
        {
            // Hunk header: source byte range [p[la], p[am]) and replacement byte count.
            bw.Write((uint)p[la]);
            bw.Write((uint)p[am]);
            bw.Write((uint)sz);

            if(sz > 0)
            {
                // Replacement payload copied directly out of destination via segment offsets.
                for(var z = lb; z < bm; ++z)
                    bw.Write(destination, b[z].Offset, b[z].Length);
            } // if
        } // if

        // Skip past the matching block; it needs no hunk.
        la = am + size;
        lb = bm + size;
    } // foreach

    bw.Flush();

    return ms.ToArray();
}
/// <summary>
/// Creates the on-disk skeleton for the revlog: ensures the containing directory exists,
/// writes the index file header (version plus optional inline-data flag), and, for
/// non-inline revlogs, pre-creates an empty data file. A no-op if the index already exists.
/// </summary>
private void WriteRevlog()
{
    var indexDirectory = Path.GetDirectoryName(revlog.IndexPath);
    Debug.Assert(indexDirectory != null, "directoryPath != null");

    // Already initialized -- nothing to do.
    if(File.Exists(revlog.IndexPath))
        return;

    if(!Directory.Exists(indexDirectory))
        Directory.CreateDirectory(indexDirectory);

    //
    // Non-inline revlogs keep their data in a separate file; create it empty up front.
    if(!revlog.InlineData)
        fileSystem.CreateWrite(revlog.DataPath).Close();

    using(var indexStream = fileSystem.CreateWrite(revlog.IndexPath))
    using(var indexWriter = new BigEndianBinaryWriter(indexStream))
    {
        // Header is the revlog version, with the inline-data flag OR'ed in when applicable.
        var header = revlog.InlineData ? NG | InlineDataFlag : NG;
        indexWriter.Write(header);
        indexWriter.Write((Int16)0);
    } // using
}
/// <summary>
/// Appends a sequence of bundle chunks to the revlog, deciding per entry whether to store
/// a delta against the previous revision or a full snapshot (when accumulated delta size
/// exceeds <c>SnapshottingFactor</c> times the snapshot size). Duplicate entries are
/// reported via <paramref name="duplicateEntryHandlerCallback"/> and skipped.
/// </summary>
/// <param name="chunks">Incoming chunks; each carries a delta against its predecessor (or parent).</param>
/// <param name="linkRevisionProviderCallback">Maps a changeset node ID to its link revision.</param>
/// <param name="duplicateEntryHandlerCallback">Optional; invoked for chunks whose node ID already exists.</param>
/// <returns>The number of chunks processed (including duplicates).</returns>
public int WriteRevlogEntries(IEnumerable<HgChunk> chunks, Func<HgNodeID, uint> linkRevisionProviderCallback, Action<HgRevlogEntry> duplicateEntryHandlerCallback = null)
{
    HgNodeID? prevNodeID = null;
    ulong offset = 0;
    long accumulatedCompressedLength = 0;
    var entries = 0;

    WriteRevlog();

    if(revlog.Entries.Count > 0)
    {
        var tip = revlog.Entries[revlog.Entries.Count - 1];

        //
        // FIX: was "(uint)(tip.Offset + tip.CompressedLength)" -- the cast truncated the
        // 64-bit offset to 32 bits, corrupting revlogs larger than 4 GiB. The sibling
        // WriteRevlogEntryData computes this sum without a cast.
        offset = tip.Offset + tip.CompressedLength;
    } // if

    using(var indexFileStream = fileSystem.OpenWrite(revlog.IndexPath))
    using(var dataFileStream = revlog.InlineData ? new NonClosingStreamWrapper(indexFileStream) : (Stream)fileSystem.OpenOrCreateWrite(revlog.DataPath))
    {
        indexFileStream.Seek(indexFileStream.Length, SeekOrigin.Begin);
        dataFileStream.Seek(dataFileStream.Length, SeekOrigin.Begin);

        using(var indexBinaryWriter = new BigEndianBinaryWriter(indexFileStream))
        using(var dataBinaryWriter = new BigEndianBinaryWriter(dataFileStream))
        {
            // Running full text of the current revision, rebuilt by patching chunk deltas.
            var fullSnapshot = new byte[] { };

            // Set after a duplicate is skipped: the next chunk's delta base is not on disk
            // as the tip, so it must be re-diffed against what actually is.
            var requiresRediffing = false;

            foreach(var chunk in chunks)
            {
                byte[] data = null;
                entries++;

                //
                // Rebuild the full text: the first chunk patches either the empty buffer
                // (root revision) or its first parent's data; subsequent chunks patch the
                // previous full text.
                if(prevNodeID == null)
                {
                    if(chunk.FirstParentNodeID == HgNodeID.Null)
                    {
                        fullSnapshot = MPatch.Patch(fullSnapshot, new List<byte[]> { chunk.Data });
                    } // if
                    else
                    {
                        var revlogEntry = revlogReader.ReadRevlogEntry(chunk.FirstParentNodeID);
                        fullSnapshot = MPatch.Patch(revlogEntry.Data, new List<byte[]> { chunk.Data });
                    } // else
                } // if
                else
                {
                    fullSnapshot = MPatch.Patch(fullSnapshot, new List<byte[]> { chunk.Data });
                } // else

                var uncompressedLength = (uint)fullSnapshot.Length;
                uint baseRevision = 0;

                if(prevNodeID == null || requiresRediffing)
                {
                    //
                    // First chunk (or post-duplicate): the incoming delta cannot be stored
                    // as-is. Either snapshot, or re-diff against the on-disk tip.
                    if(revlog.Entries.Count > 1)
                    {
                        var revlogEntry = revlog.Entries[revlog.Entries.Count - 1];

                        accumulatedCompressedLength = revlog.Entries.
                            Where(e => e.Revision > revlogEntry.BaseRevision).
                            Sum(e => e.CompressedLength);

                        if(accumulatedCompressedLength > fullSnapshot.Length * SnapshottingFactor)
                        {
                            // Delta chain has grown too heavy -- store a full snapshot.
                            baseRevision = (uint)revlog.Entries.Count;
                            data = fullSnapshot;
                            accumulatedCompressedLength = 0;
                        } // if
                        else
                        {
                            var revlogEntryData = revlogReader.ReadRevlogEntry((uint)revlog.Entries.Count - 1);
                            data = BDiff.Diff(revlogEntryData.Data, fullSnapshot);
                            baseRevision = revlogEntryData.Entry.BaseRevision;
                        } // else
                    }
                    else
                    {
                        // Empty or single-entry revlog: store the full text.
                        baseRevision = (uint)revlog.Entries.Count;
                        data = fullSnapshot;
                        accumulatedCompressedLength = 0;
                    } // else

                    requiresRediffing = false;
                } // if
                else
                {
                    var revlogEntry = revlog.Entries[revlog.Entries.Count - 1];

                    baseRevision = revlogEntry.BaseRevision;

                    if(accumulatedCompressedLength > fullSnapshot.Length * SnapshottingFactor)
                    {
                        // Delta chain too heavy -- snapshot and restart the chain here.
                        baseRevision = (uint)revlog.Entries.Count;
                        accumulatedCompressedLength = 0;
                        data = fullSnapshot;
                    } // if
                    else
                    {
                        // Consecutive chunk: the incoming delta can be stored verbatim.
                        data = chunk.Data;
                    } // else
                } // else

                prevNodeID = chunk.NodeID;

                var hgRevlogEntry = revlog.GetEntry(chunk.NodeID);
                if(hgRevlogEntry == null)
                {
                    var linkRevision = linkRevisionProviderCallback(chunk.ChangesetNodeID);
                    uint compressedLength;

                    WriteRevlogEntryDataRaw(chunk.NodeID, linkRevision, chunk.FirstParentNodeID, chunk.SecondParentNodeID, data, ref offset, baseRevision, uncompressedLength, out compressedLength, indexBinaryWriter, dataBinaryWriter);

                    accumulatedCompressedLength += compressedLength;
                } // if
                else
                {
                    // Entry already present -- notify and force a re-diff for the next chunk.
                    if(duplicateEntryHandlerCallback != null)
                        duplicateEntryHandlerCallback(hgRevlogEntry);

                    requiresRediffing = true;
                } // else
            }
        } // using
    } // using

    return entries;
}
/// <summary>
/// Writes a single revlog entry for <paramref name="data"/>, stored either as a delta
/// against the current tip or as a full snapshot, and returns the entry's node ID.
/// If an entry with the same node ID already exists, nothing is written.
/// </summary>
/// <param name="linkRevision">Changelog revision this entry links to.</param>
/// <param name="firstParentNodeID">First parent node ID (participates in node ID hashing).</param>
/// <param name="secondParentNodeID">Second parent node ID (participates in node ID hashing).</param>
/// <param name="data">Full (undiffed) content of the new revision.</param>
/// <returns>The node ID computed for this revision.</returns>
public HgNodeID WriteRevlogEntryData(uint linkRevision, HgNodeID firstParentNodeID, HgNodeID secondParentNodeID, byte[] data)
{
    //
    // Get previous revision and diff against it
    byte[] revlogEntryData = null;
    ulong offset = 0;
    uint baseRevision = 0;

    if(revlog.Entries.Count > 0)
    {
        var lastRevlogEntry = revlog.Entries[revlog.Entries.Count - 1];

        // New entry appends after the tip; inherit the tip's delta-chain base by default.
        baseRevision = lastRevlogEntry.BaseRevision;
        offset = lastRevlogEntry.Offset + lastRevlogEntry.CompressedLength;

        //
        // If the size of stored entries is comparable to the size of @data, store snapshot as well
        if(revlog.Entries.Count > 1)
        {
            // Total compressed size of the current delta chain (entries past the base).
            var accumulatedDataLength = revlog.Entries.
                Where(e => e.Revision > baseRevision && e.Revision <= revlog.Entries.Count).
                Sum(e => e.CompressedLength);

            log.Trace("accumulated {0}", accumulatedDataLength);

            if(accumulatedDataLength > data.Length * SnapshottingFactor)
            {
                // Chain too heavy: store full data and restart the chain at this revision.
                baseRevision = (uint)revlog.Entries.Count;
                revlogEntryData = data;

                log.Trace("storing snapshot at r{0} - accumulated data length exceeds data length with factor of {1}", baseRevision, SnapshottingFactor);
            } // if
        } // if

        if(revlogEntryData == null)
        {
            // No snapshot decision yet: diff against the tip's full text.
            var lastRevlogEntryData = revlogReader.ReadRevlogEntry(lastRevlogEntry.Revision);
            var diff = BDiff.Diff(lastRevlogEntryData.Data, data);

            //
            // If a diff we got is actually bigger than data itself, store snapshot. Do not forget to set @baseRevision appropriately
            if(diff.Length > data.Length)
            {
                baseRevision = (uint)revlog.Entries.Count;
                revlogEntryData = data;

                log.Trace("storing snapshot at r{0} - diff length exceeds data length", baseRevision);
            } // if
            else
            {
                revlogEntryData = diff;
            } // else
        } // if
    } // if
    else
        revlogEntryData = data;   // empty revlog: first revision is always a full snapshot

    // Node ID is derived from parents + full data; an existing entry means a no-op.
    var revlogEntryNodeID = GetRevlogEntryDataNodeID(firstParentNodeID, secondParentNodeID, data);
    if(revlog.GetEntry(revlogEntryNodeID) != null)
        return revlogEntryNodeID;

    if(!File.Exists(revlog.IndexPath))
    {
        //
        // We need to decide whether we want inline data initially
        // No, it's not you who decides. (Inline-data choice belongs to the revlog, not here.)
        // revlog.InlineData = data.Length < MaxInlineFileSize;
        WriteRevlog();
    } // if

    using(var indexFileStream = fileSystem.OpenWrite(revlog.IndexPath))
    using(var dataFileStream = revlog.InlineData ? new NonClosingStreamWrapper(indexFileStream) : (Stream)fileSystem.OpenOrCreateWrite(revlog.DataPath))
    {
        // Always append at the end of both streams.
        indexFileStream.Seek(indexFileStream.Length, SeekOrigin.Begin);
        dataFileStream.Seek(dataFileStream.Length, SeekOrigin.Begin);

        using(var indexBinaryWriter = new BigEndianBinaryWriter(indexFileStream))
        using(var dataBinaryWriter = new BigEndianBinaryWriter(dataFileStream))
        {
            uint compressedLength;

            WriteRevlogEntryDataRaw(revlogEntryNodeID, linkRevision, firstParentNodeID, secondParentNodeID, revlogEntryData, ref offset, baseRevision, (uint)data.Length, out compressedLength, indexBinaryWriter, dataBinaryWriter);
        } // using
    } // using

    return revlogEntryNodeID;
}
/// <summary>
/// Lazily builds bundle chunks for the revisions in <paramref name="hgRevset"/> out of
/// <paramref name="hgRevlog"/>. Consecutive revisions reuse the raw stored deltas; gaps,
/// snapshot boundaries, and the initial parent-based case are re-diffed against the
/// reconstructed previous full text.
/// </summary>
/// <param name="hgRepository">Repository providing the changelog for link-revision lookup.</param>
/// <param name="hgRevlog">Revlog the revisions are read from.</param>
/// <param name="hgRevset">Set of revisions to bundle.</param>
/// <param name="callback">Optional; receives each entry with its reconstructed full data.</param>
/// <returns>A lazy sequence of chunks, one per revision, in ascending revision order.</returns>
private IEnumerable<HgChunk> BuildBundleGroup(HgRepository hgRepository, HgRevlog hgRevlog, HgRevset hgRevset, Action<HgRevlogEntryData> callback = null)
{
    var hgRevlogReader = new HgRevlogReader(hgRevlog, fileSystem);

    //
    // See http://stackoverflow.com/a/10359273/60188. Pure magic
    // (splits the sorted revisions into runs of consecutive numbers: within a run the
    // stored deltas chain correctly and can be emitted raw)
    var revisionChunks = hgRevset.
        Select(hre => hre.Revision).
        OrderBy(r => r).
        Select((r, i) => new { r, i }).
        GroupBy(x => x.r - x.i).
        Select(x => x.Select(xx => xx.r)).
        Select(c => c.ToArray()).
        ToArray();

    if(revisionChunks.Length == 0)
        yield break;

    // prev = full text of the previously emitted revision (delta base for the next one);
    // prevRev = its revision number, uint.MaxValue until the first chunk is emitted.
    byte[] prev = null;
    uint prevRev = uint.MaxValue;

    // prediff: prev holds parent data rather than an emitted revision, so the first
    // chunk must be an explicit diff against it.
    var prediff = false;

    var hgRevlogEntry = hgRevlog[revisionChunks[0][0]];
    if(hgRevlogEntry.FirstParentRevisionNodeID != HgNodeID.Null)
    {
        prev = hgRevlogReader.ReadRevlogEntry(hgRevlogEntry.FirstParentRevision).Data;
        prediff = true;
    }

    foreach(var revisionChunk in revisionChunks)
    {
        foreach(var revision in revisionChunk)
        {
            hgRevlogEntry = hgRevlog[revision];

            var hgChangeset = hgRepository.Changelog.Revlog[hgRevlogEntry.LinkRevision];

            byte[] data = null;

            // Re-diff when the raw stored delta cannot be reused: no base yet, the entry
            // is a snapshot (base == own revision), prev is parent data, or there is a
            // gap after the previously emitted revision.
            if(prev == null || hgRevlogEntry.BaseRevision == hgRevlogEntry.Revision || prediff || (prevRev != UInt32.MaxValue && prevRev + 1 != revision))
            {
                var hgRevlogEntryData = hgRevlogReader.ReadRevlogEntry(revision);
                if(prev == null)
                {
                    //
                    // Trivial case: no base at all -- wrap the full data in a single
                    // replace-everything hunk (12-byte header: start, end, length).
                    var buffer = new byte[hgRevlogEntryData.Data.Length + 12];
                    using(var stream = new MemoryStream(buffer))
                    using(var binaryWriter = new BigEndianBinaryWriter(stream))
                    {
                        binaryWriter.Write((uint)0);
                        binaryWriter.Write((uint)0);
                        binaryWriter.Write((uint)hgRevlogEntryData.Data.Length);
                        binaryWriter.Write(hgRevlogEntryData.Data);
                    } // using

                    data = buffer;
                } // if
                else
                {
                    data = BDiff.Diff(prev, hgRevlogEntryData.Data);
                    if(prediff)
                        prediff = false;
                } // else

                prev = hgRevlogEntryData.Data;
            } // if
            else
            {
                // Consecutive revision: reuse the raw stored delta and patch prev forward.
                data = hgRevlogReader.ReadRevlogEntryDataRaw(revision);
                prev = MPatch.Patch(prev, new List<byte[]> { data });
            } // else

            if(callback != null)
                callback(new HgRevlogEntryData(hgRevlogEntry, prev));

            if(performIntegrityChecks)
            {
                // Recompute the node ID from parents + full text and compare with the index.
                var expectedNodeID = GetRevlogEntryDataNodeID(hgRevlogEntry.FirstParentRevisionNodeID, hgRevlogEntry.SecondParentRevisionNodeID, prev);
                if(expectedNodeID != hgRevlogEntry.NodeID)
                {
                    // TODO: Exception class
                    throw new ApplicationException("integrity violation for " + hgRevlogEntry.NodeID.Short);
                } // if
            } // if

            var hgChunk = new HgChunk(hgRevlogEntry.NodeID, hgRevlogEntry.FirstParentRevisionNodeID, hgRevlogEntry.SecondParentRevisionNodeID, hgChangeset.NodeID, data);

            yield return hgChunk;

            prevRev = revision;
        } // foreach
    } // foreach
}