/// <summary>
/// Creates a new VCDIFF Encoder. The input streams will not be closed once this object is disposed.
/// </summary>
/// <param name="source">The dictionary (sourceStream file).</param>
/// <param name="target">The target to create the diff from.</param>
/// <param name="outputStream">The stream to write the diff into.</param>
/// <param name="maxBufferSize">
/// The maximum buffer size for window chunking in megabytes (MiB). Values of zero or less are replaced by 1.
/// The value must be small enough that the size in bytes still fits into an <see cref="int"/>.
/// </param>
/// <param name="blockSize">
/// The block size to use. Should be a power of two; this constructor itself only enforces that it is positive and even.
/// No match smaller than this block size will be identified.
/// Increasing blockSize by a factor of two will halve the amount of memory needed for the next block table, and will halve the setup time
/// for a new BlockHash. However, it also doubles the minimum match length that is guaranteed to be found.
///
/// Blocksizes that are n mod 32 = 0 are AVX2 accelerated. Blocksizes that are n mod 16 = 0 are SSE2 accelerated, if supported. 16 is a good default
/// for most scenarios, but you should use a block size of 32 or 64 for very similar data, or to optimize for speed.
/// </param>
/// <param name="chunkSize">
/// The minimum size of a string match that is worth putting into a COPY. This must be at least twice the block size.
/// Values smaller than 2 select the default of twice the block size.
/// </param>
/// <param name="rollingHash">
/// Manually provide a <see cref="RollingHash"/> instance that can be reused for multiple encoding instances
/// of the same block size.
///
/// If you provide a <see cref="RollingHash"/> instance, you must dispose of it yourself.
/// </param>
/// <exception cref="ArgumentException">
/// If an invalid blockSize or chunkSize is used, or if the supplied <paramref name="rollingHash"/> has a window size
/// that differs from <paramref name="blockSize"/>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// If <paramref name="maxBufferSize"/> is so large that the buffer size in bytes would overflow an <see cref="int"/>.
/// </exception>
public VcEncoder(Stream source, Stream target, Stream outputStream, int maxBufferSize = 1, int blockSize = 16, int chunkSize = 0, RollingHash? rollingHash = null)
{
    const int BytesPerMiB = 1024 * 1024;

    if (maxBufferSize <= 0)
    {
        maxBufferSize = 1;
    }

    // Without this guard the multiplication below silently wraps around and yields a zero or negative buffer size.
    if (maxBufferSize > int.MaxValue / BytesPerMiB)
    {
        throw new ArgumentOutOfRangeException(
            nameof(maxBufferSize),
            maxBufferSize,
            $"maxBufferSize must not exceed {int.MaxValue / BytesPerMiB} MiB.");
    }

    // All argument validation happens before anything is allocated, so a failing
    // constructor never leaves behind a RollingHash that nobody can dispose.
    if (blockSize <= 0 || blockSize % 2 != 0)
    {
        throw new ArgumentException($"blockSize {blockSize} must be a positive even number.", nameof(blockSize));
    }

    int effectiveChunkSize = chunkSize < 2 ? blockSize * 2 : chunkSize;

    // Compare in 64-bit arithmetic so that a very large blockSize cannot overflow the "twice the block size" bound.
    if (effectiveChunkSize < 2L * blockSize)
    {
        throw new ArgumentException(
            $"chunkSize {effectiveChunkSize} can not be less than twice the blocksize of the dictionary {blockSize}.",
            nameof(chunkSize));
    }

    if (rollingHash is not null && rollingHash.WindowSize != blockSize)
    {
        throw new ArgumentException("Supplied RollingHash instance has a different window size than blocksize!");
    }

    this.blockSize = blockSize;
    this.chunkSize = effectiveChunkSize;
    this.sourceStream = source;
    this.targetData = new ByteStreamReader(target);
    this.outputStream = outputStream;
    this.bufferSize = maxBufferSize * BytesPerMiB;

    if (rollingHash is null)
    {
        // The encoder created the hasher itself, so it is also responsible for disposing it.
        this.disposeRollingHash = true;
        this.hasher = new RollingHash(this.blockSize);
    }
    else
    {
        // Caller-owned instance: it is used as-is and the disposal flag is left untouched.
        this.hasher = rollingHash;
    }
}
/// <summary>
/// Creates the encoder that performs the actual encoding of a chunk of data into the VCDiff format.
/// The constructor only stores its arguments; no encoding work happens here.
/// </summary>
/// <param name="dictionary">The dictionary hash table</param>
/// <param name="oldData">The data for the dictionary hash table</param>
/// <param name="hash">The rolling hash object</param>
/// <param name="interleaved">Whether to interleave the data or not</param>
/// <param name="checksum">Whether to include checksums for each window</param>
public ChunkEncoder(BlockHash dictionary, IByteBuffer oldData, RollingHash hash, bool interleaved = false, bool checksum = false)
{
    // Dictionary and the data it was built from.
    this.dictionary = dictionary;
    this.oldData = oldData;

    // Hashing strategy shared with the dictionary.
    hasher = hash;

    // Output format switches.
    this.interleaved = interleaved;
    hasChecksum = checksum;
}
/// <summary>
/// Creates the encoder that performs the actual encoding of a chunk of data into the VCDiff format.
/// The constructor only stores its arguments; it does not validate them.
/// </summary>
/// <param name="dictionary">The dictionary hash table</param>
/// <param name="oldData">The data for the dictionary hash table</param>
/// <param name="hash">The rolling hash object</param>
/// <param name="checksumFormat">The format of the checksums for each window.</param>
/// <param name="interleaved">Whether to interleave the data or not</param>
/// <param name="minBlockSize">
/// The minimum block size to use. Defaults to 32, and must be a power of 2.
/// This value must also be smaller than the block size of the dictionary.
/// </param>
public ChunkEncoder(BlockHash dictionary, ByteBuffer oldData, RollingHash hash, ChecksumFormat checksumFormat, bool interleaved = false, int minBlockSize = 32)
{
    // Dictionary and the data it was built from.
    this.dictionary = dictionary;
    this.oldData = oldData;

    // Hashing strategy and the smallest block the encoder should work with.
    hasher = hash;
    this.minBlockSize = minBlockSize;

    // Output format switches.
    this.checksumFormat = checksumFormat;
    this.interleaved = interleaved;
}
/// <summary>
/// The easy public structure for encoding into a vcdiff format.
/// Simply instantiate it with the proper streams and use the Encode() function.
/// Does not check if data is equal already. You will need to do that.
/// Encode() returns a VCDiffResult: it should always return success, unless either the dict or the target streams have 0 bytes.
/// See the VCDecoder for decoding vcdiff format.
/// </summary>
/// <param name="dict">The dictionary (previous data)</param>
/// <param name="target">The new data</param>
/// <param name="sout">The output stream</param>
/// <param name="maxBufferSize">
/// The maximum buffer size for window chunking. It is in Megabytes. 2 would mean 2 megabytes etc. Default is 1.
/// Values of zero or less are replaced by 1. The value must be small enough that the size in bytes still fits into an <see cref="int"/>.
/// </param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// If <paramref name="maxBufferSize"/> is so large that the buffer size in bytes would overflow an <see cref="int"/>.
/// </exception>
public VCCoder(Stream dict, Stream target, Stream sout, int maxBufferSize = 1)
{
    const int BytesPerMegabyte = 1024 * 1024;

    if (maxBufferSize <= 0)
    {
        maxBufferSize = 1;
    }

    // Without this guard the multiplication below silently wraps around and yields a zero or negative buffer size.
    // The check runs before any reader, writer or hasher is created so nothing is left half-constructed.
    if (maxBufferSize > int.MaxValue / BytesPerMegabyte)
    {
        throw new System.ArgumentOutOfRangeException(
            nameof(maxBufferSize),
            maxBufferSize,
            $"maxBufferSize must not exceed {int.MaxValue / BytesPerMegabyte} megabytes.");
    }

    this.oldData = new ByteStreamReader(dict);
    this.newData = new ByteStreamReader(target);
    this.sout = new ByteStreamWriter(sout);

    // The hasher window is tied to the block size declared on BlockHash.
    hasher = new RollingHash(BlockHash.BlockSize);

    this.bufferSize = maxBufferSize * BytesPerMegabyte;
}
/// <summary>
/// Create a hash lookup table for the data
/// </summary>
/// <param name="sin">the data to create the table for</param>
/// <param name="offset">the offset usually 0</param>
/// <param name="hasher">the hashing method</param>
/// <exception cref="InvalidOperationException">If the calculated hash table size is 0.</exception>
public BlockHash(IByteBuffer sin, int offset, RollingHash hasher)
{
    // Block sizes of 32 and above check at most 32 matches; smaller block sizes get a proportionally larger limit.
    maxMatchesToCheck = (blockSize >= 32) ? 32 : (32 * (32 / blockSize));

    this.hasher = hasher;
    sourceData = sin;
    this.offset = offset;

    // Computed only after the fields above are assigned.
    // NOTE(review): CalcTableSize and BlocksCount presumably read sourceData — confirm before reordering.
    tableSize = CalcTableSize();
    if (tableSize == 0)
    {
        // A specific exception type instead of the base Exception; existing catch (Exception) handlers still apply.
        throw new InvalidOperationException("BlockHash Table Size is Invalid == 0");
    }

    // NOTE(review): using tableSize - 1 as a mask presumably relies on tableSize being a power of two — confirm against CalcTableSize.
    hashTableMask = (ulong)tableSize - 1;

    hashTable = new long[tableSize];
    nextBlockTable = new long[BlocksCount];
    lastBlockTable = new long[BlocksCount];
    lastBlockAdded = -1;

    SetTablesToInvalid();
}