Пример #1
0
        /// <summary>
        /// Creates a new VCDIFF encoder. The input streams will not be closed once this object is disposed.
        /// </summary>
        /// <param name="source">The dictionary (source stream).</param>
        /// <param name="target">The target to create the diff from.</param>
        /// <param name="outputStream">The stream to write the diff into.</param>
        /// <param name="maxBufferSize">
        /// The maximum buffer size for window chunking in mebibytes (MiB). Values of zero or less are treated as 1.
        /// Values whose byte count would not fit into an <see cref="int"/> are clamped to the largest value that does (2047).
        /// </param>
        /// <param name="blockSize">
        /// The block size to use. Must be a power of two (the constructor itself only rejects odd values).
        /// No match smaller than this block size will be identified.
        /// Increasing blockSize by a factor of two will halve the amount of memory needed for the next block table, and will halve the setup time
        /// for a new BlockHash. However, it also doubles the minimum match length that is guaranteed to be found.
        ///
        /// Block sizes that are n mod 32 = 0 are AVX2 accelerated. Block sizes that are n mod 16 = 0 are SSE2 accelerated, if supported. 16 is a good default
        /// for most scenarios, but you should use a block size of 32 or 64 for very similar data, or to optimize for speed.
        /// </param>
        /// <param name="chunkSize">
        /// The minimum size of a string match that is worth putting into a COPY. Values below 2 select the default of
        /// twice the block size; any other value must be at least twice the block size.
        /// </param>
        /// <param name="rollingHash">
        /// Manually provide a <see cref="RollingHash"/> instance that can be reused for multiple encoding instances
        /// of the same block size.
        ///
        /// If you provide a <see cref="RollingHash"/> instance, you must dispose of it yourself.
        /// </param>
        /// <exception cref="ArgumentException">
        /// If an invalid blockSize or chunkSize is used, or if the window size of the supplied
        /// <paramref name="rollingHash"/> differs from the block size.
        /// </exception>
        public VcEncoder(Stream source, Stream target, Stream outputStream,
                         int maxBufferSize = 1, int blockSize = 16, int chunkSize = 0, RollingHash? rollingHash = null)
        {
            const int BytesPerMiB = 1024 * 1024;
            // Largest MiB count whose byte count still fits into an int; anything above would wrap around.
            const int MaxBufferSizeMiB = int.MaxValue / BytesPerMiB;

            if (maxBufferSize <= 0)
            {
                maxBufferSize = 1;
            }
            else if (maxBufferSize > MaxBufferSizeMiB)
            {
                maxBufferSize = MaxBufferSizeMiB;
            }

            this.blockSize = blockSize;
            this.chunkSize = chunkSize < 2 ? this.blockSize * 2 : chunkSize;

            // Validate the sizes before a RollingHash is allocated, so a rejected call does not leave one undisposed.
            if (this.blockSize % 2 != 0 || this.chunkSize < 2 || this.chunkSize < 2 * this.blockSize)
            {
                throw new ArgumentException(
                    $"chunkSize {this.chunkSize} can not be less than 2 or twice the blocksize of the dictionary {this.blockSize}, and the blocksize must be even.");
            }

            this.sourceStream = source;
            this.targetData   = new ByteStreamReader(target);
            this.outputStream = outputStream;

            if (rollingHash == null)
            {
                // The encoder created this hasher itself, so it is flagged as the one responsible for disposing it.
                this.disposeRollingHash = true;
                this.hasher             = new RollingHash(this.blockSize);
            }
            else
            {
                this.hasher = rollingHash;
            }

            if (this.hasher.WindowSize != this.blockSize)
            {
                throw new ArgumentException("Supplied RollingHash instance has a different window size than blocksize!");
            }

            this.bufferSize = maxBufferSize * BytesPerMiB;
        }
Пример #2
0
 /// <summary>
 /// Creates a chunk encoder, which performs the actual encoding of a chunk of data into the VCDiff format.
 /// The constructor only stores its arguments.
 /// </summary>
 /// <param name="dictionary">The dictionary hash table.</param>
 /// <param name="oldData">The data for the dictionary hash table.</param>
 /// <param name="hash">The rolling hash object.</param>
 /// <param name="interleaved">Whether to interleave the data or not.</param>
 /// <param name="checksum">Whether to include checksums for each window.</param>
 public ChunkEncoder(BlockHash dictionary, IByteBuffer oldData, RollingHash hash, bool interleaved = false, bool checksum = false)
 {
     // Inputs the encoder works with.
     this.dictionary = dictionary;
     this.oldData = oldData;
     hasher = hash;

     // Output format switches.
     this.interleaved = interleaved;
     hasChecksum = checksum;
 }
Пример #3
0
 /// <summary>
 /// Creates a chunk encoder, which performs the actual encoding of a chunk of data into the VCDiff format.
 /// The constructor only stores its arguments; none of them are validated here.
 /// </summary>
 /// <param name="dictionary">The dictionary hash table.</param>
 /// <param name="oldData">The data for the dictionary hash table.</param>
 /// <param name="hash">The rolling hash object.</param>
 /// <param name="checksumFormat">The format of the checksums for each window.</param>
 /// <param name="interleaved">Whether to interleave the data or not.</param>
 /// <param name="minBlockSize">
 /// The minimum block size to use. Defaults to 32, and must be a power of 2.
 /// This value must also be smaller than the block size of the dictionary.
 /// </param>
 public ChunkEncoder(BlockHash dictionary, ByteBuffer oldData,
                     RollingHash hash, ChecksumFormat checksumFormat, bool interleaved = false, int minBlockSize = 32)
 {
     // Inputs the encoder works with.
     this.dictionary = dictionary;
     this.oldData = oldData;
     hasher = hash;

     // Encoding options.
     this.checksumFormat = checksumFormat;
     this.interleaved = interleaved;
     this.minBlockSize = minBlockSize;
 }
Пример #4
0
        /// <summary>
        /// The easy public structure for encoding into the VCDIFF format.
        /// Simply instantiate it with the proper streams and use the Encode() function.
        /// Does not check if the data is equal already; you will need to do that yourself.
        /// Encode() returns a VCDiffResult: it should always return success, unless either the dict or the target stream has 0 bytes.
        /// See the VCDecoder for decoding the VCDIFF format.
        /// </summary>
        /// <param name="dict">The dictionary (previous data).</param>
        /// <param name="target">The new data.</param>
        /// <param name="sout">The output stream.</param>
        /// <param name="maxBufferSize">
        /// The maximum buffer size for window chunking, in megabytes; 2 means 2 megabytes, and so on. Default is 1.
        /// Values of zero or less are treated as 1. Values whose byte count would not fit into an
        /// <see cref="int"/> are clamped to the largest value that does (2047).
        /// </param>
        public VCCoder(Stream dict, Stream target, Stream sout, int maxBufferSize = 1)
        {
            const int BytesPerMegabyte = 1024 * 1024;
            // Largest megabyte count whose byte count still fits into an int; anything above would wrap around.
            const int MaxBufferSizeMegabytes = int.MaxValue / BytesPerMegabyte;

            if (maxBufferSize <= 0)
            {
                maxBufferSize = 1;
            }
            else if (maxBufferSize > MaxBufferSizeMegabytes)
            {
                maxBufferSize = MaxBufferSizeMegabytes;
            }

            this.oldData = new ByteStreamReader(dict);
            this.newData = new ByteStreamReader(target);
            this.sout    = new ByteStreamWriter(sout);
            this.hasher  = new RollingHash(BlockHash.BlockSize);

            this.bufferSize = maxBufferSize * BytesPerMegabyte;
        }
Пример #5
0
        /// <summary>
        /// Creates a hash lookup table for the data.
        /// </summary>
        /// <param name="sin">The data to create the table for.</param>
        /// <param name="offset">The offset; usually 0.</param>
        /// <param name="hasher">The hashing method.</param>
        /// <exception cref="InvalidOperationException">If the calculated hash table size is 0.</exception>
        public BlockHash(IByteBuffer sin, int offset, RollingHash hasher)
        {
            // Block sizes below 32 get proportionally more match candidates to check.
            maxMatchesToCheck = (blockSize >= 32) ? 32 : (32 * (32 / blockSize));
            this.hasher       = hasher;
            sourceData        = sin;
            this.offset       = offset;

            // NOTE(review): CalcTableSize and BlocksCount presumably read the fields assigned above,
            // so the assignment order is kept as-is; confirm before reordering.
            tableSize = CalcTableSize();

            if (tableSize == 0)
            {
                // A specific exception type instead of the base Exception; handlers that catch Exception still catch it.
                throw new InvalidOperationException("BlockHash Table Size is Invalid == 0");
            }

            // NOTE(review): tableSize - 1 only works as a bit mask if tableSize is a power of two;
            // presumably CalcTableSize guarantees that. Verify.
            hashTableMask  = (ulong)tableSize - 1;
            hashTable      = new long[tableSize];
            nextBlockTable = new long[BlocksCount];
            lastBlockTable = new long[BlocksCount];
            lastBlockAdded = -1;
            SetTablesToInvalid();
        }