// very manual writer... but want to see how small I can get the data.
/// <summary>
/// Reads a <see cref="CompleteSignature"/> from its compact binary representation.
/// </summary>
/// <remarks>
/// Layout, as consumed by this method: an Int32 entry count, then for each entry
/// an Int64 offset, a 4-byte size, a 4-byte block number, two Decimal values
/// (16 bytes each) forming the rolling signature, and 16 bytes of MD5.
/// The <see cref="BinaryReader"/> is deliberately not disposed, because disposing
/// it would also close <paramref name="s"/>, which belongs to the caller.
/// </remarks>
/// <param name="s">Readable stream positioned at the start of the serialized signature.</param>
/// <returns>The signature containing every block entry, in stream order.</returns>
/// <exception cref="InvalidDataException">The stream declares a negative entry count.</exception>
/// <exception cref="EndOfStreamException">The stream ends before all declared entries were read.</exception>
public static CompleteSignature ReadBinaryCompleteSignature(Stream s)
{
    const int Md5Length = 16;

    var reader = new BinaryReader(s);

    int numberOfEntries = reader.ReadInt32();
    if (numberOfEntries < 0)
    {
        throw new InvalidDataException("Signature stream declares a negative entry count: " + numberOfEntries);
    }

    // Not pre-sized from numberOfEntries: the count comes from the stream and a
    // corrupt value should not trigger a huge up-front allocation.
    var entries = new List<BlockSignature>();

    for (var i = 0; i < numberOfEntries; i++)
    {
        // 8 bytes. Offset.
        long offset = reader.ReadInt64();

        // 4 bytes. Size (read unsigned directly; same bytes as the old signed read + cast).
        uint size = reader.ReadUInt32();

        // 4 bytes. Block number.
        uint blockNumber = reader.ReadUInt32();

        // 2 x 16 bytes. Rolling signature components (each a Decimal).
        decimal sig1 = reader.ReadDecimal();
        decimal sig2 = reader.ReadDecimal();

        // 16 bytes. MD5. ReadBytes returns a short array at end of stream instead
        // of throwing, so a truncated hash has to be detected explicitly.
        byte[] md5 = reader.ReadBytes(Md5Length);
        if (md5.Length != Md5Length)
        {
            throw new EndOfStreamException("Signature stream ended inside the MD5 hash of entry " + i + ".");
        }

        var entry = new BlockSignature();
        entry.BlockNumber = blockNumber;
        entry.RollingSig = new RollingSignature() { Sig1 = sig1, Sig2 = sig2 };
        entry.MD5Signature = md5;
        entry.Offset = offset;
        entry.Size = size;
        entries.Add(entry);
    }

    var sig = new CompleteSignature();
    sig.SignatureList = entries.ToArray();
    return sig;
}
/// <summary>
/// Builds the block signatures for a local file, grouped by block size.
/// </summary>
/// <remarks>
/// The file is read in blocks of <c>ConfigHelper.SignatureSize</c> bytes; each block is
/// passed to <c>GenerateBlockSig</c> together with its offset, length and a running
/// block id. The resulting signatures are bucketed by the number of bytes in the block,
/// so the final (possibly shorter) block ends up under its own key.
/// </remarks>
/// <param name="localFilePath">Path of the file to generate signatures for.</param>
/// <returns>
/// A <see cref="SizeBasedCompleteSignature"/> whose <c>Signatures</c> dictionary maps
/// block size to the signatures of all blocks of that size.
/// </returns>
public static SizeBasedCompleteSignature CreateSignatureForLocalFile(string localFilePath)
{
    var blockSize = ConfigHelper.SignatureSize;
    var buffer = new byte[blockSize];
    var sigDict = new Dictionary<int, List<BlockSignature>>();

    // Open for reading only: the two-argument FileStream constructor requests
    // ReadWrite access, which fails on read-only files even though nothing is written.
    using (var fs = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
    {
        long offset = 0;
        uint idCount = 0;

        while (true)
        {
            // Stream.Read may return fewer bytes than requested before end of file,
            // so keep reading until the block is full or the file is exhausted.
            int bytesRead = 0;
            int chunk;
            while (bytesRead < blockSize
                   && (chunk = fs.Read(buffer, bytesRead, blockSize - bytesRead)) > 0)
            {
                bytesRead += chunk;
            }

            if (bytesRead == 0)
            {
                break;
            }

            var blockSig = GenerateBlockSig(buffer, offset, bytesRead, idCount);

            List<BlockSignature> sigList;
            if (!sigDict.TryGetValue(bytesRead, out sigList))
            {
                sigList = new List<BlockSignature>();
                sigDict[bytesRead] = sigList;
            }

            sigList.Add(blockSig);

            offset += bytesRead;
            idCount++;
        }
    }

    var sizedBaseSignature = new SizeBasedCompleteSignature();
    sizedBaseSignature.Signatures = new Dictionary<int, CompleteSignature>();

    // Iterate the pairs directly rather than looking every key up again.
    foreach (var pair in sigDict)
    {
        var compSig = new CompleteSignature() { SignatureList = pair.Value.ToArray() };
        sizedBaseSignature.Signatures[pair.Key] = compSig;
    }

    return sizedBaseSignature;
}
/// <summary>
/// Scans the still-unmatched byte ranges of the local file for blocks whose rolling
/// signature and MD5 match a signature of size <paramref name="sigSize"/>.
/// </summary>
/// <param name="sig">Signatures (all of size <paramref name="sigSize"/>) to search for.</param>
/// <param name="accessor">View over the local file being searched.</param>
/// <param name="remainingByteList">Byte ranges (inclusive offsets) not yet covered by any matched signature.</param>
/// <param name="sigSize">Size in bytes of the signatures in <paramref name="sig"/>; also the rolling window size.</param>
/// <param name="fileSize">Not used by this method.</param>
/// <param name="signaturesToReuse">
/// Receives every matched signature, with its <c>Offset</c> set to the position in the
/// local file where the match was found.
/// </param>
/// <returns>The byte ranges that are still unmatched after this pass.</returns>
private static List<RemainingBytes> SearchLocalFileForSignaturesBasedOnSize(CompleteSignature sig, MemoryMappedViewAccessor accessor, List<RemainingBytes> remainingByteList, int sigSize, long fileSize, List<BlockSignature> signaturesToReuse)
{
    var windowSize = sigSize;
    var newRemainingBytes = new List<RemainingBytes>();
    var sigDict = GenerateBlockDict(sig);
    var buffer = new byte[sigSize];

    foreach (var byteRange in remainingByteList)
    {
        var byteRangeSize = byteRange.EndOffset - byteRange.BeginOffset + 1;

        // If the byte range is large and the signature size is small then don't check:
        // we could end up with loads of tiny sig matches where ideally we'd use a larger
        // new sig block. The exception is when the sig size exactly matches the byte
        // range size; in practice this allows small (1-2 byte) sigs to match the range.
        var worthSearching = (byteRangeSize > 1000 && sigSize > 100) || byteRangeSize == sigSize;

        // A range smaller than the window cannot contain a match either.
        if (!worthSearching || byteRangeSize < windowSize)
        {
            newRemainingBytes.Add(byteRange);
            continue;
        }

        // Search this byte range for all possible keys.
        long offset = byteRange.BeginOffset;

        // First byte not yet accounted for by a match (or by an emitted remainder).
        long oldEndOffset = byteRange.BeginOffset;
        var generateFreshSig = true;
        var bytesRead = 0L;
        RollingSignature? currentSig = null;

        do
        {
            // True when buffer/bytesRead were filled for the current offset in this iteration.
            var bufferIsCurrent = generateFreshSig;

            if (generateFreshSig)
            {
                bytesRead = accessor.ReadArray(offset, buffer, 0, windowSize);
                currentSig = CreateRollingSignature(buffer, (int)bytesRead);
            }
            else
            {
                // Roll the existing sig forward by one byte. Both reads stay inside the
                // byte range because the loop condition keeps the window within it.
                var previousByte = accessor.ReadByte(offset - 1);
                var nextByte = accessor.ReadByte(offset + windowSize - 1);
                currentSig = RollSignature(windowSize, previousByte, nextByte, currentSig.Value);
            }

            var matched = false;
            List<BlockSignature> sigsForCurrentRollingSig;
            if (sigDict.TryGetValue(currentSig.Value, out sigsForCurrentRollingSig))
            {
                // The rolling path never filled the buffer for this offset; the fresh
                // path already did, so only re-read when it is stale.
                if (!bufferIsCurrent)
                {
                    bytesRead = accessor.ReadArray(offset, buffer, 0, windowSize);
                }

                // Rolling signature hit: confirm with the MD5.
                var md5Sig = CreateMD5Signature(buffer, (int)bytesRead);
                var matchingSigs = sigsForCurrentRollingSig
                    .Where(s => s.MD5Signature.SequenceEqual(md5Sig))
                    .ToList();

                if (matchingSigs.Any())
                {
                    // Bytes between the previous match and this one belong to no sig.
                    if (oldEndOffset != offset)
                    {
                        newRemainingBytes.Add(new RemainingBytes()
                        {
                            BeginOffset = oldEndOffset,
                            EndOffset = offset - 1
                        });
                    }

                    var matchingSig = matchingSigs[0];

                    // Record the offset in the NEW file where this existing sig should appear.
                    // NOTE(review): if BlockSignature is a reference type this also mutates
                    // the instance held by sig - confirm that is intended.
                    matchingSig.Offset = offset;
                    signaturesToReuse.Add(matchingSig);
                    matched = true;
                }
            }

            if (matched)
            {
                // Jump past the matched block and start a fresh window there.
                offset += windowSize;
                generateFreshSig = true;
                oldEndOffset = offset;
            }
            else
            {
                // No match. Slide the window one byte and roll the signature.
                offset++;
                generateFreshSig = false;
            }
        }
        while (offset + windowSize <= byteRange.EndOffset + 1);

        // Add whatever follows the last match to the remaining list. This must test
        // oldEndOffset, not offset: with a 1-byte window and no match on the final byte,
        // offset ends at EndOffset + 1 while the unmatched tail still has to be reported.
        if (oldEndOffset <= byteRange.EndOffset)
        {
            newRemainingBytes.Add(new RemainingBytes()
            {
                BeginOffset = oldEndOffset,
                EndOffset = byteRange.EndOffset
            });
        }
    }

    return newRemainingBytes;
}
/// <summary>
/// Builds the rolling-signature lookup for a complete signature by forwarding its
/// <c>SignatureList</c> to the <c>GenerateBlockDict</c> overload that takes the list.
/// </summary>
/// <param name="sig">Signature whose block list should be indexed.</param>
/// <returns>The dictionary produced by the list-based overload.</returns>
internal static Dictionary<RollingSignature, List<BlockSignature>> GenerateBlockDict(CompleteSignature sig)
{
    var blocks = sig.SignatureList;
    return GenerateBlockDict(blocks);
}
/// <summary>
/// Builds a size-keyed signature from the signatures of the supplied blocks.
/// </summary>
/// <param name="allBlocks">Blocks whose <c>Sig</c> values are grouped by <c>Sig.Size</c>.</param>
/// <returns>
/// A <see cref="SizeBasedCompleteSignature"/> whose <c>Signatures</c> dictionary maps each
/// block size to a <see cref="CompleteSignature"/> holding the signatures of that size,
/// in the order the blocks were supplied.
/// </returns>
internal static SizeBasedCompleteSignature CreateSignatureFromNewAndReusedBlocks(List<UploadedBlock> allBlocks)
{
    // Bucket every block signature under its size.
    var bySize = new Dictionary<int, List<BlockSignature>>();
    foreach (var block in allBlocks)
    {
        var sizeKey = (int)block.Sig.Size;

        List<BlockSignature> bucket;
        if (!bySize.TryGetValue(sizeKey, out bucket))
        {
            bucket = new List<BlockSignature>();
            bySize[sizeKey] = bucket;
        }

        bucket.Add(block.Sig);
    }

    // Turn each bucket into a CompleteSignature under the same key.
    var result = new SizeBasedCompleteSignature();
    result.Signatures = new Dictionary<int, CompleteSignature>();
    foreach (var group in bySize)
    {
        result.Signatures[group.Key] = new CompleteSignature() { SignatureList = group.Value.ToArray() };
    }

    return result;
}