/// <summary>
/// Scans the byte ranges in <paramref name="remainingByteList"/> for windows of exactly
/// <paramref name="sigSize"/> bytes whose rolling signature and MD5 signature both match an
/// entry in the lookup built from <paramref name="sig"/>.
/// </summary>
/// <param name="sig">Signature set handed to <c>GenerateBlockDict</c> to build the rolling-signature lookup.</param>
/// <param name="accessor">Accessor over the local file; all bytes are read through it.</param>
/// <param name="remainingByteList">Byte ranges (inclusive begin/end offsets) not yet covered by a signature.</param>
/// <param name="sigSize">Window size in bytes used for this pass.</param>
/// <param name="fileSize">Not used by this method; kept for signature compatibility.</param>
/// <param name="signaturesToReuse">Receives each matched signature after its <c>Offset</c> is set to the match position in the local file.</param>
/// <returns>
/// The byte ranges that are still not covered by any signature after this pass. Ranges that were
/// not searched are returned unchanged.
/// </returns>
private static List<RemainingBytes> SearchLocalFileForSignaturesBasedOnSize(CompleteSignature sig, MemoryMappedViewAccessor accessor, List<RemainingBytes> remainingByteList, int sigSize, long fileSize, List<BlockSignature> signaturesToReuse)
{
    var windowSize = sigSize;
    var newRemainingBytes = new List<RemainingBytes>();
    var sigDict = GenerateBlockDict(sig);
    var buffer = new byte[sigSize];

    foreach (var byteRange in remainingByteList)
    {
        var byteRangeSize = byteRange.EndOffset - byteRange.BeginOffset + 1;

        // Only search when the range is large and the signature is reasonably large; otherwise we
        // could end up with LOADS of tiny signature matches where a larger new block would be better.
        // The exception is when the signature size exactly equals the range size: that lets small
        // (1-2 byte) signatures match small ranges.
        var worthSearching = (byteRangeSize > 1000 && sigSize > 100) || byteRangeSize == sigSize;

        // A range shorter than the window cannot contain a match, so it is passed through untouched.
        if (!worthSearching || byteRangeSize < windowSize)
        {
            newRemainingBytes.Add(byteRange);
            continue;
        }

        // Search this byte range for all possible keys.
        long offset = byteRange.BeginOffset;

        // First byte of the current run that has not been matched by any signature.
        long oldEndOffset = byteRange.BeginOffset;
        var generateFreshSig = true;
        RollingSignature? currentSig = null;

        do
        {
            if (generateFreshSig)
            {
                var freshBytes = accessor.ReadArray(offset, buffer, 0, windowSize);
                currentSig = CreateRollingSignature(buffer, freshBytes);
            }
            else
            {
                // Roll the existing signature forward by one byte: drop the byte that just left
                // the window and add the byte that just entered it.
                var previousByte = accessor.ReadByte(offset - 1);
                var nextByte = accessor.ReadByte(offset + windowSize - 1);
                currentSig = RollSignature(windowSize, previousByte, nextByte, currentSig.Value);
            }

            var matched = false;

            if (sigDict.ContainsKey(currentSig.Value))
            {
                // Populate the buffer for the MD5 check. Potential waste of IO here.
                var bytesRead = accessor.ReadArray(offset, buffer, 0, windowSize);
                var md5Sig = CreateMD5Signature(buffer, bytesRead);

                // A candidate is only a real match if its MD5 agrees as well.
                var matchingSigs = sigDict[currentSig.Value]
                    .Where(s => s.MD5Signature.SequenceEqual(md5Sig))
                    .ToList();

                if (matchingSigs.Count > 0)
                {
                    // Any bytes between oldEndOffset and offset precede this match and belong
                    // to no signature, so they stay in the remaining list.
                    if (oldEndOffset != offset)
                    {
                        newRemainingBytes.Add(new RemainingBytes() { BeginOffset = oldEndOffset, EndOffset = offset - 1 });
                    }

                    // Record where in the NEW file the reused signature should appear.
                    var matchingSig = matchingSigs[0];
                    matchingSig.Offset = offset;
                    signaturesToReuse.Add(matchingSig);

                    offset += windowSize;
                    oldEndOffset = offset;
                    matched = true;
                }
            }

            if (!matched)
            {
                // No match: slide the window by one byte.
                offset++;
            }

            // After a match the window jumps, so the rolling signature must be rebuilt from scratch.
            generateFreshSig = matched;
        }
        while (offset + windowSize <= byteRange.EndOffset + 1);

        // Add whatever is left after the last match. This is keyed on oldEndOffset (the start of
        // the unmatched run), not on offset: with a 1-byte window and no match, offset has already
        // moved past EndOffset even though the byte at oldEndOffset is still unaccounted for.
        if (oldEndOffset <= byteRange.EndOffset)
        {
            newRemainingBytes.Add(new RemainingBytes() { BeginOffset = oldEndOffset, EndOffset = byteRange.EndOffset });
        }
    }

    return newRemainingBytes;
}
/// <summary>
/// Reads the CLR header located through the PE file's COM descriptor data directory, then walks
/// the metadata header that it points to, filling <c>mHeader</c>, <c>mMetaHeader</c>,
/// <c>mVersionName</c>, <c>mMetaDataFlags</c> and <c>mMetaStreams</c>.
/// </summary>
/// <param name="pe">PE header reader used to look up data directories and translate virtual addresses to file offsets.</param>
/// <param name="mm">Accessor over the file contents.</param>
/// <exception cref="Exception">
/// Thrown with the message "Size wrong." when the data directory size differs from the marshalled
/// size of <c>IMAGE_COR20_HEADER</c>, or from the <c>cb</c> field read from the file.
/// </exception>
private void LoadHeaderAndStreams(PeHeaderReader pe, MemoryMappedViewAccessor mm)
{
    var corDirectory = pe.DataDirectories[PeHeaderReader.Image_Directory_Entry_Type.COM_DESCRIPTOR];

    // The directory entry must be exactly one IMAGE_COR20_HEADER in size.
    if (Marshal.SizeOf(typeof(IMAGE_COR20_HEADER)) != corDirectory.Size)
    {
        throw new Exception("Size wrong.");
    }

    mm.Read(pe.GetFileOffset(corDirectory.VirtualAddress), out mHeader);

    // The header also reports its own size; it has to agree with the directory entry.
    if (mHeader.cb != corDirectory.Size)
    {
        throw new Exception("Size wrong.");
    }

    // From here on, cursor is the file position of the next unread piece of metadata.
    var cursor = pe.GetFileOffset(mHeader.MetaData.VirtualAddress);

    mm.Read(cursor, out mMetaHeader);
    cursor += Marshal.SizeOf(typeof(MetaDataHeaderPart1));

    // The version string occupies VersionLength bytes; only the part before the first
    // zero byte (if there is one) is decoded.
    var rawVersion = new byte[mMetaHeader.VersionLength];
    mm.ReadArray(cursor, rawVersion, 0, rawVersion.Length);
    cursor += mMetaHeader.VersionLength;

    int terminatorIndex = Array.IndexOf(rawVersion, (byte)0);
    int versionChars = terminatorIndex < 0 ? rawVersion.Length : terminatorIndex;
    mVersionName = Encoding.ASCII.GetString(rawVersion, 0, versionChars);

    // Two 16-bit fields follow: the flags, then the number of stream entries.
    mMetaDataFlags = mm.ReadUInt16(cursor);
    cursor += 2;

    uint streamCount = mm.ReadUInt16(cursor);
    cursor += 2;

    for (int index = 0; index < streamCount; index++)
    {
        MetaDataStream streamEntry;
        mm.Read(cursor, out streamEntry);
        cursor += Marshal.SizeOf(typeof(MetaDataStream));

        // The stream name follows as zero-terminated bytes; the terminator is consumed too.
        var nameBuilder = new StringBuilder();
        for (byte ch = mm.ReadByte(cursor++); ch != 0; ch = mm.ReadByte(cursor++))
        {
            nameBuilder.Append((char)ch);
        }

        // Round the cursor up to the next multiple of four before the next entry.
        cursor += 3;
        cursor &= ~3;

        mMetaStreams.Add(nameBuilder.ToString(), streamEntry);
    }
}