public int SortByDBSizeMask(HashRec hx, HashRec hy)
{
    ulong xx = hx.Index & SortMask;
    ulong yy = hy.Index & SortMask;
    return xx == yy ? 0 : xx > yy ? 1 : -1;
}
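// A minimal usage sketch (hypothetical call site): sorting on the masked
// Index groups records that land on the same DB page, so the lookup and
// commit loops below can service each page with a single block read.
//
//   Array.Sort(recs, SortByDBSizeMask);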
// only need this if we're grouping across processes
//public HashRecord Owner;

public static byte[] ToByteArrNoRID(HashRec rec)
{
    byte[] rv = new byte[12];
    Array.Copy(BitConverter.GetBytes(rec.CompressedHash), 0, rv, 0, 8);
    Array.Copy(BitConverter.GetBytes(rec.CompressedShortHash), 0, rv, 8, 4);
    return rv;
}
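// A minimal inverse sketch (hypothetical helper, not part of the original
// API): decodes the 12-byte no-RID form back into its two components,
// assuming CompressedHash is a ulong and CompressedShortHash a 4-byte value.
public static (ulong CompressedHash, uint CompressedShortHash) FromByteArrNoRID(byte[] buf)
{
    return (BitConverter.ToUInt64(buf, 0), BitConverter.ToUInt32(buf, 8));
}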
public List<bool> FileChecker(string aPath, bool Force = false, int OnlySize = 0)
{
    var rv = new List<bool>();
    var inputFile = CheckFile(aPath);
    if (inputFile != null || Force)
    {
        if (Force && inputFile == null)
        {
            var toCheck = FractHashTree.CreateRecsFromMemory(File.ReadAllBytes(aPath), MinHashSize, GetHP, 0, 0, OnlySize);
            rv.AddRange(HashRecLookup(toCheck));
        }
        else
        {
            foreach (var ms in inputFile.Sections)
            {
                // only sections that are both code and executable get hashed
                if (!ms.IsCode || !ms.IsExec)
                    continue;

                var totSiz = FractHashTree.TotalHashesForSize(ms.RawFileSize, MinHashSize);
                var hr = new HashRec[totSiz];
                FractHashTree.CreateRecsFromFile(aPath, ms, MinHashSize, (int)totSiz, hr, 0, GetHP);
                rv.AddRange(HashRecLookup(hr));
            }
        }
    }
    return rv;
}
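// A minimal usage sketch (hypothetical call site): check every code/exec
// section of a PE against the DB; each returned bool is one block's hit or miss.
//
//   var hits = FileChecker(@"C:\Windows\System32\ntdll.dll");
//   WriteColor(ConsoleColor.White, $"{hits.Count(h => h):N0}/{hits.Count:N0} blocks known.");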
public HashEntity(HashRec rec)
{
    Hash = rec;
    PartitionKey = $"{rec.FullHash[0]:x}";
    RowKey = BitConverter.ToString(rec.FullHash, 1).Replace("-", "").ToLower();
    MetaInfo = rec.RID.ToString();
}
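// e.g. a FullHash beginning 0xd7 0x4a 0x1f ... yields PartitionKey "d7" and
// RowKey "4a1f..." (the remaining bytes as lower-case hex), spreading
// entities across up to 256 partitions keyed by the first hash byte.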
public List<bool> HashRecLookup(HashRec[] hashArr)
{
    int Count = hashArr.Length;
    var rv = new List<bool>(Count);
    ParallelAlgorithms.Sort<HashRec>(hashArr, 0, Count, GetICompareer<HashRec>(SortByDBSizeMask));

    using (var fs = new FileStream(DBFile, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, DB_READ_SIZE))
    {
        // we need 2 pages now since we're block reading and we might pick a
        // hash that starts scanning at the very end of a page
        byte[] buff = new byte[DB_READ_SIZE];
        int i = 0, firstIndex = 0;
        do
        {
            var Index = hashArr[i].Index;
            // convert Index to PageIndex
            var DBPage = (ulong)((Index & SortMask) & ~DB_PAGE_MASK);

            // find block offset for this hash
            fs.Seek((long)DBPage, SeekOrigin.Begin);
            fs.Read(buff, 0, DB_READ_SIZE);

            do
            {
                // re-read Index since we could be on the inner loop
                Index = hashArr[i].Index;

                // Index inside of a page
                var PageIndex = Index & DB_PAGE_MASK;

                // Hash to probe the DB with
                var toRead = HashRec.ToByteArrNoRID(hashArr[i]);

                // do we already have this hash on disk?
                firstIndex = buff.SearchBytes(toRead, (int)PageIndex, HASH_REC_BYTES);
                rv.Add(firstIndex >= 0);

                i++;
                // continue to next entry if it's in the same block
            } while (i < Count && (((hashArr[i].Index & SortMask) & ~DB_PAGE_MASK) == DBPage));
        } while (i < Count);
    }
    return rv;
}
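// A minimal sketch (hypothetical helpers, not in the original source) of the
// Index-to-offset math that HashRecLookup, LoadFromMem and DumpBufToDisk all
// repeat inline: the masked high bits select a page-aligned file offset and
// the low bits select the byte offset inside that page.
long DBPageOffsetOf(ulong index) => (long)((index & SortMask) & ~DB_PAGE_MASK);
int PageIndexOf(ulong index) => (int)(index & DB_PAGE_MASK);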
/*
 * public byte[] HashData;      // 16
 * public int RID;              // 4  remote ID (meta DB)
 * public byte BlockLen;        // 1
 * // public int Verified       // we steal 1 bit from RID to signify at run time (since we're a value struct) if verify was a success
 * // oddly enough a negative RID means PASSED verification
 * public byte[] HD2;           // 3
 * public ulong Index;          // 8
 *
 * public byte[] Serialized;
 */
public static byte[] ToByteArr(HashRec rec)
{
    byte[] rv = new byte[HASH_REC_BYTES];
    Array.Copy(rec.HashData, 0, rv, 0, rec.HashData.Length);
    Array.Copy(BitConverter.GetBytes(rec.RID), 0, rv, 16, 4);
    Array.Copy(rec.HD2, 0, rv, 20, 3);
    // BlockLen sits in the last byte before Index (16 + 4 + 3 = offset 23)
    rv[23] = rec.BlockLen;
    // theoretically we can use more of this for other purposes
    // since the index is the location in the DB..
    // depending on size, usually like 30something+ bits
    Array.Copy(BitConverter.GetBytes(rec.Index), 0, rv, 24, 8);
    return rv;
}
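// A minimal inverse sketch (hypothetical helper, not part of the original
// API), assuming the 32-byte layout ToByteArr produces above; the tuple
// member names mirror the struct comment.
public static (byte[] HashData, int RID, byte[] HD2, byte BlockLen, ulong Index) FromByteArr(byte[] buf)
{
    var hashData = new byte[16];
    Array.Copy(buf, 0, hashData, 0, 16);
    int rid = BitConverter.ToInt32(buf, 16);
    var hd2 = new byte[3];
    Array.Copy(buf, 20, hd2, 0, 3);
    byte blockLen = buf[23];
    ulong index = BitConverter.ToUInt64(buf, 24);
    return (hashData, rid, hd2, blockLen, index);
}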
public int LoadFromMem(byte[] Input)
{
    int written = 0;
    var hashArr = FractHashTree.CreateRecsFromMemory(Input, MinHashSize, GetHP);
    var Count = hashArr.Length;

    using (var fs = new FileStream(DBFile, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, DB_READ_SIZE))
    {
        // we need 2 pages now since we're block reading and we might pick a
        // hash that starts scanning at the very end of a page
        byte[] buff = new byte[DB_READ_SIZE];
        byte[] zero = new byte[HASH_REC_BYTES];
        int i = 0, firstIndex = 0, zeroIndex = 0;
        bool WriteBack = false;
        do
        {
            var Index = hashArr[i].Index;
            // convert Index to PageIndex
            var DBPage = (long)((Index & SortMask) & ~DB_PAGE_MASK);

            // find block offset for this hash
            fs.Seek(DBPage, SeekOrigin.Begin);
            fs.Read(buff, 0, DB_READ_SIZE);
            WriteBack = false;

            do
            {
                // skip duplicates
                if (i + 1 < Count &&
                    hashArr[i].Index == hashArr[i + 1].Index &&
                    hashArr[i].CompressedHash == hashArr[i + 1].CompressedHash)
                {
                    i++;
                    continue;
                }

                if (i < Count)
                {
                    // re-read Index since we could be on the inner loop
                    Index = hashArr[i].Index;

                    // Index inside of a page
                    var PageIndex = Index & DB_PAGE_MASK;

                    // Hash to populate the DB with
                    var toWrite = HashRec.ToByteArr(hashArr[i]);

                    // do we already have this hash on disk?
                    firstIndex = buff.SearchBytes(toWrite, (int)PageIndex, toWrite.Length);
                    if (firstIndex < 0)
                    {
                        zeroIndex = buff.SearchBytes(zero, (int)PageIndex, zero.Length);
                        if (zeroIndex >= 0)
                        {
                            // we want the modified buffer to get written back
                            WriteBack = true;

                            // update buff with the new hash entry for write back
                            //Array.Copy(toWrite, 0, buff, zeroIndex, toWrite.Length);
                            for (int j = zeroIndex, k = 0; j < zeroIndex + toWrite.Length; j++, k++)
                                buff[j] = toWrite[k];

                            written++;
                            // set at the original index, shifted down since we're bit aligned
                            HDB.SetIdxBit(Index);
                        }
                        else
                        {
                            var strerr = "HASH TABLE SATURATED! YOU NEED TO MAKE THE DB LARGER!!";
                            WriteColor(ConsoleColor.Red, strerr);
                            throw new ApplicationException(strerr);
                        }
                    }
                }
                i++;
                // continue to next entry if it's in the same block
            } while (i < Count && (((hashArr[i].Index & SortMask) & ~DB_PAGE_MASK) == (ulong)DBPage));

            if (WriteBack)
            {
                // reset seek position
                fs.Seek(DBPage, SeekOrigin.Begin);
                // only write back 1 page if we can help it
                fs.Write(buff, 0, DB_READ_SIZE);
            }
        } while (i < Count);
    }
    return written;
}
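// A minimal usage sketch (hypothetical call site): ingest one binary's raw
// bytes and report how many previously-unseen block hashes were committed.
//
//   var newEntries = LoadFromMem(File.ReadAllBytes(somePath));
//   WriteColor(ConsoleColor.Green, $"{newEntries:N0} new hash entries written.");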
void FillHashBuff(ParallelOptions po)
{
    int TotalHashGenCount = 0;
    int HashGenCnt = 0;
    int LoadedCnt = 0;
    HashRec[] hashX;
    Stopwatch sw = Stopwatch.StartNew();
    do
    {
        Extract next = null;

        #region Partition
        // prescan enough entries so we do not overspill the specified hash buffer count
        long CountForMaxBuff = 0;
        ConcurrentStack<Extract> ReadyList = new ConcurrentStack<Extract>();

        while (!DoneDirScan || !LoadList.IsEmpty)
        {
            LoadList.TryPop(out next);
            if (next == null)
            {
                if (po.CancellationToken.IsCancellationRequested)
                    return;
                // another thread may have drained the list after the scan finished
                if (DoneDirScan)
                    break;
                Thread.Yield();
                continue;
            }

            foreach (var ms in next.Sections)
            {
                if (!ms.IsCode && !ms.IsExec)
                    continue;

                var BufferSize = (uint)((ms.RawFileSize + 0xfff) & ~0xfff);
                CountForMaxBuff += FractHashTree.TotalHashesForSize(BufferSize, MinHashSize);
            }

            if (CountForMaxBuff < BufferCount)
                ReadyList.Push(next);
            else
            {
                // add it back for reprocessing
                LoadList.Push(next);
                if (po.CancellationToken.IsCancellationRequested)
                    return;
                po.CancellationToken.ThrowIfCancellationRequested();
                break;
            }
        }
        #endregion

        try
        {
            hashX = new HashRec[BufferCount];
        }
        catch (Exception ex)
        {
            WriteColor(ConsoleColor.Red, $"BufferCount {BufferCount} too large, try something a bit smaller (however keep it as large as you can :)");
            WriteColor(ConsoleColor.Yellow, $"{ex.ToString()}");
            source.Cancel();
            return;
        }

        //WriteColor(ConsoleColor.White, $"Parallel partition from {StartingAvailable} to {CurrAvailableMax} starting.");
        Parallel.ForEach(ReadyList, (hashFile) =>
        {
            if (po.CancellationToken.IsCancellationRequested)
                return;

            Interlocked.Increment(ref LoadedCnt);
            foreach (var ms in hashFile.Sections)
            {
                // ONLY hash CODE/EXEC file sections & PEHeader
                if (!ms.IsCode && !ms.IsExec)
                    continue;

                if (ms.RawFileSize <= 0)
                {
                    LogEx(0, $"Compressed/malicious PE {hashFile.FileName} is too small. Consider manual review of section [{ms.Name}] (e.g. UPX will overlap sections so we will hash it on the next pass, TODO: UPX decoder).");
                    continue;
                }

                //var tot = (int)FractHashTree.TotalHashesForSize(ms.RawFileSize, MinHashSize);
                //var myCnt = Interlocked.Add(ref HashGenCnt, tot);
                //var fht = new FractHashTree(hashFile.FileName, ms, MinHashSize, GetHP);
                //var dht = fht.DumpRecTree();
                //var len = dht.Count();
                //var myLim = Interlocked.Add(ref HashGenCnt, len);
                //dht.CopyTo(0, hashX, myLim - len, len);

                var ReadSize = ms.VirtualSize;
                var BufferSize = (int)((ReadSize + 0xfff) & ~0xfff);
                var memBuff = new byte[BufferSize];

                using (var fread = new FileStream(hashFile.FileName, FileMode.Open, FileAccess.Read, FileShare.Read, PAGE_SIZE))
                {
                    fread.Seek(ms.RawFilePointer, SeekOrigin.Begin);
                    fread.Read(memBuff, 0, (int)ReadSize);
                }

                var recs = FractHashTree.CreateRecsFromMemory(memBuff, MinHashSize, GetHP, hashFile.rID, 0, 0, true);
                if (HashGenCnt + recs.Length > hashX.Length)
                {
                    LoadList.Push(hashFile);
                    break;
                }

                var myLim = Interlocked.Add(ref HashGenCnt, recs.Length);
                recs.CopyTo(hashX, myLim - recs.Length);

                //FractHashTree.CreateRecsFromFile(hashFile.FileName, ms, MinHashSize, tot, hashX, myCnt - tot, GetHP);

                if ((LoadedCnt % 100) == 0 && sw.Elapsed.TotalSeconds > 0)
                    WriteColor(ConsoleColor.Green, $"HashGen entries: {HashGenCnt:N0} - per second {((TotalHashGenCount + HashGenCnt) / sw.Elapsed.TotalSeconds):N0}");
            }
        });

        if (po.CancellationToken.IsCancellationRequested)
            return;

        TotalHashGenCount += HashGenCnt;

        WriteColor(ConsoleColor.Green, $"Filled queue {HashGenCnt:N0}, signaling ready queue.");
        WriteColor(ConsoleColor.Green, $"Loaded-Files/Generated-Hash-Values {LoadedCnt:N0}/{TotalHashGenCount:N0}. HashGen: {(TotalHashGenCount / sw.Elapsed.TotalSeconds):N0} per second.");

        sw.Stop();
        ReadyQueue.Add(Tuple.Create<int, HashRec[]>(HashGenCnt, hashX));
        HashGenCnt = 0;
        sw.Start();
    } while (!DoneDirScan || !LoadList.IsEmpty);

    sw.Stop();
    WriteColor(ConsoleColor.Green, $"Finished Files/Hashes {LoadedCnt:N0}/{TotalHashGenCount:N0}. HashGen: {(TotalHashGenCount / sw.Elapsed.TotalSeconds):N0} per second.");
    return;
}
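// The (x + 0xfff) & ~0xfff idiom above rounds a section size up to the next
// 4 KiB page boundary; a minimal standalone form of the same computation:
static uint RoundUpToPage(uint size) => (size + 0xfffu) & ~0xfffu;   // e.g. 0x1201 -> 0x2000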
public static HashRec[] CreateRecsFromMemory(byte[] MemPage, int minBlockSize, Func<HashLib.IHash> getHP, int rID = 0, long VA = 0, int OnlySize = 0, bool PreSerialize = false, bool FullHashes = false)
{
    if (MemPage == null)
        return null;

    int RawSize = MemPage.Length;
    var TotalHashs = BlockCount(RawSize, minBlockSize);
    var sHash = new HashRec[TotalHashs];

    if (OnlySize != 0)
    {
        minBlockSize = OnlySize;
        TotalHashs = BlockCount(RawSize, minBlockSize);
        sHash = new HashRec[TotalHashs];
    }

    if (getHP == null)
        getHP = new Func<HashLib.IHash>(() => HashLib.HashFactory.Crypto.CreateTiger2());

    /*
     * var levelMap = LevelMaps(RawSize, minBlockSize);
     * int LevelCount = levelMap.Count();
     * long TotalHashs = levelMap[LevelCount - 1].Item1 + levelMap[LevelCount - 1].Item2;
     */

    // single level: hash each minBlockSize-sized block of the input
    HashLib.IHash localHashProv = getHP();
    localHashProv.Initialize();

    for (int arri = 0; arri < TotalHashs; arri++)
    {
        localHashProv.TransformBytes(MemPage, arri * minBlockSize, minBlockSize);
        var hashBytes = localHashProv.TransformFinal().GetBytes();

        sHash[arri] = new HashRec(hashBytes, 0, rID);
        if (VA != 0)
            sHash[arri].Address = VA + (arri * minBlockSize);

        //if (!FullHashes)
        //    sHash[arri].FullHash = null;

        // trying to reduce some load in the DB commit path
        if (PreSerialize)
            sHash[arri].Serialized = HashRec.ToByteArr(sHash[arri]);
    }
    return sHash;
}
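// A minimal usage sketch (hypothetical values): hash a 4 KiB page into
// 256-byte block records, tagging each with a relative virtual address.
//
//   var page = new byte[4096];                    // e.g. one mapped code page
//   var recs = CreateRecsFromMemory(page, 256, null, rID: 1, VA: 0x401000);
//   // recs.Length == 16; recs[3].Address == 0x401300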
public static HashRec[] CreateRecsFromFile(string BackingFile, MiniSection input, int minBlockSize, int Totalhash, HashRec[] DestArr, int DestIdx, Func<HashLib.IHash> getHP)
{
    int RawSize = (int)((input.RawFileSize + 0xfff) & ~0xfff);
    //int VirtualSize = (int)input.VirtualSize;
    var topCnt = BlockCount(RawSize, PAGE_SIZE);

    if (getHP == null)
        getHP = new Func<HashLib.IHash>(() => HashLib.HashFactory.Crypto.CreateTiger2());

    /*
     * var levelMap = LevelMaps(RawSize, minBlockSize);
     * int LevelCount = levelMap.Count();
     * long TotalHashs = levelMap[LevelCount - 1].Item1 + levelMap[LevelCount - 1].Item2;
     */
    int LevelCount = 1;
    var TotalHashs = BlockCount(RawSize, minBlockSize);

    HashRec[] sHash = null;
    if (DestArr == null)
        sHash = new HashRec[TotalHashs];

    HashLib.IHash[] localHashProv = new HashLib.IHash[LevelCount];
    // smallest to largest organization
    for (int i = 0; i < LevelCount; i++)
        localHashProv[i] = getHP();

    byte[] pageBuf;
    byte[][] buffers = { new byte[PAGE_SIZE], new byte[PAGE_SIZE] };
    int filled = 0;

    // we do this many 4k reads at the "top level"
    // var TopCnt = levelMap[LevelCount - 1].Item2;
    var TopCnt = topCnt;

    using (var fs = new FileStream(BackingFile, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan))
    {
        fs.Position = input.RawFilePointer;
        int remaining = (int)input.RawFileSize;
        int readIn = fs.Read(buffers[filled], 0, PAGE_SIZE);
        remaining -= readIn;
        if (remaining < 0)
            Array.Clear(buffers[filled], (int)input.RawFileSize, PAGE_SIZE - (int)input.RawFileSize);

        for (int i = 0; i < TopCnt; i++)
        {
            // setup buffers for parallel load/read
            pageBuf = buffers[filled];
            // swap filled so the parallel task can start loading while we compute the previous chunk
            filled ^= 1;

            Parallel.Invoke(() =>
            {
                for (int lvl = 0; lvl < LevelCount; lvl++)
                {
                    // single level: the levelMap is commented out above, so the
                    // block size is just minBlockSize and the level's base index
                    // into the output array is 0
                    var blockSize = minBlockSize << lvl;
                    var blockCnt = PAGE_SIZE / blockSize;
                    var hashLevelIndex = 0;

                    localHashProv[lvl].Initialize();
                    for (int arri = 0; arri < blockCnt; arri++)
                    {
                        localHashProv[lvl].TransformBytes(pageBuf, arri * blockSize, blockSize);
                        var hashBytes = localHashProv[lvl].TransformFinal().GetBytes();

                        if (DestArr != null)
                            DestArr[DestIdx + hashLevelIndex + arri + (i * blockCnt)] = new HashRec(hashBytes, (byte)lvl);
                        else
                            sHash[hashLevelIndex + arri + (i * blockCnt)] = new HashRec(hashBytes, (byte)lvl);
                    }
                }
            },
            () =>
            {
                // read the next page into the spare buffer while the other task hashes this one
                if (remaining > 0)
                {
                    readIn = fs.Read(buffers[filled], 0, PAGE_SIZE);
                    if (readIn < PAGE_SIZE)
                    {
                        Array.Clear(buffers[filled], readIn, PAGE_SIZE - readIn);
                        readIn = PAGE_SIZE;
                    }
                    if (readIn > remaining)
                        Array.Clear(buffers[filled], remaining, readIn - remaining);
                    remaining -= readIn;
                }
            });
        }
    }
    return sHash;
}
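// A minimal usage sketch (mirroring the FileChecker call site above;
// hypothetical section value ms): hash one PE section straight off disk
// into a caller-supplied array.
//
//   var total = (int)FractHashTree.TotalHashesForSize(ms.RawFileSize, MinHashSize);
//   var recs = new HashRec[total];
//   FractHashTree.CreateRecsFromFile(path, ms, MinHashSize, total, recs, 0, null);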
public static HashRec[] CreateRecsFromMemory(byte[] MemPage, int minBlockSize, Func<HashLib.IHash> getHP, int rID = 0, long VA = 0, int OnlySize = 0, bool PreSerialize = false)
{
    if (MemPage == null)
        return null;

    int RawSize = MemPage.Length;
    var topCnt = BlockCount(RawSize, PAGE_SIZE);

    if (getHP == null)
        getHP = new Func<HashLib.IHash>(() => HashLib.HashFactory.Crypto.CreateTiger2());

    var levelMap = LevelMaps(RawSize, minBlockSize);
    int LevelCount = levelMap.Count();
    long TotalHashs = levelMap[LevelCount - 1].Item1 + levelMap[LevelCount - 1].Item2;

    HashLib.IHash[] localHashProv = new HashLib.IHash[LevelCount];
    var sHash = new HashRec[TotalHashs];

    if (OnlySize != 0)
    {
        LevelCount = 1;
        minBlockSize = OnlySize;
        TotalHashs = BlockCount(RawSize, minBlockSize);
        sHash = new HashRec[TotalHashs];
    }

    // smallest to largest organization
    for (int i = 0; i < LevelCount; i++)
        localHashProv[i] = getHP();

    for (byte lvl = 0; lvl < LevelCount; lvl++)
    {
        var blockSize = minBlockSize << lvl;
        var blockCnt = BlockCount(RawSize, blockSize);
        var hashLevelIndex = levelMap[lvl].Item1;

        localHashProv[lvl].Initialize();
        for (int arri = 0; arri < blockCnt; arri++)
        {
            localHashProv[lvl].TransformBytes(MemPage, arri * blockSize, blockSize);
            var hashBytes = localHashProv[lvl].TransformFinal().GetBytes();

            sHash[hashLevelIndex + arri] = new HashRec(hashBytes, lvl, rID);

            // trying to reduce some load in the DB commit path
            if (PreSerialize)
                sHash[hashLevelIndex + arri].Serialized = HashRec.ToByteArr(sHash[hashLevelIndex + arri]);
        }
    }
    return sHash;
}
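// A minimal sketch of what LevelMaps plausibly computes, judging from how
// Item1 (level's base index), Item2 (hash count) and Item3 (block size) are
// consumed above and in CreateRecsFromFile; a hypothetical reconstruction,
// not the original implementation. For RawSize = 4096, minBlockSize = 256 it
// yields level starts 0, 16, 24, 28, 30 and a grand total of 31 hashes.
static List<Tuple<long, long, long>> LevelMapsSketch(int RawSize, int minBlockSize)
{
    var rv = new List<Tuple<long, long, long>>();
    long start = 0;
    for (long blockSize = minBlockSize; blockSize <= RawSize; blockSize <<= 1)
    {
        long count = (RawSize + blockSize - 1) / blockSize;  // BlockCount equivalent
        rv.Add(Tuple.Create(start, count, blockSize));
        start += count;
    }
    return rv;
}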
void DumpBufToDisk(ParallelOptions po)
{
    Stopwatch sw;
    long TotalDBWrites = 0;
    long TotalRequested = 0;
    long DBPage = 0;
    SortMask = HDB.DBEntriesMask << HASH_SHIFT;
    do
    {
        var hashArrTpl = ReadyQueue.Take(po.CancellationToken);
        var hashArr = hashArrTpl.Item2;
        var Count = hashArrTpl.Item1;

        ParallelAlgorithms.Sort<HashRec>(hashArr, 0, Count, GetICompareer<HashRec>(SortByDBSizeMask));
        TotalRequested += Count;

        if (Vtero.VerboseLevel >= 1)
            WriteColor(ConsoleColor.Cyan, $"Hash entries to store: {Count:N0}");

        using (var fs = new FileStream(DBFile, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, DB_READ_SIZE))
        {
            // we need 2 pages now since we're block reading and we might pick a
            // hash that starts scanning at the very end of a page
            byte[] buff = new byte[DB_READ_SIZE];
            byte[] zero = new byte[HASH_REC_BYTES];
            int i = 0, firstIndex = 0, zeroIndex = 0;
            bool WriteBack = false;

            sw = Stopwatch.StartNew();
            do
            {
                var Index = hashArr[i].Index;
                // convert Index to PageIndex
                DBPage = (long)((Index & SortMask) & ~DB_PAGE_MASK);

                // find block offset for this hash
                fs.Seek(DBPage, SeekOrigin.Begin);
                fs.Read(buff, 0, DB_READ_SIZE);
                WriteBack = false;

                if (po.CancellationToken.IsCancellationRequested)
                    return;
                po.CancellationToken.ThrowIfCancellationRequested();

                do
                {
                    // skip duplicates
                    if (i + 1 < Count && hashArr[i].Index == hashArr[i + 1].Index)
                    //&& UnsafeHelp.UnsafeCompare(hashArr[i].HashData, hashArr[i + 1].HashData))
                    {
                        i++;
                        continue;
                    }

                    if (i < Count)
                    {
                        // re-read Index since we could be on the inner loop
                        Index = hashArr[i].Index;

                        // Index inside of a page
                        var PageIndex = (int)(Index & DB_PAGE_MASK);

                        // Hash to populate the DB with
                        var toWrite = BitConverter.GetBytes(hashArr[i].CompressedHash);

                        // do we already have this hash on disk?
                        firstIndex = buff.SearchBytes(toWrite, PageIndex, HASH_REC_BYTES);
                        if (firstIndex < 0)
                        {
                            zeroIndex = buff.SearchBytes(zero, PageIndex, HASH_REC_BYTES);
                            if (zeroIndex >= 0)
                            {
                                // we want the modified buffer to get written back
                                WriteBack = true;

                                toWrite = HashRec.ToByteArr(hashArr[i]);

                                // update buff with the new hash entry for write back
                                //Array.Copy(toWrite, 0, buff, zeroIndex, toWrite.Length);
                                for (int j = zeroIndex, k = 0; j < zeroIndex + toWrite.Length; j++, k++)
                                    buff[j] = toWrite[k];

                                TotalDBWrites++;
                                // set at the original index, shifted down since we're bit aligned
                                HDB.SetIdxBit(Index);
                            }
                            else
                            {
                                var strerr = $"HASH TABLE SATURATED!!! ({DBPage:X}:{PageIndex:X}) YOU NEED TO MAKE THE DB LARGER!!";
                                WriteColor(ConsoleColor.Red, strerr);
                                source.Cancel();
                            }
                        }
                    }
                    i++;

                    if (i % 100000 == 0 && sw.Elapsed.TotalSeconds > 0)
                        WriteColor(ConsoleColor.Cyan, $"DB commit entries: {i:N0} - per second {(i / sw.Elapsed.TotalSeconds):N0}");

                    // continue to next entry if it's in the same block
                } while (i < Count && (((hashArr[i].Index & SortMask) & ~DB_PAGE_MASK) == (ulong)DBPage));

                if (WriteBack)
                {
                    if (po.CancellationToken.IsCancellationRequested)
                        return;
                    // reset seek position
                    fs.Seek(DBPage, SeekOrigin.Begin);
                    // only write back 1 page if we can help it
                    fs.Write(buff, 0, DB_READ_SIZE);
                }
            } while (i < Count);

            WriteColor(ConsoleColor.Cyan, $"DB entries: {i:N0} - per second {(i / sw.Elapsed.TotalSeconds):N0}");
            //aPool.Return(hashArr);
        }
    } while (!DoneHashLoad || ReadyQueue.Count() > 0);

    WriteColor(ConsoleColor.Cyan, $"Finished DB write {TotalDBWrites:N0} NEW entries. Requested {TotalRequested:N0} (reduced count reflects de-duplication). Task time: {sw.Elapsed}");
}
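// A minimal wiring sketch (hypothetical call site, assuming ReadyQueue is a
// BlockingCollection<Tuple<int, HashRec[]>> shared with FillHashBuff):
//
//   var po = new ParallelOptions { CancellationToken = source.Token };
//   var producer = Task.Run(() => FillHashBuff(po));   // hashes files into buffers
//   var consumer = Task.Run(() => DumpBufToDisk(po));  // commits buffers to the DB
//   Task.WaitAll(producer, consumer);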