/// <summary>
/// Checks each hash in <paramref name="hashArr"/> against the on-disk hash DB.
/// </summary>
/// <param name="hashArr">Hash records to look up; sorted in place by DB page so each page is read once.</param>
/// <returns>
/// One bool per entry (in post-sort order): true if the hash record was found on disk.
/// </returns>
public List<bool> HashRecLookup(HashRec[] hashArr)
{
    int Count = hashArr.Length;
    var rv = new List<bool>(Count);

    // Empty input: nothing to look up (the do/while below assumes at least one entry).
    if (Count == 0)
        return rv;

    // Sort by the DB-size-masked index so all hashes that live in the same
    // DB page are adjacent — each page is then read from disk exactly once.
    ParallelAlgorithms.Sort<HashRec>(hashArr, 0, Count, GetICompareer<HashRec>(SortByDBSizeMask));

    using (var fs = new FileStream(DBFile, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, DB_READ_SIZE))
    {
        // We read DB_READ_SIZE at a time since a hash may start its scan
        // near the very end of a page.
        byte[] buff = new byte[DB_READ_SIZE];
        int i = 0;

        do
        {
            var Index = hashArr[i].Index;
            // Convert the hash index into the file offset of its DB page.
            var DBPage = (ulong)((Index & SortMask) & ~DB_PAGE_MASK);

            // Load the page holding this hash. FileStream.Read may return fewer
            // bytes than requested, so loop until the buffer is filled or EOF.
            fs.Seek((long)DBPage, SeekOrigin.Begin);
            int read = 0;
            while (read < DB_READ_SIZE)
            {
                int got = fs.Read(buff, read, DB_READ_SIZE - read);
                if (got <= 0)
                    break;
                read += got;
            }

            do
            {
                // Re-read Index since i advances on this inner loop.
                Index = hashArr[i].Index;
                // Offset of this hash inside the loaded page.
                var PageIndex = Index & DB_PAGE_MASK;
                var toFind = HashRec.ToByteArrNoRID(hashArr[i]);

                // Present on disk?
                rv.Add(buff.SearchBytes(toFind, (int)PageIndex, HASH_REC_BYTES) >= 0);
                i++;
                // Continue while the next entry falls into the same DB page.
            } while (i < Count && (((hashArr[i].Index & SortMask) & ~DB_PAGE_MASK) == DBPage));
        } while (i < Count);
    }
    return rv;
}
/// <summary>
/// Consumer loop: drains batches of hash records from <c>ReadyQueue</c> and commits
/// new (non-duplicate) entries into free slots of the on-disk hash DB, page by page.
/// Runs until the producer signals <c>DoneHashLoad</c> and the queue is empty, or
/// until cancellation via <paramref name="po"/>.
/// </summary>
/// <param name="po">Supplies the CancellationToken observed between page operations.</param>
void DumpBufToDisk(ParallelOptions po)
{
    Stopwatch sw = Stopwatch.StartNew();
    long TotalDBWrites = 0;
    long TotalRequested = 0;
    long DBPage = 0;

    // Mask that maps a hash index onto its slot range in the DB.
    SortMask = HDB.DBEntriesMask << HASH_SHIFT;

    do
    {
        // Block until a producer hands us a (count, entries) batch.
        var hashArrTpl = ReadyQueue.Take(po.CancellationToken);
        var hashArr = hashArrTpl.Item2;
        var Count = hashArrTpl.Item1;

        // Sort so entries that live in the same DB page are adjacent: each
        // page is then read from / written back to disk at most once per batch.
        ParallelAlgorithms.Sort<HashRec>(hashArr, 0, Count, GetICompareer<HashRec>(SortByDBSizeMask));
        TotalRequested += Count;

        if (Vtero.VerboseLevel >= 1)
            WriteColor(ConsoleColor.Cyan, $"Hash entries to store: {Count:N0}");

        using (var fs = new FileStream(DBFile, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, DB_READ_SIZE))
        {
            // We read DB_READ_SIZE at a time since a hash may start its scan
            // near the very end of a page.
            byte[] buff = new byte[DB_READ_SIZE];
            byte[] zero = new byte[HASH_REC_BYTES];   // an all-zero record marks a free slot
            int i = 0, firstIndex = 0, zeroIndex = 0;
            bool WriteBack = false;

            sw = Stopwatch.StartNew();
            do
            {
                var Index = hashArr[i].Index;
                // File offset of the DB page that owns this hash.
                DBPage = (long)((Index & SortMask) & ~DB_PAGE_MASK);

                // Load the page. FileStream.Read may return fewer bytes than
                // requested, so loop until the buffer is filled or EOF.
                fs.Seek(DBPage, SeekOrigin.Begin);
                int read = 0;
                while (read < DB_READ_SIZE)
                {
                    int got = fs.Read(buff, read, DB_READ_SIZE - read);
                    if (got <= 0)
                        break;
                    read += got;
                }
                WriteBack = false;

                if (po.CancellationToken.IsCancellationRequested)
                    return;

                do
                {
                    // Skip duplicate entries (identical Index) — only the last copy is stored.
                    if (i + 1 < Count && hashArr[i].Index == hashArr[i + 1].Index)
                    {
                        i++;
                        continue;
                    }
                    if (i < Count)
                    {
                        // Re-read Index since i advances on this inner loop.
                        Index = hashArr[i].Index;
                        // Offset of this hash inside the loaded page.
                        var PageIndex = (int)(Index & DB_PAGE_MASK);
                        // Hash to populate the DB with.
                        var toWrite = HashRec.ToByteArrNoRID(hashArr[i]);

                        // Do we already have this hash on disk?
                        firstIndex = buff.SearchBytes(toWrite, PageIndex, HASH_REC_BYTES);
                        if (firstIndex < 0)
                        {
                            // Not found — claim the first free (all-zero) slot in the page.
                            zeroIndex = buff.SearchBytes(zero, PageIndex, HASH_REC_BYTES);
                            if (zeroIndex >= 0)
                            {
                                // We want the modified buffer to get written back.
                                WriteBack = true;
                                toWrite = HashRec.ToByteArr(hashArr[i]);
                                // Update buff with the new hash entry for write-back.
                                Array.Copy(toWrite, 0, buff, zeroIndex, toWrite.Length);
                                TotalDBWrites++;
                                // Record the original index in the presence bitmap.
                                HDB.SetIdxBit(Index);
                            }
                            else
                            {
                                // No free slot left in this page: the table is full.
                                var strerr = $"HASH TABLE SATURATED!!! ({DBPage:X}:{PageIndex:X}) YOU NEED TO MAKE THE DB LARGER!!";
                                WriteColor(ConsoleColor.Red, strerr);
                                source.Cancel();
                            }
                        }
                    }
                    i++;
                    if (i % 100000 == 0 && sw.Elapsed.TotalSeconds > 0)
                        WriteColor(ConsoleColor.Cyan, $"DB commit entries: {i:N0} - per second {(i / sw.Elapsed.TotalSeconds):N0}");
                    // Continue while the next entry lands in the same DB page.
                } while (i < Count && (((hashArr[i].Index & SortMask) & ~DB_PAGE_MASK) == (ulong)DBPage));

                if (WriteBack)
                {
                    if (po.CancellationToken.IsCancellationRequested)
                        return;
                    // Flush the modified page back to its original offset.
                    fs.Seek(DBPage, SeekOrigin.Begin);
                    fs.Write(buff, 0, DB_READ_SIZE);
                }
            } while (i < Count);

            WriteColor(ConsoleColor.Cyan, $"DB entries: {i:N0} - per second {(i / sw.Elapsed.TotalSeconds):N0}");
        }
    } while (!DoneHashLoad || ReadyQueue.Count() > 0);

    WriteColor(ConsoleColor.Cyan, $"Finished DB write {TotalDBWrites:N0} NEW entries. Requested {TotalRequested:N0} (reduced count reflects de-duplication). Task time: {sw.Elapsed}");
}