/// <summary>
/// Creates a holder with a freshly allocated key buffer and a
/// <see cref="ManualResetEvent"/> whose initial state is signaled.
/// </summary>
public KeyDataHolder()
{
    // Fixed buffer capacity; the same 100000 figure is used for the batch
    // buffers in Write.
    const int capacity = 100000;

    KeyDataArray = new KeyData[capacity];

    // initialState: true => the event starts signaled, so a wait on it
    // returns immediately until something resets it.
    Sync = new ManualResetEvent(initialState: true);
}
/// <summary>
/// Consumes <paramref name="data"/> once, appending each value to the data file via
/// <c>cb.CbData.UnsafeWrite</c> and recording, per key, the offset and length of that
/// value. Keys are collected in batches of 100000, each batch is handed to
/// <c>WriteBatch</c> (full batches on a background task, the trailing partial batch
/// synchronously), and finally <c>SortAndWrite</c> is called with the list of batch files.
/// </summary>
/// <param name="cb">Cache instance supplying the target directory, database name, data writer and key file.</param>
/// <param name="keyLength">Length every key must have.</param>
/// <param name="data">Key/value pairs to store. Enumerated exactly once.</param>
/// <exception cref="ArgumentException">A key's length differs from <paramref name="keyLength"/>.</exception>
/// <exception cref="AggregateException">A background batch write failed.</exception>
/// <remarks>
/// An earlier revision tried to skip <c>SortAndWrite</c> when "only one batch" existed, but
/// the check actually matched the two-batch case and dumped the entire active buffer
/// (including unused slots) while ignoring the other batch. The single-batch case always
/// went through <c>SortAndWrite</c> anyway, so every case is now routed there.
/// </remarks>
public static void Write(CacheyBashi cb, ushort keyLength, IEnumerable<KeyValuePair<HashBin, byte[]>> data)
{
    var batchFiles = new List<string>();

    // Plain concatenation instead of string.Format: the directory or database name may
    // legally contain '{' or '}', which would break a composite format string.
    var batchNamePrefix = Path.Combine(cb.Dir, cb.DbName) + ".keybatch_";

    // Batches of 100k keys. The size is arbitrary; it could be derived from a rough
    // memory estimate instead.
    // Two buffers are used: one is filled here while the other is being streamed out
    // to its batch file by the background task.
    var keyDataArray1 = new KeyData[100000];
    var keyDataArray2 = new KeyData[100000];
    var activeKeyDataArray = keyDataArray1;
    var index = 0;

    // Tracked as ulong (the type it is stored as) so data files larger than
    // int.MaxValue bytes do not overflow the running offset.
    ulong datFileOffset = 0;
    ulong keyCount = 0;
    Task batchWriteTask = null;

    foreach (var kvp in data)
    {
        if (kvp.Key.Length != keyLength)
        {
            throw new ArgumentException($"All keys must be of the provided keyLength: {keyLength}");
        }

        // Copy the key in case the caller re-uses its buffer between iterations.
        activeKeyDataArray[index].Key = kvp.Key.Clone();
        activeKeyDataArray[index].DataAddr.addr = datFileOffset;
        activeKeyDataArray[index].DataAddr.len = (ulong)kvp.Value.Length;

        // Write the value to the data file immediately so it need not stay in memory.
        cb.CbData.UnsafeWrite(kvp.Value);
        datFileOffset += (ulong)kvp.Value.Length;

        index++;
        keyCount++;

        if (index == activeKeyDataArray.Length)
        {
            // Buffer is full: hand it off and continue filling the other one.
            // batchFiles.Count is the zero-based number of the batch being started.
            var batchFile = batchNamePrefix + batchFiles.Count;

            // The other buffer was used by the previous batch; make sure that write
            // has finished (and surface any failure) before it is overwritten.
            batchWriteTask?.Wait();
            batchWriteTask?.Dispose();

            var array = activeKeyDataArray;
            batchWriteTask = Task.Run(() => { WriteBatch(array, batchFile, array.Length); });
            batchFiles.Add(batchFile);

            index = 0;
            activeKeyDataArray = ReferenceEquals(activeKeyDataArray, keyDataArray1)
                ? keyDataArray2
                : keyDataArray1;
        }
    }

    // Always Wait(), even when the task has already completed: Wait() is what rethrows
    // a failure from the background write. Skipping it for completed tasks would
    // silently ignore a faulted final batch.
    if (batchWriteTask != null)
    {
        batchWriteTask.Wait();
        batchWriteTask.Dispose();
    }

    if (index > 0)
    {
        // Write the remaining keys as a final, partially filled batch.
        var batchFile = batchNamePrefix + batchFiles.Count;
        WriteBatch(activeKeyDataArray, batchFile, index);
        batchFiles.Add(batchFile);
    }

    // Now sort the batches into the final file.
    SortAndWrite(batchFiles, keyCount, cb, keyLength, cb.KeyFile);
}