Esempio n. 1
0
 /// <summary>
 /// Creates a holder with a pre-allocated buffer of 100000 <c>KeyData</c> slots
 /// and a <see cref="ManualResetEvent"/> that starts out signaled.
 /// </summary>
 public KeyDataHolder()
 {
     // Fixed-size key buffer; the capacity is hard-coded here.
     this.KeyDataArray = new KeyData[100000];

     // Initially signaled, so a first wait on Sync returns immediately.
     this.Sync = new ManualResetEvent(initialState: true);
 }
Esempio n. 2
0
        /// <summary>
        /// Writes every value in <paramref name="data"/> to the data file and builds the key file.
        /// Keys are collected in fixed-size batches; each batch is handed to <c>WriteBatch</c>
        /// (full batches on a background task, using two alternating buffers) and the resulting
        /// batch files are finally combined by <c>SortAndWrite</c>.
        /// </summary>
        /// <param name="cb">Store providing <c>Dir</c>, <c>DbName</c>, <c>CbData</c> and <c>KeyFile</c>.</param>
        /// <param name="keyLength">Length every key must report.</param>
        /// <param name="data">Key/value pairs to persist; enumerated exactly once.</param>
        /// <exception cref="ArgumentNullException"><paramref name="cb"/> or <paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">A key's length differs from <paramref name="keyLength"/>.</exception>
        /// <exception cref="AggregateException">A background batch write failed.</exception>
        public static void Write(CacheyBashi cb, ushort keyLength, IEnumerable <KeyValuePair <HashBin, byte[]> > data)
        {
            if (cb == null)
            {
                throw new ArgumentNullException(nameof(cb));
            }

            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            //number of keys held in memory before a batch is spilled to disk (arbitrary)
            const int BatchSize = 100000;

            var batchFiles      = new List <string>();
            var batchNameFormat = Path.Combine(cb.Dir, cb.DbName) + ".keybatch_{0}";
            var batchIndex      = 0;

            //two buffers: one is filled while the other is being written out by the background task
            var keyDataArray1      = new KeyData[BatchSize];
            var keyDataArray2      = new KeyData[BatchSize];
            var activeKeyDataArray = keyDataArray1;
            var index              = 0;

            //64-bit offset: an int would wrap once more than 2 GiB of values have been written
            ulong datFileIndex   = 0;
            ulong keyCount       = 0;
            Task  batchWriteTask = null;

            foreach (var kvp in data)
            {
                if (kvp.Key.Length != keyLength)
                {
                    throw new ArgumentException($"All keys must be of the provided keyLength: {keyLength}");
                }

                //copy the key in case the caller re-uses its buffer between iterations
                activeKeyDataArray[index].Key           = kvp.Key.Clone();
                activeKeyDataArray[index].DataAddr.addr = datFileIndex;
                activeKeyDataArray[index].DataAddr.len  = (ulong)kvp.Value.Length;

                //write the value straight away so it does not have to stay in memory
                cb.CbData.UnsafeWrite(kvp.Value);
                datFileIndex += (ulong)kvp.Value.Length;

                index++;
                keyCount++;

                if (index < activeKeyDataArray.Length)
                {
                    continue;
                }

                //the active buffer is full: hand it to a background writer and switch buffers
                var batchFile = string.Format(batchNameFormat, batchIndex);

                //the previous writer owns the other buffer; it must finish before that buffer is refilled.
                //Wait() also surfaces any failure of the previous batch write.
                batchWriteTask?.Wait();
                batchWriteTask?.Dispose();

                var array = activeKeyDataArray;
                batchWriteTask = Task.Run(() =>
                {
                    WriteBatch(array, batchFile, array.Length);
                });

                batchFiles.Add(batchFile);
                batchIndex++;
                index = 0;
                activeKeyDataArray = activeKeyDataArray == keyDataArray1 ? keyDataArray2 : keyDataArray1;
            }

            //always wait, even if the task has already completed: a faulted task counts as completed,
            //and skipping the wait would silently ignore a failed batch write
            batchWriteTask?.Wait();
            batchWriteTask?.Dispose();

            if (index > 0) //write the remaining keys to the final, partially filled batch
            {
                var batchFile = string.Format(batchNameFormat, batchIndex);
                WriteBatch(activeKeyDataArray, batchFile, index);
                batchFiles.Add(batchFile);
            }

            //every case, including a single batch, goes through SortAndWrite. The former
            //"batchIndex == 1" shortcut only triggered when there were two batches and then wrote
            //just one buffer (including unfilled slots) while reporting the full key count.
            SortAndWrite(batchFiles, keyCount, cb, keyLength, cb.KeyFile);
        }