/// <summary>
/// Saves data to the end of the storage, or updates the data addressed by the given pointer.
/// If there is not enough space to update in place, the data is appended to the end of the storage.
/// </summary>
/// <returns>New pointer to the data (it may equal the previous pointer if the data was rewritten in place).</returns>
public IPtr Save(byte[] data, IPtr _idx = null)
{
    //data = Compress.CompressGZip(data);
    var idx = _idx as Ptr;
    if (idx != null && data.Length <= idx.Capacity)
    {
        return Update(data, idx);
    }
    return Store(data);
}
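// Hypothetical usage sketch for Save (variable names and payloads are illustrative only, and the
// `storage` instance is assumed from the test code below): a new record is appended, a smaller
// payload reuses the old slot, and a payload exceeding the slot's capacity is appended again.
byte[] payload = System.Text.Encoding.UTF8.GetBytes("first version of the record");
IPtr ptr = storage.Save(payload);               // appended to the end of the storage

byte[] smaller = System.Text.Encoding.UTF8.GetBytes("v2");
IPtr samePtr = storage.Save(smaller, ptr);      // fits into the existing capacity -> updated in place

byte[] larger = new byte[64 * 1024];
IPtr movedPtr = storage.Save(larger, ptr);      // too big for the old slot -> appended, new pointer returned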
/// <summary>
/// Loads data by pointer _ptr.
/// Returns null if the data is corrupted and its checksum is not valid.
/// </summary>
public byte[] Load(IPtr _ptr)
{
    var ptr = _ptr as Ptr;
    if (ptr == null)
    {
        return null;
    }
    var dbFile = dbFiles[ptr.FileNum];
    byte[] data = ReadFromDbFile(dbFile, ptr.Capacity, ptr.Position);
#if DEBUG
    onReading(ptr, data);
#endif
    return data;
}
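// Usage sketch (hypothetical caller): Load returns null both for an unknown pointer type and for
// a record whose checksum no longer matches, so always check the result before using it.
byte[] loaded = storage.Load(ptr);
if (loaded == null)
{
    // corrupted or unreadable record; fall back to a replica or re-create the data
}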
public void Set(TKey key, IPtr ptr)
{
    if (key == null)
    {
        throw new ArgumentNullException("key");
    }
    if (ptr == null)
    {
        throw new ArgumentNullException("ptr");
    }
    var keydata = serializeKey(key);
    var ptrdata = ptr.Serialize();
    var lenkey = BitConverter.GetBytes((short)keydata.Length);
    var lenptr = BitConverter.GetBytes((short)ptrdata.Length);
    long position;
    Idx newidx;
    lock (indexFile)
    {
        position = indexFile.Seek(0, SeekOrigin.End);
        indexFile.WriteByte(1);
        indexFile.Write(lenkey, 0, 2);
        indexFile.Write(keydata, 0, keydata.Length);
        indexFile.Write(lenptr, 0, 2);
        indexFile.Write(ptrdata, 0, ptrdata.Length);
        newidx = new Idx() { IndexFilePosition = position, Ptr = ptr };
        indexes.AddOrUpdate(key, newidx, (key1, idx) => newidx);
    }
    // We do not care what was written before this record.
    // All new entries are appended to the end of the index file, so when the file is read back,
    // the valid entry for a key is the one written last.
#if DEBUG
    onSet(key, newidx.Ptr);
#endif
}
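// Hedged sketch of how the append-only index format written by Set could be replayed on startup.
// The actual recovery routine is not part of this listing; deserializeKey and Ptr.Deserialize are
// assumed counterparts of serializeKey and IPtr.Serialize. Because every Set appends a new record,
// replaying the file front to back and letting later records overwrite earlier ones reproduces the
// "last write wins" behaviour described in the comment above.
private void ReplayIndexFile()
{
    indexFile.Seek(0, SeekOrigin.Begin);
    while (true)
    {
        long recordPosition = indexFile.Position;
        int marker = indexFile.ReadByte();
        if (marker != 1)
            break;                                       // end of file or unknown record marker

        var lenkey = new byte[2];
        indexFile.Read(lenkey, 0, 2);
        var keydata = new byte[BitConverter.ToInt16(lenkey, 0)];
        indexFile.Read(keydata, 0, keydata.Length);

        var lenptr = new byte[2];
        indexFile.Read(lenptr, 0, 2);
        var ptrdata = new byte[BitConverter.ToInt16(lenptr, 0)];
        indexFile.Read(ptrdata, 0, ptrdata.Length);

        TKey key = deserializeKey(keydata);              // assumed inverse of serializeKey
        IPtr ptr = Ptr.Deserialize(ptrdata);             // assumed inverse of IPtr.Serialize
        var idx = new Idx() { IndexFilePosition = recordPosition, Ptr = ptr };
        indexes.AddOrUpdate(key, idx, (k, old) => idx);  // later records overwrite earlier ones
    }
}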
public void PerformanceBatchTest()
{
    int N, NThreads, NBatch, NKeys;
    bool SAVETEST = true, READTEST = true;
    NKeys = 5000;
    byte[][] data = new byte[NKeys][];
    IPtr[] ptrs = new IPtr[NKeys];
    N = 100000;
    NThreads = 10;
    NBatch = 5000;
    int NReadThreads = 8, NWriteThreads = 2;
    using (storage = CreateNewStorage(true))
    {
        for (int i = 0; i < data.Length; ++i)
        {
            data[i] = CreateData(i, 100 + rnd.Next(5000));
            ptrs[i] = storage.Save(data[i]);
        }
        if (SAVETEST)
        {
            double[] KbytesSaved = Enumerable.Repeat(0.0, NThreads).ToArray();
            Action<int> save = (tid) =>
            {
                for (int i = 0; i < N; i += NBatch)
                {
                    Tuple<IPtr, byte[]>[] values = Enumerable.Repeat(0, NBatch)
                        .Select(_ => Tuple.Create(ptrs[rnd.Next(NKeys)], data[rnd.Next(NKeys)]))
                        .ToArray();
                    storage.BatchSave(values);
                    KbytesSaved[tid] += values.Sum(v => (long)v.Item2.Length) / 1024.0;
                }
            };
            var tsave = MeasureTimeOfThredas(NThreads, save);
            Console.WriteLine("" + (N * NThreads) + " save operations in " + NThreads + " threads elapsed: " + tsave);
            Console.WriteLine("Save Operations in 1 sec: " + ((double)N * NThreads / tsave.TotalSeconds).ToString("### ###"));
            Console.WriteLine("KBytes saved in 1 sec: " + (KbytesSaved.Sum() / tsave.TotalSeconds).ToString("### ### ### Kb"));
            Console.WriteLine("Total KBytes saved: " + KbytesSaved.Sum());
        }
        if (READTEST)
        {
            double[] KbytesReaded = Enumerable.Repeat(0.0, NThreads).ToArray();
            Action<int> read = (tid) =>
            {
                for (int i = 0; i < N; i += NBatch)
                {
                    var keys = rnd.NextN(NKeys, NBatch).Select(key => ptrs[key]).ToArray();
                    var values = storage.BatchLoad(keys);
                    KbytesReaded[tid] += values.Sum(v => v.Item2 != null ? (long)v.Item2.Length : (long)0) / 1024.0;
                }
            };
            var tread = MeasureTimeOfThredas(NThreads, read);
            Console.WriteLine("" + (N * NThreads) + " read operations in " + NThreads + " threads elapsed: " + tread);
            Console.WriteLine("Read Operations in 1 sec: " + ((double)N * NThreads / tread.TotalSeconds).ToString("### ###"));
            Console.WriteLine("KBytes read in 1 sec: " + (KbytesReaded.Sum() / tread.TotalSeconds).ToString("### ### ##0 Kb"));
            Console.WriteLine("Total KBytes read: " + KbytesReaded.Sum());
        }
        if (SAVETEST && READTEST)
        {
            NThreads = NReadThreads + NWriteThreads;
            double[] KbytesProcessed = Enumerable.Repeat(0.0, NThreads).ToArray();
            Action<int> action = (tid) =>
            {
                for (int i = 0; i < N; i += NBatch)
                {
                    Tuple<IPtr, byte[]>[] values;
                    if (tid < NWriteThreads)
                    {
                        // write threads: overwrite random records in batches
                        values = Enumerable.Repeat(0, NBatch)
                            .Select(_ => Tuple.Create(ptrs[rnd.Next(NKeys)], data[rnd.Next(NKeys)]))
                            .ToArray();
                        storage.BatchSave(values);
                    }
                    else
                    {
                        // read threads: load random records in batches
                        var keys = rnd.NextN(NKeys, NBatch).Select(key => ptrs[key]).ToArray();
                        values = storage.BatchLoad(keys).ToArray();
                    }
                    KbytesProcessed[tid] += values.Sum(v => v.Item2 != null ? (long)v.Item2.Length : (long)0) / 1024.0;
                }
            };
            var time = MeasureTimeOfThredas(NThreads, action);
            Console.WriteLine("" + (N * NReadThreads) + " read operations in " + NReadThreads + " threads and \n" +
                              "" + (N * NWriteThreads) + " write operations in " + NWriteThreads + " threads elapsed: " + time);
            Console.WriteLine("Read|Write Operations in 1 sec: " + ((double)N * NThreads / time.TotalSeconds).ToString("### ###"));
            Console.WriteLine("KBytes read|written in 1 sec: " + (KbytesProcessed.Sum() / time.TotalSeconds).ToString("### ### ### Kb"));
            Console.WriteLine("Total KBytes processed: " + KbytesProcessed.Sum());
        }
    }
}
/// <summary>
/// Saves new data items or overwrites existing ones (when a pointer is supplied).
/// The storage does not guarantee that items are written in the order they were passed,
/// because it chooses the most efficient write order.
/// </summary>
/// <returns>New pointers to the saved (overwritten) items, in the same order as the input items.</returns>
public IPtr[] BatchSave(Tuple<IPtr, byte[]>[] dataitems)
{
    var md5 = new MD5CryptoServiceProvider();
    var hash = dataitems.Select(item => md5.ComputeHash(item.Item2)).ToArray();
    var len = dataitems.Select(item => BitConverter.GetBytes(item.Item2.Length)).ToArray();
    var ptrs = new IPtr[dataitems.Length];
    Func<int, int> getFileNum = i =>
    {
        var data = dataitems[i].Item2;
        var ptr = (Ptr)dataitems[i].Item1;
        return ptr == null || ptr.Capacity < data.Length ? -1 : ptr.FileNum;
    };
    var groupsByFile = dataitems.Select((item, i) => i).GroupBy(i => getFileNum(i)).ToArray();
    foreach (var group in groupsByFile)
    {
        if (group.Key != -1)
        {
            // In-place overwrites: sort by position within the file and rewrite under one lock.
            var items = group.OrderBy(i => ((Ptr)dataitems[i].Item1).Position).ToArray();
            var dbFile = dbFiles[group.Key];
            lock (dbFile)
                foreach (var i in items)
                {
                    var data = dataitems[i].Item2;
                    var ptr = (Ptr)dataitems[i].Item1;
                    var position = WriteToDbFile(dbFile, data, len[i], hash[i], ptr.Capacity - data.Length, ptr.Position);
                    if (ptr.Position != position)
                    {
                        ptr = new Ptr(ptr.Capacity, ptr.FileNum, position);
                    }
                    ptrs[i] = ptr;
                }
        }
        else
        {
            // New items (or items that outgrew their slot): append to the current db file.
            var items = group.ToArray();
            int processed = 0;
            while (processed < items.Length)
            {
                short filenum;
                lock (dbFiles)
                    filenum = (short)(dbFiles.Count - 1);
                var dbFile = dbFiles[filenum];
                lock (dbFile)
                    while (processed < items.Length && dbFile.length < maxDbFileLength)
                    {
                        int i = items[processed];
                        var data = dataitems[i].Item2;
                        // Reserve a slot of minRecordLen * 2^k bytes so the record can grow in place later.
                        int k = (int)Math.Ceiling(Math.Log(data.Length / minRecordLen + 1, 2));
                        int capacity = minRecordLen * (1 << k);
                        var position = WriteToDbFile(dbFile, data, len[i], hash[i], capacity - data.Length, -1);
                        ptrs[i] = new Ptr(capacity, filenum, position);
                        ++processed;
                    }
                if (dbFile.length >= maxDbFileLength)
                {
                    OpenNewDBFileIfOverfull();
                }
            }
        }
    }
    return ptrs;
}
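// Worked example of the capacity bucketing used above (a sketch; minRecordLen = 128 is an assumed
// configuration value, not taken from this listing). Each new record gets a slot of
// minRecordLen * 2^k bytes, so it can later be overwritten in place while it fits its capacity:
//
//   data.Length = 100  -> 100/128 = 0 (integer division), k = ceil(log2(1))  = 0, capacity = 128
//   data.Length = 300  -> 300/128 = 2,                    k = ceil(log2(3))  = 2, capacity = 512
//   data.Length = 5000 -> 5000/128 = 39,                  k = ceil(log2(40)) = 6, capacity = 8192
//
// Typical call (existingPtr is a hypothetical pointer obtained from an earlier save): pass null
// pointers for new records and existing pointers for overwrites.
var batch = new[]
{
    Tuple.Create((IPtr)null, new byte[300]),   // new record, gets a 512-byte slot
    Tuple.Create(existingPtr, new byte[100]),  // fits the old slot -> rewritten in place
};
IPtr[] newPtrs = storage.BatchSave(batch);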
/// <summary>
/// Batch-loads data from storage. The storage determines the most efficient read order,
/// so items may be returned in a different order than the input pointers.
/// </summary>
public IEnumerable<Tuple<IPtr, byte[]>> BatchLoad(IPtr[] ptrs, int sizeOfbatchInMb = 1024)
{
    var md5 = new MD5CryptoServiceProvider();
    long sizeOfbatch = ((long)sizeOfbatchInMb) * 1024 * 1024;
    var groupsByFile = ptrs.GroupBy(p => ((Ptr)p).FileNum);
    foreach (var group in groupsByFile)
    {
        var ordered = group.OrderBy(p => ((Ptr)p).Position).ToArray();
        int processed = 0;
        while (processed < ordered.Length)
        {
            var dbFile = dbFiles[group.Key];
            List<Tuple<Ptr, byte[], byte[]>> loaded = new List<Tuple<Ptr, byte[], byte[]>>();
            byte[] data, hash;
            long readedBytes = 0;
            lock (dbFile)
            {
                // Read sequentially by position until the batch size limit is reached.
                while (readedBytes < sizeOfbatch && processed < ordered.Length)
                {
                    var ptr = (Ptr)ordered[processed];
                    ReadFromDbFile(dbFile, ptr.Capacity, ptr.Position, out data, out hash);
                    loaded.Add(Tuple.Create(ptr, data, hash));
                    if (data != null)
                        readedBytes += data.Length + hash.Length;
                    ++processed;
                }
            }
            // Verify checksums outside the lock; corrupted records are returned with a null payload.
            foreach (var item in loaded)
            {
                var ptr = item.Item1;
                data = item.Item2;
                hash = item.Item3;
                var control = md5.ComputeHash(data);
                if (!control.SequenceEqual(hash))
                    yield return new Tuple<IPtr, byte[]>(ptr, null);
                else
                    yield return new Tuple<IPtr, byte[]>(ptr, data);
            }
        }
    }
}
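// Usage sketch (hypothetical caller). Results come back grouped by file and ordered by position,
// not in the order of the input pointers, so match them back to keys via the returned pointer.
// A null payload means the checksum did not match and the record should be treated as corrupted.
foreach (var item in storage.BatchLoad(ptrs))
{
    if (item.Item2 == null)
    {
        // corrupted record at item.Item1; skip it or schedule a repair
        continue;
    }
    Process(item.Item1, item.Item2);  // Process is a placeholder for the caller's handler
}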