/// <summary>
/// Indexes the searchable words of a batch of documents into the search table of their
/// document space, then clears the processed references.
/// <para>
/// Step 1: for every document with searchables, each word is resolved to a (block id, number in block)
/// slot - taken from the word table when the word is already known, otherwise newly allocated.
/// Step 2: newly discovered words are written to the word table.
/// Step 3: the affected blocks are loaded, the per-word WAH arrays are updated with added/removed
/// document ids, and the blocks are written back.
/// Step 4: in a separate transaction, version entries are flagged and "p" references are removed.
/// </para>
/// Failures are reported through <c>Verbose</c> and are not rethrown.
/// </summary>
/// <param name="i2s">
/// Batch to process. NOTE(review): all items are assumed to belong to the same document space
/// (the table names are derived from a single item) - confirm against the caller.
/// </param>
void ProcessDocsBlock(List<I2> i2s)
{
    //Nothing to do for an absent or empty batch
    if (i2s == null || i2s.Count == 0)
        return;

    try
    {
        Verbose("Block processing is in progress...");

        System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
        sw.Start();
        System.Diagnostics.Stopwatch swSelect = new System.Diagnostics.Stopwatch();
        System.Diagnostics.Stopwatch swInsert = new System.Diagnostics.Stopwatch();

        //doc can be null for individual items, so the document space is taken
        //from the first item that actually carries a document
        var spaceDoc = i2s.Select(r => r.doc).FirstOrDefault(d => d != null);
        if (spaceDoc == null)
        {
            Verbose("ProcessDocsBlock skipped: no element of the batch carries a document, document space is unknown");
            return;
        }

        var documentSpaceId = spaceDoc.DocumentSpaceId;

        byte[] val = null;
        int uniqueWordsFound = 0;
        Dictionary<string, WordInDoc> wds = new Dictionary<string, WordInDoc>();
        List<byte[]> PRemoves = new List<byte[]>();
        List<byte[]> VTRemoves = new List<byte[]>();

        using (var tran = DBreezeEngine.GetTransaction())
        {
            string searchTable = DocumentsStorageTablesPrefix + "s" + documentSpaceId.ToString();
            tran.SynchronizeTables(searchTable);

            //Setting WAH index table
            var tbOneWordWAH = tran.InsertTable<int>(searchTable, 2, 0);
            tbOneWordWAH.ValuesLazyLoadingIsOn = false;
            tbOneWordWAH.Technical_SetTable_OverwriteIsNotAllowed();

            //Nested table with blocks (overwrite is needed)
            var tbBlocks = tran.InsertTable<int>(searchTable, 10, 0);
            tbBlocks.ValuesLazyLoadingIsOn = false;

            //Allocation cursor: last used block and last used slot inside that block
            int currentBlock = tran.Select<int, int>(searchTable, 11).Value;
            int numberInBlock = tran.Select<int, int>(searchTable, 12).Value;

            if (currentBlock == 0)
            {
                numberInBlock = 0;
                currentBlock = 1;
            }

            foreach (var i2 in i2s)
            {
                //Removing from "p"
                PRemoves.Add(i2.PReference);

                //Removing from Version table; such a document is removed from the WAHs, not added
                bool documentIsAdded = true;
                if (i2.VersionDocumentToRemove != null)
                {
                    VTRemoves.Add(i2.VersionDocumentToRemove);
                    documentIsAdded = false;
                }

                //doc can be null, doc can be without searchables
                if (i2.doc == null || String.IsNullOrEmpty(i2.doc.Searchables))
                    continue;

                var wordsCounter = GetWordsDefinitionFromText(i2.doc.Searchables);

                foreach (var el in wordsCounter.OrderBy(r => r.Key))
                {
                    WordInDoc wd;

                    //Trying to get from Dictionary
                    if (!wds.TryGetValue(el.Key, out wd))
                    {
                        //Getting from db
                        swSelect.Start();
                        var row1 = tbOneWordWAH.Select<string, byte[]>(el.Key, true);
                        swSelect.Stop();

                        if (row1.Exists)
                        {
                            val = row1.Value;
                            wd = new WordInDoc()
                            {
                                BlockId = val.Substring(0, 4).To_Int32_BigEndian(),
                                NumberInBlock = val.Substring(4, 4).To_Int32_BigEndian(),
                                //We don't need to save this word again (only its WAH in block)
                                ExistsInDb = true
                            };
                        }
                        else
                        {
                            numberInBlock++;

                            //Quantity of words (WAHs) in block is exceeded, starting a new block
                            if (numberInBlock > QuantityOfWordsInBlock)
                            {
                                currentBlock++;
                                numberInBlock = 1;
                            }

                            wd = new WordInDoc()
                            {
                                BlockId = currentBlock,
                                NumberInBlock = numberInBlock,
                            };

                            uniqueWordsFound++;
                        }
                    }

                    //Adding to wah document id
                    if (documentIsAdded)
                    {
                        if (!wd.docsAdded.Contains(i2.doc.DocumentSequentialId))
                            wd.docsAdded.Add(i2.doc.DocumentSequentialId);
                    }
                    else
                    {
                        if (!wd.docsRemoved.Contains(i2.doc.DocumentSequentialId))
                            wd.docsRemoved.Add(i2.doc.DocumentSequentialId);
                    }

                    //Applying it to the memory wah storage
                    wds[el.Key] = wd;
                }//eo foreach words in document
            }//eo foreach document

            //Inserting new words
            foreach (var wd1 in wds.OrderBy(r => r.Key))
            {
                if (wd1.Value.ExistsInDb)
                    continue;

                swInsert.Start();
                tbOneWordWAH.Insert<string, byte[]>(wd1.Key, wd1.Value.BlockId.To_4_bytes_array_BigEndian().Concat(wd1.Value.NumberInBlock.To_4_bytes_array_BigEndian()));
                swInsert.Stop();
            }

            //Inserting WAH blocks
            //Going through the list of collected words ordered by blockID, fill blocks and save them
            int iterBlockId = 0;    //Id of the block currently held in memory (0 - nothing is loaded yet)
            int iterBlockLen = 0;   //Length of the stored value of that block (0 - block is not stored yet)
            int blockSize = 0;
            byte[] btBlock = null;
            byte[] btWah = null;
            WAH2 wah = null;
            Dictionary<int, byte[]> block = new Dictionary<int, byte[]>();

            foreach (var wd1 in wds.OrderBy(r => r.Value.BlockId))
            {
                //Switching to another block: flushing the loaded one and reading the next one
                if (wd1.Value.BlockId != iterBlockId)
                {
                    if (iterBlockId > 0 && block.Count > 0)
                    {
                        byte[] packed = BuildReservedBlockBuffer(block, iterBlockLen);

                        swInsert.Start();
                        tbBlocks.Insert<int, byte[]>(iterBlockId, packed);
                        swInsert.Stop();
                    }

                    val = tbBlocks.Select<int, byte[]>(wd1.Value.BlockId).Value;
                    iterBlockId = wd1.Value.BlockId;
                    iterBlockLen = val == null ? 0 : val.Length;

                    block = null;

                    if (val != null)
                    {
                        //First 4 bytes hold the length of the compressed payload, the rest after the payload is reserve
                        blockSize = val.Substring(0, 4).To_Int32_BigEndian();
                        if (blockSize > 0)
                        {
                            btBlock = val.Substring(4, blockSize);
                            btBlock = btBlock.DecompressGZip();
                            block = btBlock.DeserializeProtobuf<Dictionary<int, byte[]>>();
                        }
                    }

                    //Block is not stored yet, is stored empty or could not be materialized
                    if (block == null)
                        block = new Dictionary<int, byte[]>();
                }

                //Getting from Block
                if (block.TryGetValue(wd1.Value.NumberInBlock, out btWah))
                    wah = new WAH2(btWah);
                else
                    wah = new WAH2(null);

                //Adding documents
                foreach (var d in wd1.Value.docsAdded)
                    wah.Add(d, true);

                //Removing documents
                foreach (var d in wd1.Value.docsRemoved)
                    wah.Add(d, false);

                block[wd1.Value.NumberInBlock] = wah.GetCompressedByteArray();
            }//eo foreach

            //Saving last loaded block
            if (block.Count > 0)
            {
                byte[] packed = BuildReservedBlockBuffer(block, iterBlockLen);

                swInsert.Start();
                tbBlocks.Insert<int, byte[]>(iterBlockId, packed);
                swInsert.Stop();
            }

            //Persisting allocation cursor
            tran.Insert<int, int>(searchTable, 11, currentBlock);
            tran.Insert<int, int>(searchTable, 12, numberInBlock);

            tran.Commit();
        }//eo tran

        //Moved away P and VT tables Removes to avoid suspending of AddDocuments thread
        if (PRemoves.Count > 0 || VTRemoves.Count > 0)
        {
            swInsert.Start();

            using (var tran = DBreezeEngine.GetTransaction())
            {
                string docTable = DocumentsStorageTablesPrefix + "d" + documentSpaceId.ToString();

                tran.SynchronizeTables(docTable, DocumentsStorageTablesPrefix + "p");

                var vt = tran.InsertTable<int>(docTable, 3, 0);

                foreach (var el in VTRemoves)
                {
                    //Setting version as deleted from keyword blocks
                    vt.Insert<byte[], byte[]>(el, new byte[] { 1 });
                }

                foreach (var el in PRemoves)
                {
                    tran.RemoveKey<byte[]>(DocumentsStorageTablesPrefix + "p", el);
                }

                tran.Commit();
            }

            swInsert.Stop();
        }

        sw.Stop();

        Verbose("Processed {0} documents with {3} words in DocuSpace {1}. Took {2} ms; Select {4} ms; Insert {5} ms; UniqueWords: {6}", i2s.Count, documentSpaceId, sw.ElapsedMilliseconds, wds.Count, swSelect.ElapsedMilliseconds, swInsert.ElapsedMilliseconds, uniqueWordsFound);
    }
    catch (Exception ex)
    {
        //Processing stays best-effort (nothing is rethrown to the caller),
        //but the failure must be visible instead of disappearing silently
        Verbose("ProcessDocsBlock failed: {0}", ex.ToString());
    }
}

/// <summary>
/// Serializes (protobuf) and GZip-compresses a block of WAH arrays and wraps it into the stored layout:
/// 4 bytes of big-endian payload length followed by the payload itself; the remaining bytes, if any, are reserve.
/// </summary>
/// <param name="block">Block content: number in block -> compressed WAH array.</param>
/// <param name="existingLength">Length of the value currently stored for this block, 0 when nothing is stored.</param>
/// <returns>Buffer to be stored as the block value.</returns>
private byte[] BuildReservedBlockBuffer(Dictionary<int, byte[]> block, int existingLength)
{
    byte[] payload = block.SerializeProtobuf();
    payload = payload.CompressGZip();

    int required = payload.Length + 4;
    byte[] buffer = null;

    if (required < MinimalBlockReservInBytes)
    {
        //Minimal reserve
        buffer = new byte[MinimalBlockReservInBytes];
    }
    else if (required > existingLength)
    {
        //Doubling reserve; never less than prefix + payload, otherwise a tiny payload would not fit
        buffer = new byte[Math.Max(payload.Length * 2, required)];
    }
    else
    {
        //Payload fits into the length of the already stored value, writing prefix + payload only
        buffer = new byte[required];
    }

    buffer.CopyInside(0, payload.Length.To_4_bytes_array_BigEndian());
    buffer.CopyInside(4, payload);

    return buffer;
}