/// <summary>
/// Applies the pending text-search changes of every supplied table: diffs the current and the new
/// searchables of each changed document, registers words that have no definition yet and rewrites
/// the affected WAH blocks. The transaction and the tables must be supplied by the caller, so the
/// indexing can be driven from outside.
/// </summary>
/// <param name="itran">Transaction through which all reads and writes are performed.</param>
/// <param name="xitbls">Tables to index: key is the table name, value is the indexing state of that table.</param>
internal void DoIndexing(Transaction itran, Dictionary<string, ITS> xitbls)
{
    //Upper bound of new word definitions that are buffered before being written to its.words
    const int maxBufferedWords = 100000;

    //Taken once, before any work, and stored as the last indexing time of every processed table
    byte[] btUdtStart = DateTime.UtcNow.Ticks.To_8_bytes_array_BigEndian();

    ITS its = null;
    byte[] newSrch = null;
    byte[] oldSrch = null;

    uint iterBlockId = 0;   //id of the block currently held in "block" (0 - nothing loaded yet)
    int iterBlockLen = 0;   //length of the stored value the current block was read from (0 - block did not exist)
    int blockSize = 0;
    Dictionary<uint, byte[]> block = new Dictionary<uint, byte[]>();
    byte[] btWah = null;
    byte[] val = null;
    WABI wah = null;

    //Encodes the block held in memory, wraps it as [4 bytes payload length][payload][reserve] and stores it
    //under iterBlockId; an empty block is not stored. The in-memory block is emptied in any case.
    Action saveCurrentBlock = () =>
    {
        if (block.Count > 0)
        {
            byte[] encoded = block.Encode_DICT_PROTO_UINT_BYTEARRAY(Compression.eCompressionMethod.Gzip);
            var minimalReserve = itran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.MinimalBlockReservInBytes;
            byte[] stored;

            if ((encoded.Length + 4) < minimalReserve)
            {
                //Minimal reserve
                stored = new byte[minimalReserve];
            }
            else if ((encoded.Length + 4) > iterBlockLen)
            {
                //Doubling reserve, the payload has outgrown the previously stored value
                stored = new byte[encoded.Length * 2];
            }
            else
            {
                //Filling existing space
                stored = new byte[encoded.Length + 4];
            }

            stored.CopyInside(0, encoded.Length.To_4_bytes_array_BigEndian());
            stored.CopyInside(4, encoded);

            //Saving into DB
            its.blocks.Insert<uint, byte[]>(iterBlockId, stored);
        }

        block.Clear();
    };

    foreach (var tbl in xitbls)
    {
        its = tbl.Value;

        if (its.srch == null) //Can be instantiated in insert procedure, depending how we use indexer
        {
            its.srch = itran.InsertTable<byte>(tbl.Key, 3, 0);
            its.srch.ValuesLazyLoadingIsOn = false;
        }

        //Are instantiated only here
        its.blocks = itran.InsertTable<byte>(tbl.Key, 10, 0);
        its.words = itran.InsertTable<byte>(tbl.Key, 20, 0);
        its.currentBlock = itran.Select<int, uint>(tbl.Key, 11).Value;
        its.numberInBlock = itran.Select<int, uint>(tbl.Key, 12).Value;

        its.blocks.ValuesLazyLoadingIsOn = false;
        its.words.ValuesLazyLoadingIsOn = false;

        if (its.currentBlock == 0)
        {
            //Nothing was stored yet, numbering of blocks starts from 1
            its.numberInBlock = 0;
            its.currentBlock = 1;
        }

        //Key is word, Value.Item1 is documents list from which this word must be removed,
        //Value.Item2 is documents list where word must be added, Value.Item3 is the word definition
        Dictionary<string, Tuple<HashSet<int>, HashSet<int>, WordInDocs>> ds =
            new Dictionary<string, Tuple<HashSet<int>, HashSet<int>, WordInDocs>>();
        Tuple<HashSet<int>, HashSet<int>, WordInDocs> tpl = null;

        //New word definitions, buffered so they are written to its.words in ordinal key order
        var tmpWrds = new SortedDictionary<string, byte[]>(StringComparer.Ordinal);

        //Writes all buffered word definitions to its.words and empties the buffer
        Action flushNewWords = () =>
        {
            foreach (var pending in tmpWrds)
            {
                its.words.Insert<string, byte[]>(pending.Key, pending.Value);
            }

            tmpWrds.Clear();
        };

        //Resolves the definition of a word (existing one or a freshly allocated slot), registers it in "ds"
        //and leaves the created entry in "tpl" for the caller
        Action<string> createNew = (word) =>
        {
            //Every buffered word is registered in "ds" as well, so callers (which get here only after
            //a failed lookup in "ds") are not expected to hit this guard
            if (tmpWrds.ContainsKey(word))
            {
                return;
            }

            var existing = its.words.Select<string, byte[]>(word, true);
            var definition = new WordInDocs();

            if (existing.Exists)
            {
                definition.BlockId = existing.Value.Substring(0, 4).To_UInt32_BigEndian();
                definition.NumberInBlock = existing.Value.Substring(4, 4).To_UInt32_BigEndian();
            }
            else
            {
                its.numberInBlock++;

                if (its.numberInBlock > itran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.QuantityOfWordsInBlock) //Quantity of words (WAHs) in block
                {
                    its.currentBlock++;
                    its.numberInBlock = 1;
                }

                definition.BlockId = its.currentBlock;
                definition.NumberInBlock = its.numberInBlock;

                //A full buffer is flushed BEFORE the current word is buffered. Flushing instead of
                //buffering would drop the definition of the current word, though its slot is already taken.
                if (tmpWrds.Count >= maxBufferedWords)
                {
                    flushNewWords();
                }

                //Inserting new definition
                tmpWrds[word] = definition.BlockId.To_4_bytes_array_BigEndian().Concat(definition.NumberInBlock.To_4_bytes_array_BigEndian());
            }

            tpl = new Tuple<HashSet<int>, HashSet<int>, WordInDocs>(new HashSet<int>(), new HashSet<int>(), definition);
            ds[word] = tpl;
        };

        //Key is 4 bytes document id, value is new searchables of the document
        Dictionary<byte[], byte[]> docs2Change = new Dictionary<byte[], byte[]>();

        foreach (var docId in its.ChangedDocIds.OrderBy(r => r))
        {
            byte[] docKey = docId.To_4_bytes_array_BigEndian();

            //Suffix 0 holds current searchables, suffix 1 holds the new ones
            oldSrch = its.srch.Select<byte[], byte[]>(docKey.Concat(new byte[] { 0 })).Value;
            newSrch = its.srch.Select<byte[], byte[]>(docKey.Concat(new byte[] { 1 })).Value;

            //diff will return list of words to be removed and list of words to be added
            var diff = WordsDiff(oldSrch, newSrch);

            //Copying new searchables to current searchables is postponed until all documents are read
            docs2Change.Add(docKey, newSrch);

            //To be removed
            foreach (var word in diff.Item1)
            {
                if (!ds.TryGetValue(word, out tpl))
                {
                    createNew(word);
                }

                tpl.Item1.Add(docId);
            }

            //To be added
            foreach (var word in diff.Item2)
            {
                if (!ds.TryGetValue(word, out tpl))
                {
                    createNew(word);
                }

                tpl.Item2.Add(docId);
            }
        }//eo foreach new searchables, end of document iteration

        //New searchables become current searchables
        foreach (var d2c in docs2Change.OrderBy(r => r.Key.ToBytesString()))
        {
            its.srch.RemoveKey<byte[]>(d2c.Key.Concat(new byte[] { 1 }));
            its.srch.Insert<byte[], byte[]>(d2c.Key.Concat(new byte[] { 0 }), d2c.Value);
        }

        //Storing word definitions which are still buffered
        flushNewWords();

        #region "S1"

        //Inserting WAH blocks
        //Going through the list of collected words ordered by blockID, fill blocks and save them
        block.Clear();
        iterBlockId = 0;

        foreach (var wd1 in ds.OrderBy(r => r.Value.Item3.BlockId))
        {
            WordInDocs wordDef = wd1.Value.Item3;

            //reading block if it's not loaded
            if (wordDef.BlockId != iterBlockId)
            {
                if (iterBlockId > 0)
                {
                    //We must save current datablock
                    saveCurrentBlock();
                }

                val = its.blocks.Select<uint, byte[]>(wordDef.BlockId).Value;
                iterBlockId = wordDef.BlockId;
                iterBlockLen = val == null ? 0 : val.Length;

                block.Clear();

                if (val != null)
                {
                    //First 4 bytes hold the length of the encoded payload, the rest behind the payload is reserve
                    blockSize = val.Substring(0, 4).To_Int32_BigEndian();

                    if (blockSize > 0)
                    {
                        val.Substring(4, blockSize).Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                    }
                }
            }

            //Getting from Block
            if (block.TryGetValue((uint)wordDef.NumberInBlock, out btWah))
            {
                wah = new WABI(btWah);
            }
            else
            {
                wah = new WABI(null);
            }

            //Adding documents
            foreach (var dId in wd1.Value.Item2)
            {
                wah.Add(dId, true);
            }

            //Removing documents
            foreach (var dId in wd1.Value.Item1)
            {
                wah.Add(dId, false);
            }

            block[wordDef.NumberInBlock] = wah.GetCompressedByteArray();
        }//eo foreach wds

        //Saving last element
        //!!!!!!!!!!! Remake it for smoothing storage
        saveCurrentBlock();

        #endregion

        itran.Insert<int, uint>(tbl.Key, 11, its.currentBlock);
        itran.Insert<int, uint>(tbl.Key, 12, its.numberInBlock);

        //Setting last indexing time
        itran.Insert<byte, byte[]>(tbl.Key, 4, btUdtStart);
    }//eo foreach tablesToIndex
}
/// <summary>
/// Applies the pending text-search changes of every supplied table: diffs the current and the new
/// searchables of each changed document, registers words that have no definition yet and rewrites
/// the affected WAH blocks. The transaction and the tables must be supplied by the caller, so the
/// indexing can be driven from outside.
/// </summary>
/// <param name="itran">Transaction through which all reads and writes are performed.</param>
/// <param name="xitbls">Tables to index: key is the table name, value is the indexing state of that table.</param>
internal void DoIndexing(Transaction itran, Dictionary<string, ITS> xitbls)
{
    //Upper bound of new word definitions that are buffered before being written to its.words
    const int maxBufferedWords = 100000;

    //Taken once, before any work, and stored as the last indexing time of every processed table
    byte[] btUdtStart = DateTime.UtcNow.Ticks.To_8_bytes_array_BigEndian();

    ITS its = null;
    byte[] newSrch = null;

    uint iterBlockId = 0;   //id of the block currently held in "block" (0 - nothing loaded yet)
    int iterBlockLen = 0;   //length of the stored value the current block was read from (0 - block did not exist)
    int blockSize = 0;
    Dictionary<uint, byte[]> block = new Dictionary<uint, byte[]>();
    byte[] btWah = null;
    byte[] val = null;
    WABI wah = null;

    //Encodes the block held in memory, wraps it as [4 bytes payload length][payload][reserve] and stores it
    //under iterBlockId; an empty block is not stored. The in-memory block is emptied in any case.
    Action saveCurrentBlock = () =>
    {
        if (block.Count > 0)
        {
            byte[] encoded = block.Encode_DICT_PROTO_UINT_BYTEARRAY(Compression.eCompressionMethod.Gzip);
            var minimalReserve = itran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.MinimalBlockReservInBytes;
            byte[] stored;

            if ((encoded.Length + 4) < minimalReserve)
            {
                //Minimal reserve
                stored = new byte[minimalReserve];
            }
            else if ((encoded.Length + 4) > iterBlockLen)
            {
                //Doubling reserve, the payload has outgrown the previously stored value
                stored = new byte[encoded.Length * 2];
            }
            else
            {
                //Filling existing space
                stored = new byte[encoded.Length + 4];
            }

            stored.CopyInside(0, encoded.Length.To_4_bytes_array_BigEndian());
            stored.CopyInside(4, encoded);

            //Saving into DB
            its.blocks.Insert<uint, byte[]>(iterBlockId, stored);
        }

        block.Clear();
    };

    foreach (var tbl in xitbls)
    {
        its = tbl.Value;

        if (its.srch == null) //Can be instantiated in insert procedure, depending how we use indexer
        {
            its.srch = itran.InsertTable<byte>(tbl.Key, 3, 0);
            its.srch.ValuesLazyLoadingIsOn = false;
        }

        //Are instantiated only here
        its.blocks = itran.InsertTable<byte>(tbl.Key, 10, 0);
        its.words = itran.InsertTable<byte>(tbl.Key, 20, 0);
        its.currentBlock = itran.Select<int, uint>(tbl.Key, 11).Value;
        its.numberInBlock = itran.Select<int, uint>(tbl.Key, 12).Value;

        its.blocks.ValuesLazyLoadingIsOn = false;
        its.words.ValuesLazyLoadingIsOn = false;

        if (its.currentBlock == 0)
        {
            //Nothing was stored yet, numbering of blocks starts from 1
            its.numberInBlock = 0;
            its.currentBlock = 1;
        }

        //Key is word, Value.Item1 is documents list from which this word must be removed,
        //Value.Item2 is documents list where word must be added, Value.Item3 is the word definition
        Dictionary<string, Tuple<HashSet<int>, HashSet<int>, WordInDocs>> ds =
            new Dictionary<string, Tuple<HashSet<int>, HashSet<int>, WordInDocs>>();
        Tuple<HashSet<int>, HashSet<int>, WordInDocs> tpl = null;

        //New word definitions, buffered so they are written to its.words in ordinal key order
        var tmpWrds = new SortedDictionary<string, byte[]>(StringComparer.Ordinal);

        //Writes all buffered word definitions to its.words and empties the buffer
        Action flushNewWords = () =>
        {
            foreach (var pending in tmpWrds)
            {
                its.words.Insert<string, byte[]>(pending.Key, pending.Value);
            }

            tmpWrds.Clear();
        };

        //Resolves the definition of a word (existing one or a freshly allocated slot), registers it in "ds"
        //and leaves the created entry in "tpl" for the caller. Created once per table, it does not depend
        //on the document being processed.
        Action<string> createNew = (word) =>
        {
            //Every buffered word is registered in "ds" as well, so callers (which get here only after
            //a failed lookup in "ds") are not expected to hit this guard
            if (tmpWrds.ContainsKey(word))
            {
                return;
            }

            var existing = its.words.Select<string, byte[]>(word, true);
            var definition = new WordInDocs();

            if (existing.Exists)
            {
                definition.BlockId = existing.Value.Substring(0, 4).To_UInt32_BigEndian();
                definition.NumberInBlock = existing.Value.Substring(4, 4).To_UInt32_BigEndian();
            }
            else
            {
                its.numberInBlock++;

                if (its.numberInBlock > itran._transactionUnit.TransactionsCoordinator._engine.Configuration.TextSearchConfig.QuantityOfWordsInBlock) //Quantity of words (WAHs) in block
                {
                    its.currentBlock++;
                    its.numberInBlock = 1;
                }

                definition.BlockId = its.currentBlock;
                definition.NumberInBlock = its.numberInBlock;

                //A full buffer is flushed BEFORE the current word is buffered. Flushing instead of
                //buffering would drop the definition of the current word, though its slot is already taken.
                if (tmpWrds.Count >= maxBufferedWords)
                {
                    flushNewWords();
                }

                //Inserting new definition
                tmpWrds[word] = definition.BlockId.To_4_bytes_array_BigEndian().Concat(definition.NumberInBlock.To_4_bytes_array_BigEndian());
            }

            tpl = new Tuple<HashSet<int>, HashSet<int>, WordInDocs>(new HashSet<int>(), new HashSet<int>(), definition);
            ds[word] = tpl;
        };

        foreach (var docId in its.ChangedDocIds)
        {
            byte[] docKey = docId.To_4_bytes_array_BigEndian();

            //Suffix 1 holds new searchables, suffix 0 holds the current ones
            newSrch = its.srch.Select<byte[], byte[]>(docKey.Concat(new byte[] { 1 })).Value;

            //diff will return list of words to be removed and list of words to be added
            var diff = WordsDiff(
                its.srch.Select<byte[], byte[]>(docKey.Concat(new byte[] { 0 }), true).Value, //Current searchables
                newSrch //new
                );

            //Copying new searchables to current searchables
            its.srch.ChangeKey<byte[]>(docKey.Concat(new byte[] { 1 }), docKey.Concat(new byte[] { 0 }));

            //To be removed
            foreach (var word in diff.Item1)
            {
                if (!ds.TryGetValue(word, out tpl))
                {
                    createNew(word);
                }

                tpl.Item1.Add(docId);
            }

            //To be added
            foreach (var word in diff.Item2)
            {
                if (!ds.TryGetValue(word, out tpl))
                {
                    createNew(word);
                }

                tpl.Item2.Add(docId);
            }
        }//eo foreach new searchables, end of document iteration

        //Storing word definitions which are still buffered
        flushNewWords();

        #region "S1"

        //Inserting WAH blocks
        //Going through the list of collected words ordered by blockID, fill blocks and save them
        block.Clear();
        iterBlockId = 0;

        foreach (var wd1 in ds.OrderBy(r => r.Value.Item3.BlockId))
        {
            WordInDocs wordDef = wd1.Value.Item3;

            //reading block if it's not loaded
            if (wordDef.BlockId != iterBlockId)
            {
                if (iterBlockId > 0)
                {
                    //We must save current datablock
                    saveCurrentBlock();
                }

                val = its.blocks.Select<uint, byte[]>(wordDef.BlockId).Value;
                iterBlockId = wordDef.BlockId;
                iterBlockLen = val == null ? 0 : val.Length;

                block.Clear();

                if (val != null)
                {
                    //First 4 bytes hold the length of the encoded payload, the rest behind the payload is reserve
                    blockSize = val.Substring(0, 4).To_Int32_BigEndian();

                    if (blockSize > 0)
                    {
                        val.Substring(4, blockSize).Decode_DICT_PROTO_UINT_BYTEARRAY(block, Compression.eCompressionMethod.Gzip);
                    }
                }
            }

            //Getting from Block
            if (block.TryGetValue((uint)wordDef.NumberInBlock, out btWah))
            {
                wah = new WABI(btWah);
            }
            else
            {
                wah = new WABI(null);
            }

            //Adding documents
            foreach (var dId in wd1.Value.Item2)
            {
                wah.Add(dId, true);
            }

            //Removing documents
            foreach (var dId in wd1.Value.Item1)
            {
                wah.Add(dId, false);
            }

            block[wordDef.NumberInBlock] = wah.GetCompressedByteArray();
        }//eo foreach wds

        //Saving last element
        //!!!!!!!!!!! Remake it for smoothing storage
        saveCurrentBlock();

        #endregion

        itran.Insert<int, uint>(tbl.Key, 11, its.currentBlock);
        itran.Insert<int, uint>(tbl.Key, 12, its.numberInBlock);

        //Setting last indexing time
        itran.Insert<byte, byte[]>(tbl.Key, 4, btUdtStart);
    }//eo foreach tablesToIndex
}