Example #1
0
        /// <summary>
        /// Indexes a batch of documents into the word-search structures of their document space.
        /// <para>
        /// First transaction (search table, prefix + "s" + space id): every word of every document is mapped to a
        /// (block id, number in block) pair stored in the nested table under key 2; the WAH bitmap of each word is
        /// updated inside its block, blocks are stored (protobuf-serialized, gzipped, length-prefixed) in the nested
        /// table under key 10; keys 11 and 12 keep the current block id and the last used number in that block.
        /// </para>
        /// <para>
        /// Second transaction (document table, prefix + "d" + space id, and table prefix + "p"): replaced versions are
        /// flagged in the nested table under key 3 and the processed references are removed from the "p" table.
        /// </para>
        /// Failures are reported through <c>Verbose</c> and are not rethrown.
        /// </summary>
        /// <param name="i2s">Batch to process. Elements may carry a null <c>doc</c> or a doc without searchables;
        /// such elements only contribute their "p" / version-table removals.</param>
        void ProcessDocsBlock(List<I2> i2s)
        {
            try
            {
                Verbose("Block processing is in progress...");

                if (i2s == null || i2s.Count == 0)
                    return;     //Nothing to process

                System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                sw.Start();

                //doc can be null, so the document space is taken from the first element which has a document.
                //NOTE(review): assumes all elements of one batch belong to the same document space
                //(the table names were always derived from a single element) - confirm against the caller.
                var firstDoc = i2s.Select(r => r.doc).FirstOrDefault(d => d != null);
                if (firstDoc == null)
                {
                    Verbose("ProcessDocsBlock skipped: none of the {0} elements has a document, document space is unknown", i2s.Count);
                    return;
                }

                string spaceId = firstDoc.DocumentSpaceId.ToString();

                System.Diagnostics.Stopwatch swSelect = new System.Diagnostics.Stopwatch();
                System.Diagnostics.Stopwatch swInsert = new System.Diagnostics.Stopwatch();

                int uniqueWordsFound = 0;

                //Words met in this batch, together with the documents to add to / remove from their WAH
                Dictionary<string, WordInDoc> wds = new Dictionary<string, WordInDoc>();

                List<byte[]> PRemoves = new List<byte[]>();
                List<byte[]> VTRemoves = new List<byte[]>();

                using (var tran = DBreezeEngine.GetTransaction())
                {
                    string searchTable = DocumentsStorageTablesPrefix + "s" + spaceId;

                    tran.SynchronizeTables(searchTable);

                    //Setting WAH index table
                    var tbOneWordWAH = tran.InsertTable<int>(searchTable, 2, 0);
                    tbOneWordWAH.ValuesLazyLoadingIsOn = false;
                    tbOneWordWAH.Technical_SetTable_OverwriteIsNotAllowed();

                    //Nested table with blocks
                    var tbBlocks = tran.InsertTable<int>(searchTable, 10, 0);   //Overwrite is needed
                    tbBlocks.ValuesLazyLoadingIsOn = false;

                    int currentBlock = tran.Select<int, int>(searchTable, 11).Value;
                    int numberInBlock = tran.Select<int, int>(searchTable, 12).Value;

                    //No block was ever created for this search table: numbering starts with block 1
                    if (currentBlock == 0)
                    {
                        numberInBlock = 0;
                        currentBlock = 1;
                    }

                    foreach (var i2 in i2s)
                    {
                        //Removing from "p"
                        PRemoves.Add(i2.PReference);

                        //An element with a version to remove takes its document out of the word bitmaps,
                        //any other element adds its document to them
                        bool documentIsAdded = i2.VersionDocumentToRemove == null;
                        if (!documentIsAdded)
                            VTRemoves.Add(i2.VersionDocumentToRemove);

                        //doc can be null, doc can be without searchables
                        if (i2.doc == null || String.IsNullOrEmpty(i2.doc.Searchables))
                            continue;

                        var wordsCounter = GetWordsDefinitionFromText(i2.doc.Searchables);

                        foreach (var el in wordsCounter.OrderBy(r => r.Key))
                        {
                            WordInDoc wd;

                            //Trying to get from Dictionary
                            if (!wds.TryGetValue(el.Key, out wd))
                            {
                                //getting from db
                                swSelect.Start();
                                var row1 = tbOneWordWAH.Select<string, byte[]>(el.Key, true);
                                swSelect.Stop();

                                if (row1.Exists)
                                {
                                    //Stored value: 4 bytes block id + 4 bytes number in block (both big-endian)
                                    byte[] wordLocation = row1.Value;

                                    wd = new WordInDoc()
                                    {
                                        BlockId = wordLocation.Substring(0, 4).To_Int32_BigEndian(),
                                        NumberInBlock = wordLocation.Substring(4, 4).To_Int32_BigEndian(),
                                        ExistsInDb = true   //We don't need to save this word again (only its WAH in block)
                                    };
                                }
                                else
                                {
                                    //New word takes the next free slot, opening a new block when the current one is full
                                    numberInBlock++;

                                    if (numberInBlock > QuantityOfWordsInBlock)  //Quantity of words (WAHs) in block
                                    {
                                        currentBlock++;
                                        numberInBlock = 1;
                                    }

                                    wd = new WordInDoc()
                                    {
                                        BlockId = currentBlock,
                                        NumberInBlock = numberInBlock,
                                    };

                                    uniqueWordsFound++;
                                }
                            }

                            //Adding to wah document id
                            if (documentIsAdded)
                            {
                                if (!wd.docsAdded.Contains(i2.doc.DocumentSequentialId))
                                    wd.docsAdded.Add(i2.doc.DocumentSequentialId);
                            }
                            else
                            {
                                if (!wd.docsRemoved.Contains(i2.doc.DocumentSequentialId))
                                    wd.docsRemoved.Add(i2.doc.DocumentSequentialId);
                            }

                            //Applying it to the memory wah storage
                            wds[el.Key] = wd;

                        }//eo foreach words in document

                    }//eo foreach document

                    //Inserting new words
                    foreach (var wd1 in wds.OrderBy(r => r.Key))
                    {
                        if (!wd1.Value.ExistsInDb)
                        {
                            swInsert.Start();
                            tbOneWordWAH.Insert<string, byte[]>(wd1.Key, wd1.Value.BlockId.To_4_bytes_array_BigEndian().Concat(wd1.Value.NumberInBlock.To_4_bytes_array_BigEndian()));
                            swInsert.Stop();
                        }
                    }

                    //Inserting WAH blocks
                    //Going through the list of collected words order by blockID, fill blocks and save them
                    int iterBlockId = 0;        //Id of the block currently held in memory (0 - nothing is loaded yet)
                    int iterBlockLen = 0;       //Length of the record this block currently occupies in the table
                    Dictionary<int, byte[]> block = new Dictionary<int, byte[]>();

                    foreach (var wd1 in wds.OrderBy(r => r.Value.BlockId))
                    {
                        //reading block if it's not loaded
                        if (wd1.Value.BlockId != iterBlockId)
                        {
                            //We must save current datablock before switching to the next one
                            if (iterBlockId > 0 && block.Count > 0)
                            {
                                byte[] finishedRecord = BuildWahBlockRecord(block, iterBlockLen);

                                //Saving into DB
                                swInsert.Start();
                                tbBlocks.Insert<int, byte[]>(iterBlockId, finishedRecord);
                                swInsert.Stop();
                            }

                            byte[] storedRecord = tbBlocks.Select<int, byte[]>(wd1.Value.BlockId).Value;
                            iterBlockId = wd1.Value.BlockId;
                            iterBlockLen = storedRecord == null ? 0 : storedRecord.Length;

                            block = null;
                            if (storedRecord != null)
                            {
                                //First 4 bytes hold the length of the compressed payload, the rest may be reserved space
                                int blockSize = storedRecord.Substring(0, 4).To_Int32_BigEndian();
                                if (blockSize > 0)
                                {
                                    byte[] payload = storedRecord.Substring(4, blockSize);
                                    payload = payload.DecompressGZip();
                                    block = payload.DeserializeProtobuf<Dictionary<int, byte[]>>();
                                }
                            }

                            //Not stored yet, stored empty, or deserialized to nothing
                            if (block == null)
                                block = new Dictionary<int, byte[]>();
                        }

                        //Getting from Block
                        WAH2 wah;
                        byte[] btWah;
                        if (block.TryGetValue(wd1.Value.NumberInBlock, out btWah))
                            wah = new WAH2(btWah);
                        else
                            wah = new WAH2(null);

                        //Adding documents
                        foreach (var d in wd1.Value.docsAdded)
                            wah.Add(d, true);

                        //Removing documents
                        foreach (var d in wd1.Value.docsRemoved)
                            wah.Add(d, false);

                        block[wd1.Value.NumberInBlock] = wah.GetCompressedByteArray();

                    }//eo foreach

                    //Saving last element
                    if (block.Count > 0)
                    {
                        byte[] lastRecord = BuildWahBlockRecord(block, iterBlockLen);

                        //Saving into DB
                        swInsert.Start();
                        tbBlocks.Insert<int, byte[]>(iterBlockId, lastRecord);
                        swInsert.Stop();
                    }

                    tran.Insert<int, int>(searchTable, 11, currentBlock);
                    tran.Insert<int, int>(searchTable, 12, numberInBlock);

                    tran.Commit();
                }//eo tran

                //Moved away P and VT tables Removes to avoid suspending of AddDocuments thread
                if (PRemoves.Count > 0 || VTRemoves.Count > 0)
                {
                    swInsert.Start();
                    using (var tran = DBreezeEngine.GetTransaction())
                    {
                        string docTable = DocumentsStorageTablesPrefix + "d" + spaceId;

                        tran.SynchronizeTables(docTable, DocumentsStorageTablesPrefix + "p");

                        var vt = tran.InsertTable<int>(docTable, 3, 0);

                        foreach (var el in VTRemoves)
                        {
                            //Setting version as deleted from keyword blocks
                            vt.Insert<byte[], byte[]>(el, new byte[] { 1 });
                        }

                        foreach (var el in PRemoves)
                        {
                            tran.RemoveKey<byte[]>(DocumentsStorageTablesPrefix + "p", el);
                        }

                        tran.Commit();
                    }
                    swInsert.Stop();
                }

                sw.Stop();

                Verbose("Processed {0} documents with {3} words in DocuSpace {1}. Took {2} ms; Select {4} ms; Insert {5} ms; UniqueWords: {6}", i2s.Count, firstDoc.DocumentSpaceId, sw.ElapsedMilliseconds, wds.Count, swSelect.ElapsedMilliseconds, swInsert.ElapsedMilliseconds, uniqueWordsFound);

            }
            catch (Exception ex)
            {
                //Rethrowing stays disabled here, as it was before (throw ThrowException("ProcessDocsBlock", ex.ToString())),
                //but the failure is at least reported instead of being silently lost.
                Verbose("ProcessDocsBlock failed: {0}", ex.ToString());
            }
        }

        /// <summary>
        /// Serializes (protobuf) and compresses (gzip) a block of WAH arrays and wraps it into the record layout used
        /// by the blocks table: 4 bytes with the payload length (big-endian), then the payload, then optionally
        /// unused reserved bytes.
        /// </summary>
        /// <param name="block">WAH arrays of one block, keyed by the word's number in block.</param>
        /// <param name="existingRecordLength">Length of the record currently stored for this block, 0 if none.</param>
        /// <returns>Record to be inserted into the blocks table.</returns>
        private byte[] BuildWahBlockRecord(Dictionary<int, byte[]> block, int existingRecordLength)
        {
            byte[] payload = block.SerializeProtobuf();
            payload = payload.CompressGZip();

            byte[] record;

            if ((payload.Length + 4) < MinimalBlockReservInBytes)
            {
                //Minimal reserve
                record = new byte[MinimalBlockReservInBytes];
            }
            else if ((payload.Length + 4) > existingRecordLength)
            {
                //Record doesn't fit into the stored one: doubling reserve
                record = new byte[payload.Length * 2];
            }
            else
            {
                //Filling existing space
                record = new byte[payload.Length + 4];
            }

            record.CopyInside(0, payload.Length.To_4_bytes_array_BigEndian());
            record.CopyInside(4, payload);

            return record;
        }