コード例 #1
0
ファイル: Storage.cs プロジェクト: enginekit/DBreezeBased
        /// <summary>
        /// Creates the storage on top of an existing DBreeze engine, warms up the
        /// protobuf serializers and then starts the indexing of unfinished documents.
        /// </summary>
        /// <param name="DBreezeEngine">Engine instance; must be already initialized (not null)</param>
        public Storage(DBreezeEngine DBreezeEngine)
        {
            // Processing notifications are wired up first, exactly as before the argument check.
            this.OnProcessingStarted += Storage_OnProcessingStarted;
            this.OnProcessingStopped += Storage_OnProcessingStopped;

            if (DBreezeEngine == null)
            {
                throw ThrowException("Storage", "DBreezeEngine must be instantiated");
            }

            // NOTE: checks for SearchWordMinimalLength > 0 and for a non-empty
            // DocumentsStorageTablesPrefix were commented out in the original code
            // and are not enforced here.

            this.DBreezeEngine = DBreezeEngine;

            // Protobuf preparation: register the contract types ...
            ProtoBuf.Serializer.PrepareSerializer<Document>();
            ProtoBuf.Serializer.PrepareSerializer<SearchRequest>();
            ProtoBuf.Serializer.PrepareSerializer<SearchResponse>();

            // ... and serialize one empty instance of each (results are discarded).
            new Document().SerializeProtobuf();
            new SearchRequest().SerializeProtobuf();
            new SearchResponse().SerializeProtobuf();

            // Automatic indexing of unfinished documents
            StartDocumentsIndexing();
        }
コード例 #2
0
ファイル: Storage.cs プロジェクト: enginekit/DBreezeBased
        /// <summary>
        /// Searches one document space for documents matching the words of
        /// <c>req.SearchWords</c>. Every prepared search word is treated as a prefix
        /// ("starts with") against the word index; at most the first 10 prepared words are used.
        /// The per-word results are combined with AND or OR logic according to
        /// <c>req.SearchLogicType</c>.
        /// </summary>
        /// <param name="req">
        /// Search request. When it is null, or its DocumentSpace or SearchWords is null/empty,
        /// an empty response is returned.
        /// </param>
        /// <returns>
        /// Response holding either the found documents (<c>req.IncludeDocuments</c> is true)
        /// or only their internal ids, plus the search duration in milliseconds.
        /// An unknown document space yields an empty response.
        /// </returns>
        /// <remarks>
        /// Any exception raised while searching is caught and rethrown through
        /// <c>ThrowException("SearchDocumentSpace", ...)</c>.
        /// </remarks>
        public SearchResponse SearchDocumentSpace(SearchRequest req)
        {
            SearchResponse resp = new SearchResponse();
            try
            {
                if (req == null || String.IsNullOrEmpty(req.DocumentSpace) || String.IsNullOrEmpty(req.SearchWords))
                    return resp;

                resp.DocumentSpace = req.DocumentSpace;

                bool isAndSearch = req.SearchLogicType == SearchRequest.eSearchLogicType.AND;

                //Found documents by internal id (value stays null when documents are not requested)
                Dictionary<int, Document> dmnts = new Dictionary<int, Document>();

                System.Diagnostics.Stopwatch sw = System.Diagnostics.Stopwatch.StartNew();

                using (var tran = DBreezeEngine.GetTransaction())
                {
                    //Master table: document space name -> document space id
                    var mt = tran.SelectTable<int>(DocumentsStorageTablesPrefix + "m", 1, 0);
                    var docSpaceId = mt.Select<string, long>(req.DocumentSpace).Value;

                    if (docSpaceId == 0)
                        return resp;    //Not found document space

                    var searchWords = this.PrepareSearchKeyWords(req.SearchWords);
                    //Evaluated once here instead of once per index row inside the loop below
                    bool singleWordSearch = searchWords.Count() == 1;

                    string docTable = DocumentsStorageTablesPrefix + "d" + docSpaceId.ToString();
                    //Version table Key. Not used below; the call is kept because the side effects
                    //of SelectTable cannot be judged from this method alone.
                    var vt = tran.SelectTable<int>(docTable, 3, 0);
                    var dt = tran.SelectTable<int>(docTable, 1, 0); //Document table Key
                    dt.ValuesLazyLoadingIsOn = !req.IncludeDocuments;

                    //-----------------------------------------------------------------   ONE/MULTIPLE WORDS SEARCH, using AND/OR LOGIC

                    #region "Multiple Words"

                    //Word index: indexed word -> 8 bytes (BlockId, NumberInBlock; both big-endian Int32)
                    var tbOneWordWAH = tran.SelectTable<int>(DocumentsStorageTablesPrefix + "s" + docSpaceId.ToString(), 2, 0);
                    tbOneWordWAH.ValuesLazyLoadingIsOn = false;

                    resp.UniqueWordsInDataSpace = (int)tbOneWordWAH.Count();

                    //All index words matched by any search word. A list (not a dictionary keyed by the
                    //indexed word) is used on purpose: the same indexed word can be matched by several
                    //search words (e.g. "car" and "cart" both match "cart"); a dictionary would throw
                    //on the duplicate key and could not hold one entry per search word.
                    List<WordInDoc> foundWords = new List<WordInDoc>();

                    //Matches collected for the first high-occurance word-part before it was cut off
                    Dictionary<string, WordInDoc> firstHighOccuranceWord = new Dictionary<string, WordInDoc>();

                    //Word-parts matching too many index words; they are verified later against doc.Searchables
                    List<string> highOccuranceWordParts = new List<string>();

                    //Sequence number of the search word a match belongs to (1-based)
                    int foundOrigin = 1;

                    foreach (var word in searchWords.Take(10)) //Maximum 10 words for search
                    {
                        bool anyWordFound = false;
                        int totalFoundWords = 0;
                        Dictionary<string, WordInDoc> perWord = new Dictionary<string, WordInDoc>();

                        foreach (var row1 in tbOneWordWAH.SelectForwardStartsWith<string, byte[]>(word))
                        {
                            anyWordFound = true;
                            totalFoundWords++;

                            if (singleWordSearch && totalFoundWords > req.Quantity)
                            {
                                //In case if only one search word, then we don't need to make any comparison
                                break;
                            }

                            if (totalFoundWords >= req.MaximalExcludingOccuranceOfTheSearchPattern)  //Found lots of words with such mask inside
                            {
                                //Too many index words contain this word-part, better to enhance search
                                if (firstHighOccuranceWord.Count == 0)
                                {
                                    //Only the first HighOccurance word-part comes to the list. It is used later in case
                                    //if all search words are of HighOccurance (then only this one is visualized)
                                    firstHighOccuranceWord = perWord.ToDictionary(r => r.Key, r => r.Value);
                                }
                                //Dropping everything collected for this word-part
                                perWord.Clear();
                                //Adding word into List of High-Occurance word-parts
                                highOccuranceWordParts.Add(word);
                                break;
                            }

                            perWord.Add(row1.Key, new WordInDoc()
                            {
                                BlockId = row1.Value.Substring(0, 4).To_Int32_BigEndian(),
                                NumberInBlock = row1.Value.Substring(4, 4).To_Int32_BigEndian(),
                                foundOrigin = foundOrigin
                            });
                        }

                        //Repacking occurances
                        foreach (var pw in perWord)
                            foundWords.Add(pw.Value);

                        foundOrigin++;

                        if (isAndSearch && !anyWordFound)
                        {
                            //One of the words is not found at all, AND logic can not be satisfied
                            sw.Stop();
                            resp.SearchDurationMs = sw.ElapsedMilliseconds;
                            return resp;
                        }
                    }

                    if (foundWords.Count == 0)
                    {
                        //In case of multiple search words and each of them of HighOccurance.
                        //We will form result only from the first HighOccurance list
                        foreach (var pw in firstHighOccuranceWord.Take(req.Quantity))
                            foundWords.Add(pw.Value);

                        //In this case highOccuranceWordParts must be cleared, because the returning result is very approximate
                        highOccuranceWordParts.Clear();
                    }

                    //Here we must start get data from blocks
                    //Nested table with blocks: value = 4 bytes payload length (big-endian) + GZip-compressed payload,
                    //the payload deserializes into Dictionary<NumberInBlock, WAH bytes>
                    var tbBlocks = tran.SelectTable<int>(DocumentsStorageTablesPrefix + "s" + docSpaceId.ToString(), 10, 0);
                    tbBlocks.ValuesLazyLoadingIsOn = false;

                    Dictionary<int, byte[]> block = null;
                    int currentBlockId = 0;

                    //Ordered by block so that every block is read and unpacked only once
                    foreach (var wrd in foundWords.OrderBy(r => r.BlockId))
                    {
                        //"block == null" also covers a first word whose BlockId equals the initial value 0
                        if (block == null || currentBlockId != wrd.BlockId)
                        {
                            currentBlockId = wrd.BlockId;

                            byte[] btBlock = tbBlocks.Select<int, byte[]>(wrd.BlockId).Value;
                            btBlock = btBlock.Substring(4, btBlock.Substring(0, 4).To_Int32_BigEndian());
                            btBlock = btBlock.DecompressGZip();
                            block = btBlock.DeserializeProtobuf<Dictionary<int, byte[]>>();
                        }

                        wrd.wah = new WAH2(block[wrd.NumberInBlock]);
                    }

                    //One merged array per search word (all index words matched by this search word)
                    List<byte[]> foundArrays = new List<byte[]>();
                    List<byte[]> oneWordFoundArrays = new List<byte[]>();
                    int currentOrigin = 0;

                    foreach (var wrd in foundWords.OrderBy(r => r.foundOrigin))
                    {
                        if (currentOrigin != wrd.foundOrigin)
                        {
                            //Next search word started, closing the group of the previous one
                            if (oneWordFoundArrays.Count > 0)
                            {
                                foundArrays.Add(WAH2.MergeAllUncompressedIntoOne(oneWordFoundArrays));
                                oneWordFoundArrays = new List<byte[]>();
                            }

                            currentOrigin = wrd.foundOrigin;
                        }

                        oneWordFoundArrays.Add(wrd.wah.GetUncompressedByteArray());
                    }

                    //The last
                    if (oneWordFoundArrays.Count > 0)
                        foundArrays.Add(WAH2.MergeAllUncompressedIntoOne(oneWordFoundArrays));

                    //////////  final results

                    if (foundArrays.Count > 0)
                    {
                        //Only the query of the requested logic type is built
                        var q = isAndSearch
                            ? WAH2.TextSearch_AND_logic(foundArrays).Take(req.Quantity)
                            : WAH2.TextSearch_OR_logic(foundArrays, req.Quantity);

                        int qOutput = 0;

                        foreach (var el in q)
                        {
                            int docId = (int)el;

                            //Getting document
                            DBreeze.DataTypes.Row<int, byte[]> docRow = dt.Select<int, byte[]>(docId);

                            if (docRow.Exists && !dmnts.ContainsKey(docId))
                            {
                                Document doc = null;

                                if (highOccuranceWordParts.Count > 0)
                                {
                                    //We got some noisy word-parts of high occurance together with strongly found words.
                                    //We must be sure that these word-parts are also inside of returned docs,
                                    //that is why searchables are always retrieved here.
                                    doc = this.RetrieveDocument(req.IncludeDocumentsContent, true, dt, docRow);
                                    if (doc == null)
                                        continue;

                                    //In case of AND doc.Searchables must contain all high-occurance word-parts
                                    if (isAndSearch)
                                    {
                                        if (String.IsNullOrEmpty(doc.Searchables))
                                            continue;
                                        if (!highOccuranceWordParts.All(doc.Searchables.ToLower().Contains))
                                            continue;
                                    }

                                    if (req.IncludeDocuments)
                                    {
                                        if (!req.IncludeDocumentsSearchanbles)
                                            doc.Searchables = String.Empty;
                                    }
                                    else
                                    {
                                        //Only the id is reported
                                        doc = null;
                                    }
                                }
                                else if (req.IncludeDocuments)
                                {
                                    doc = this.RetrieveDocument(req.IncludeDocumentsContent, req.IncludeDocumentsSearchanbles, dt, docRow);
                                    if (doc == null) //If doc is deleted, while search was in progress and we received its id in the list
                                        continue;
                                }

                                dmnts.Add(docId, doc);
                                qOutput++;
                            }

                            //">=" (not ">") so that never more than req.Quantity documents are returned
                            if (qOutput >= req.Quantity)
                                break;
                        }
                    }
                    #endregion

                }//eo using

                //Repacking dmnts into resp
                if (req.IncludeDocuments)
                {
                    foreach (var el in dmnts)
                        resp.Documents.Add(el.Value);
                }
                else
                {
                    foreach (var el in dmnts)
                        resp.DocumentsInternalIds.Add(el.Key);
                }

                sw.Stop();
                resp.SearchDurationMs = sw.ElapsedMilliseconds;
            }
            catch (Exception ex)
            {
                throw ThrowException("SearchDocumentSpace", ex.ToString());
            }

            return resp;
        }