/// <summary>
/// Rebuilds <c>indexDic</c> from scratch as a positional index over every entry of
/// <c>FileManager.docsIds</c>.
/// </summary>
/// <remarks>
/// Each document's text is read with <c>FileManager.ReadFileText(doc.Value)</c> and tokenized with
/// <c>TextParser.parseText</c>. For every token the index holds a <c>PosInfo</c> whose
/// <c>PostingDic</c> maps a document key to the list of positions (0-based, restarted for each
/// document) at which the token occurs, and whose <c>Frequency</c> is the number of distinct
/// documents containing the token. Documents with null or empty text are skipped.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public bool buildIndexDic()
        {
            indexDic = new SortedDictionary <string, PosInfo>();
            var fm = new FileManager();
            // NOTE(review): the parser is configured with InvertedIndexSearch although a positional
            // index is being built - kept as in the original; confirm this is intentional.
            var tp = new TextParser(Algorithm.InvertedIndexSearch);

            foreach (var doc in FileManager.docsIds)
            {
                var rawText = fm.ReadFileText(doc.Value);
                if (string.IsNullOrEmpty(rawText))
                {
                    continue; // nothing to index for this document
                }

                var tokenPosition = 0;
                foreach (var token in tp.parseText(rawText))
                {
                    // Fetch the token's entry once, creating it on first sight.
                    PosInfo pi;
                    if (!indexDic.TryGetValue(token, out pi))
                    {
                        pi            = new PosInfo();
                        pi.Frequency  = 0; // incremented below when the first document is attached
                        pi.PostingDic = new SortedDictionary <int, List <int> >();
                        indexDic.Add(token, pi);
                    }

                    // One keyed lookup replaces copying all keys to a list and scanning it.
                    List <int> positions;
                    if (!pi.PostingDic.TryGetValue(doc.Key, out positions))
                    {
                        // First occurrence of the token in this document: new posting list,
                        // and the document frequency grows by one.
                        positions = new List <int>();
                        pi.PostingDic.Add(doc.Key, positions);
                        pi.Frequency++;
                    }

                    positions.Add(tokenPosition);
                    tokenPosition++;
                }
            }
            return(true);
        }
Beispiel #2
0
        /// <summary>
        /// Rewrites a tokenized boolean query in place: operators become single-character symbols
        /// and every other term becomes the string produced for its dictionary entry.
        /// </summary>
        /// <remarks>
        /// "(" and ")" are left untouched; "NOT", "AND" and "OR" become "~", "&amp;" and "|".
        /// Any other token is looked up in <paramref name="indexDictionary"/> according to
        /// <c>_Algorithm</c>; a term that is not found is replaced by "0". If <c>_Algorithm</c>
        /// matches none of the handled values the term is left as it is.
        /// </remarks>
        /// <param name="tokens">Query tokens; modified element by element.</param>
        /// <param name="docsCount">Passed through to <c>PosToBase2</c>.</param>
        /// <param name="indexDictionary">
        /// Dictionary-like object exposing <c>TryGetValue</c>; the value type expected depends on
        /// <c>_Algorithm</c> (<c>WordInfo</c>, <c>bool[]</c> or <c>PosInfo</c>).
        /// </param>
        public void replaceQueryTokens(ref List <string> tokens, int docsCount, dynamic indexDictionary)
        {
            for (int idx = 0; idx < tokens.Count; idx++)
            {
                string term = tokens[idx];

                // Grouping symbols pass through unchanged.
                if (term == "(" || term == ")")
                {
                    continue;
                }

                // Boolean operators map to their one-character form.
                if (term == "NOT")
                {
                    tokens[idx] = "~";
                    continue;
                }
                if (term == "AND")
                {
                    tokens[idx] = "&";
                    continue;
                }
                if (term == "OR")
                {
                    tokens[idx] = "|";
                    continue;
                }

                // Everything else is a search term: encode it from the index, or "0" when absent.
                if (_Algorithm == Algorithm.InvertedIndexSearch)
                {
                    var wordEntry = new WordInfo();
                    tokens[idx] = indexDictionary.TryGetValue(term, out wordEntry)
                                  ? PosToBase2(wordEntry.Posting, docsCount)
                                  : "0";
                }
                else if (_Algorithm == Algorithm.BooleanSearch)
                {
                    bool[] incidence;
                    tokens[idx] = indexDictionary.TryGetValue(term, out incidence)
                                  ? BoolToBase2(incidence.ToList())
                                  : "0";
                }
                else if (_Algorithm == Algorithm.PositionalIndexSearch)
                {
                    var posEntry = new PosInfo();
                    tokens[idx] = indexDictionary.TryGetValue(term, out posEntry)
                                  ? PosToBase2(posEntry.PostingDic.Keys.ToList(), docsCount)
                                  : "0";
                }
            }
        }