/// <summary>
        /// Rebuilds <c>indexDic</c> from scratch: every entry of <c>FileManager.docsIds</c> is
        /// read and tokenized, and each token is mapped to a <c>WordInfo</c> holding the
        /// keys of the documents that contain it.
        /// </summary>
        /// <remarks>
        /// <c>Frequency</c> is incremented once per document that contains the token, not once
        /// per occurrence, so it always equals the length of the token's posting list.
        /// Documents whose text is null or empty are skipped.
        /// </remarks>
        /// <returns>Always <c>true</c>.</returns>
        public bool buildIndexDic()
        {
            indexDic = new SortedDictionary <string, WordInfo>();
            var fm = new FileManager();
            var tp = new TextParser(Algorithm.InvertedIndexSearch);

            foreach (var doc in FileManager.docsIds)
            {
                var rawText = fm.ReadFileText(doc.Value);
                if (string.IsNullOrEmpty(rawText))
                {
                    // Nothing to index for this document.
                    continue;
                }

                foreach (var token in tp.parseText(rawText))
                {
                    WordInfo wi;
                    if (indexDic.TryGetValue(token, out wi))
                    {
                        // Reuse the entry returned by TryGetValue instead of looking the
                        // token up again for every access.
                        // The document is recorded only once per token, however many times
                        // the token occurs in the document.
                        if (!wi.Posting.Contains(doc.Key))
                        {
                            wi.Frequency++;
                            wi.Posting.Add(doc.Key);
                        }
                    }
                    else
                    {
                        // First time this token is seen in any document.
                        wi = new WordInfo
                        {
                            Frequency = 1,
                            Posting   = new List <int> { doc.Key }
                        };
                        indexDic.Add(token, wi);
                    }
                }
            }

            // Sort every posting list in place; only the lists change, not the dictionary
            // itself, so enumerating the dictionary while sorting is safe.
            foreach (var record in indexDic)
            {
                record.Value.Posting.Sort();
            }
            return(true);
        }
Beispiel #2
0
        /// <summary>
        /// Rewrites the query tokens in place. Parentheses are kept, the keywords
        /// <c>NOT</c>, <c>AND</c> and <c>OR</c> become <c>~</c>, <c>&amp;</c> and <c>|</c>,
        /// and every other token is treated as a search term and replaced by the string
        /// produced from its dictionary entry, or by <c>"0"</c> when the term is not in
        /// the dictionary.
        /// </summary>
        /// <param name="tokens">Query tokens; elements are overwritten in place.</param>
        /// <param name="docsCount">Passed through to <c>PosToBase2</c>.</param>
        /// <param name="indexDictionary">
        /// Dictionary searched for each term. Its value type has to match the current
        /// <c>_Algorithm</c> (<c>WordInfo</c>, <c>bool[]</c> or <c>PosInfo</c>), because
        /// <c>TryGetValue</c> is bound at run time through <c>dynamic</c>.
        /// </param>
        public void replaceQueryTokens(ref List <string> tokens, int docsCount, dynamic indexDictionary)
        {
            for (int i = 0; i < tokens.Count; i++)
            {
                switch (tokens[i])
                {
                case "(":
                case ")":
                    // Grouping symbols pass through unchanged.
                    break;

                case "NOT":
                    tokens[i] = "~";
                    break;

                case "AND":
                    tokens[i] = "&";
                    break;

                case "OR":
                    tokens[i] = "|";
                    break;

                default:
                {
                    // Replacement used when the term is not found in the dictionary.
                    string encoded = "0";

                    if (_Algorithm == Algorithm.InvertedIndexSearch)
                    {
                        // No instance is created up front: TryGetValue assigns the out argument.
                        WordInfo wi;
                        if (indexDictionary.TryGetValue(tokens[i], out wi))
                        {
                            encoded = PosToBase2(wi.Posting, docsCount);
                        }
                    }
                    else if (_Algorithm == Algorithm.BooleanSearch)
                    {
                        bool[] boolVector;
                        if (indexDictionary.TryGetValue(tokens[i], out boolVector))
                        {
                            encoded = BoolToBase2(boolVector.ToList());
                        }
                    }
                    else if (_Algorithm == Algorithm.PositionalIndexSearch)
                    {
                        PosInfo pi;
                        if (indexDictionary.TryGetValue(tokens[i], out pi))
                        {
                            encoded = PosToBase2(pi.PostingDic.Keys.ToList(), docsCount);
                        }
                    }
                    else
                    {
                        // Any other algorithm value leaves the token untouched.
                        break;
                    }

                    tokens[i] = encoded;
                    break;
                }
                }
            }
        }