/// <summary>
/// Rebuilds <c>indexDic</c> from scratch as an inverted index over every entry in
/// <c>FileManager.docsIds</c>.
/// </summary>
/// <remarks>
/// For each token the resulting <c>WordInfo</c> holds:
/// <list type="bullet">
/// <item><description><c>Posting</c>: the keys of the documents containing the token, sorted ascending, with no duplicates.</description></item>
/// <item><description><c>Frequency</c>: the number of distinct documents containing the token (it is only incremented the first time a document is seen for that token).</description></item>
/// </list>
/// Documents whose text is null or empty contribute nothing to the index.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public bool buildIndexDic()
{
    indexDic = new SortedDictionary<string, WordInfo>();
    var fm = new FileManager();
    var tp = new TextParser(Algorithm.InvertedIndexSearch);

    foreach (var doc in FileManager.docsIds)
    {
        var rawText = fm.ReadFileText(doc.Value);

        // Guard against null as well as empty so the parser never receives a null string.
        if (string.IsNullOrEmpty(rawText))
        {
            continue;
        }

        foreach (var token in tp.parseText(rawText))
        {
            WordInfo wi;
            if (indexDic.TryGetValue(token, out wi))
            {
                // Use the instance returned by TryGetValue instead of looking the
                // token up again; WordInfo is a reference type, so this mutates the
                // entry stored in the dictionary.
                if (!wi.Posting.Contains(doc.Key))
                {
                    wi.Frequency++;
                    wi.Posting.Add(doc.Key);
                }
            }
            else
            {
                // First occurrence of this token in any document.
                indexDic.Add(token, new WordInfo
                {
                    Frequency = 1,
                    Posting = new List<int> { doc.Key }
                });
            }
        }
    }

    // Sort all postings so every list is in ascending document-key order.
    foreach (var record in indexDic)
    {
        record.Value.Posting.Sort();
    }

    return true;
}
/// <summary>
/// Rewrites a tokenized boolean query in place: operators become their symbolic form
/// and every operand word becomes the string produced for it by
/// <c>PosToBase2</c> / <c>BoolToBase2</c>.
/// </summary>
/// <remarks>
/// Replacement rules, applied per token:
/// <list type="bullet">
/// <item><description><c>(</c> and <c>)</c> are left untouched.</description></item>
/// <item><description><c>NOT</c> → <c>~</c>, <c>AND</c> → <c>&amp;</c>, <c>OR</c> → <c>|</c>.</description></item>
/// <item><description>Any other token is looked up in <paramref name="indexDictionary"/>; a hit is replaced by its base-2 string, a miss by <c>"0"</c>.</description></item>
/// </list>
/// If <c>_Algorithm</c> is none of the three handled values, operand tokens are left unchanged.
/// </remarks>
/// <param name="tokens">Query tokens; the list's elements are overwritten in place.</param>
/// <param name="docsCount">Document count forwarded to <c>PosToBase2</c>.</param>
/// <param name="indexDictionary">
/// Dictionary exposing <c>TryGetValue(string, out T)</c>, where <c>T</c> is
/// <c>WordInfo</c>, <c>bool[]</c> or <c>PosInfo</c> depending on <c>_Algorithm</c>.
/// </param>
public void replaceQueryTokens(ref List<string> tokens, int docsCount, dynamic indexDictionary)
{
    // Replacement used for any word that does not occur in the index.
    const string notFound = "0";

    for (int i = 0; i < tokens.Count; i++)
    {
        string token = tokens[i];

        switch (token)
        {
            case "(":
            case ")":
                break;

            case "NOT":
                tokens[i] = "~";
                break;

            case "AND":
                tokens[i] = "&";
                break;

            case "OR":
                tokens[i] = "|";
                break;

            default:
                // The out locals are declared with their concrete types (no throwaway
                // instances) so the dynamically bound TryGetValue call has a typed
                // target to write into.
                if (_Algorithm == Algorithm.InvertedIndexSearch)
                {
                    WordInfo wordInfo;
                    if (indexDictionary.TryGetValue(token, out wordInfo))
                    {
                        tokens[i] = PosToBase2(wordInfo.Posting, docsCount);
                    }
                    else
                    {
                        tokens[i] = notFound;
                    }
                }
                else if (_Algorithm == Algorithm.BooleanSearch)
                {
                    bool[] boolVector;
                    if (indexDictionary.TryGetValue(token, out boolVector))
                    {
                        tokens[i] = BoolToBase2(boolVector.ToList());
                    }
                    else
                    {
                        tokens[i] = notFound;
                    }
                }
                else if (_Algorithm == Algorithm.PositionalIndexSearch)
                {
                    PosInfo posInfo;
                    if (indexDictionary.TryGetValue(token, out posInfo))
                    {
                        // Only the document keys of the positional postings are needed here.
                        tokens[i] = PosToBase2(posInfo.PostingDic.Keys.ToList(), docsCount);
                    }
                    else
                    {
                        tokens[i] = notFound;
                    }
                }
                break;
        }
    }
}