/// <summary>
/// Rebuilds <c>indexDic</c> from scratch as a positional index: for every token
/// it records, per document id, the list of token positions at which the token
/// occurs in that document.
/// </summary>
/// <remarks>
/// <c>PosInfo.Frequency</c> is incremented once per distinct document that
/// contains the token (not once per occurrence).
/// Documents whose text is null or empty are skipped.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
public bool buildIndexDic()
{
    indexDic = new SortedDictionary<string, PosInfo>();
    var fm = new FileManager();
    // NOTE(review): the parser is configured with Algorithm.InvertedIndexSearch even
    // though a positional index is being built - kept as in the original; confirm intent.
    var tp = new TextParser(Algorithm.InvertedIndexSearch);

    foreach (var doc in FileManager.docsIds)
    {
        var rawText = fm.ReadFileText(doc.Value);
        if (string.IsNullOrEmpty(rawText))
        {
            // Nothing to index for this document.
            continue;
        }

        // Position counter restarts at 0 for every document.
        var tokenPosition = 0;
        foreach (var token in tp.parseText(rawText))
        {
            // Fetch (or create) the entry for this token with a single lookup.
            PosInfo pi;
            if (!indexDic.TryGetValue(token, out pi))
            {
                pi = new PosInfo();
                pi.Frequency = 0;
                pi.PostingDic = new SortedDictionary<int, List<int>>();
                indexDic.Add(token, pi);
            }

            // Fetch (or create) the position list for this document. A direct
            // TryGetValue replaces the former Keys.ToList().Contains(...) scan,
            // which copied and walked the whole key set for every token occurrence.
            List<int> positions;
            if (!pi.PostingDic.TryGetValue(doc.Key, out positions))
            {
                positions = new List<int>();
                pi.PostingDic.Add(doc.Key, positions);
                // First time this token is seen in this document.
                pi.Frequency++;
            }

            positions.Add(tokenPosition);
            tokenPosition++;
        }
    }

    return true;
}
/// <summary>
/// Rewrites a tokenized boolean query in place. Parentheses are kept, the
/// keywords NOT / AND / OR become "~" / "&amp;" / "|", and every other token is
/// treated as a search word and replaced by the string produced from its
/// dictionary entry, or by "0" when the word is not in the dictionary.
/// </summary>
/// <param name="tokens">Query tokens; elements are overwritten in place.</param>
/// <param name="docsCount">Document count forwarded to <c>PosToBase2</c>.</param>
/// <param name="indexDictionary">
/// Dictionary exposing <c>TryGetValue</c>; the value type that is requested
/// depends on the current <c>_Algorithm</c> (WordInfo, bool[] or PosInfo).
/// </param>
public void replaceQueryTokens(ref List<string> tokens, int docsCount, dynamic indexDictionary)
{
    for (int idx = 0; idx < tokens.Count; idx++)
    {
        string current = tokens[idx];

        // Grouping symbols pass through unchanged.
        if (current == "(" || current == ")")
        {
            continue;
        }

        // Boolean keywords map to their single-character operator.
        if (current == "NOT")
        {
            tokens[idx] = "~";
            continue;
        }

        if (current == "AND")
        {
            tokens[idx] = "&";
            continue;
        }

        if (current == "OR")
        {
            tokens[idx] = "|";
            continue;
        }

        // Anything else is a search word: look it up according to the active algorithm.
        if (_Algorithm == Algorithm.InvertedIndexSearch)
        {
            // Placeholder instance; TryGetValue overwrites it through the out argument.
            var wordEntry = new WordInfo();
            bool found = indexDictionary.TryGetValue(current, out wordEntry);
            tokens[idx] = found ? PosToBase2(wordEntry.Posting, docsCount) : "0";
        }
        else if (_Algorithm == Algorithm.BooleanSearch)
        {
            bool[] incidenceRow;
            bool found = indexDictionary.TryGetValue(current, out incidenceRow);
            tokens[idx] = found ? BoolToBase2(incidenceRow.ToList()) : "0";
        }
        else if (_Algorithm == Algorithm.PositionalIndexSearch)
        {
            // Placeholder instance; TryGetValue overwrites it through the out argument.
            var positionalEntry = new PosInfo();
            bool found = indexDictionary.TryGetValue(current, out positionalEntry);
            tokens[idx] = found ? PosToBase2(positionalEntry.PostingDic.Keys.ToList(), docsCount) : "0";
        }
        // Any other algorithm value leaves the token as it was.
    }
}