/// <summary>
/// Gathers the documents of every query term that is present in the index.
/// </summary>
/// <param name="wordsVector">Index whose <c>GetVector()</c> maps a term to its entry.</param>
/// <param name="queryVector">Query vector; the keys of its <c>Dictionary</c> are the terms looked up.</param>
/// <returns>
/// The list obtained by folding each matching term's documents into the running
/// result through <c>AddDocuments</c>; empty when no query term is in the index.
/// </returns>
private static List<int> DocumentsOccuredInQuery(WordsVector wordsVector, Vector queryVector)
{
    List<int> matchedDocuments = new List<int>();

    foreach (var entry in queryVector.Dictionary)
    {
        var term = entry.Key;

        // Query terms that are not in the index contribute nothing.
        if (!wordsVector.GetVector().ContainsKey(term))
        {
            continue;
        }

        var termDocuments = wordsVector.GetVector()[term].GetDocuments();
        matchedDocuments = AddDocuments(matchedDocuments, termDocuments);
    }

    return matchedDocuments;
}
/// <summary>
/// Builds the vector for <paramref name="query"/> and multiplies each term's count
/// by that term's inverse document frequency.
/// </summary>
/// <param name="query">Query text passed to the <c>Vector</c> constructor.</param>
/// <param name="wordsVector">Index whose <c>GetVector()</c> maps a term to an entry exposing <c>DocumentCount</c>.</param>
/// <param name="totalDocuments">Number of documents in the collection (N in the IDF formula).</param>
/// <returns>
/// The query vector with every <c>Count</c> scaled by <c>log10(N / df)</c>.
/// Terms that are not in the index are scaled by 0.
/// </returns>
private static Vector ComputeQueryTfIdfVector(string query, WordsVector wordsVector, int totalDocuments)
{
    Vector vector = new Vector(query);
    var index = wordsVector.GetVector();

    foreach (var pair in vector.Dictionary)
    {
        var key = pair.Key;

        // A term unknown to the index gets weight 0.
        double inverseDocumentFrequency = 0;

        if (index.ContainsKey(key))
        {
            var documentFrequency = index[key].DocumentCount;

            // idf = log10(N / df). The division must happen inside the logarithm,
            // and in floating point so that integer operands are not truncated.
            // A non-positive frequency would produce Infinity/NaN, so it keeps weight 0.
            if (documentFrequency > 0)
            {
                inverseDocumentFrequency = Math.Log10((double)totalDocuments / documentFrequency);
            }
        }

        // Only a property of the stored value changes here; the dictionary itself
        // is not modified, so enumerating it at the same time is safe.
        vector.Dictionary[key].Count *= inverseDocumentFrequency;
    }

    return vector;
}
/// <summary>
/// Evaluates a proximity query of the form <c>word1 word2 /k</c>.
/// </summary>
/// <param name="query">Query text; it must match <c>^(\w+)\s(\w+)\s*/\s*(\d+)$</c>.</param>
/// <param name="wordsVector">Index whose <c>GetVector()</c> maps a term to its entry.</param>
/// <returns>
/// Sorted ids of the documents that contain both words with some occurrence of the
/// first word exactly <c>k</c> positions before or after an occurrence of the second.
/// An empty list is returned when the query is malformed, when <c>k</c> does not fit
/// in an <see cref="int"/>, or when either word is not in the index.
/// </returns>
public static IList<int> Parse(string query, WordsVector wordsVector)
{
    var proximityQueryRegex = new Regex(@"^(\w+)\s(\w+)\s*/\s*(\d+)$");
    var match = proximityQueryRegex.Match(query);

    // A query that does not match, or whose distance overflows an int, yields no
    // results, the same outcome as a query for a word that is not indexed.
    int displacement;
    if (!match.Success || !int.TryParse(match.Groups[3].Value, out displacement))
    {
        return new List<int>();
    }

    var operand1 = match.Groups[1].Value;
    var operand2 = match.Groups[2].Value;

    var index = wordsVector.GetVector();
    if (!index.ContainsKey(operand1) || !index.ContainsKey(operand2))
    {
        return new List<int>();
    }

    var document1 = index[operand1];
    var document2 = index[operand2];

    // Intersect on a copy: IntersectWith modifies its receiver in place, and the set
    // returned by GetDocuments() may be owned by the index (not visible from here).
    var commonDocuments = new HashSet<int>(document1.GetDocuments());
    commonDocuments.IntersectWith(document2.GetDocuments());

    List<int> answer = new List<int>();
    foreach (int doc in commonDocuments)
    {
        var list1 = document1.DocumentsDictionary[doc].Positions;
        var list2 = document2.DocumentsDictionary[doc].Positions;

        foreach (var position in list1)
        {
            // The second word may sit on either side of the first.
            if (list2.Contains(position + displacement) || list2.Contains(position - displacement))
            {
                answer.Add(doc);
                break; // one hit is enough for this document
            }
        }
    }

    answer.Sort();
    return answer;
}
/// <summary>
/// Evaluates a boolean query built from terms and the operators <c>!</c>, <c>&amp;</c> and <c>|</c>.
/// </summary>
/// <param name="query">Infix query text; it is converted to postfix by <c>InfixToPostfixConvertor.Convert</c>.</param>
/// <param name="universalSet">Set that <c>!</c> takes its complement against; it is never modified.</param>
/// <param name="wordsVector">Index whose <c>GetVector()</c> maps a term to its entry.</param>
/// <returns>Sorted ids of the documents that satisfy the query. A term that is not in the index matches no documents.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Stack.Pop</c> when the postfix expression leaves an operator without
/// enough operands, or when the expression is empty.
/// </exception>
public static IList<int> Parse(string query, HashSet<int> universalSet, WordsVector wordsVector)
{
    var postfixQuery = InfixToPostfixConvertor.Convert(query);
    var index = wordsVector.GetVector();
    var stack = new Stack<HashSet<int>>();

    foreach (var item in postfixQuery)
    {
        if (item == "!")
        {
            // Complement against a copy so the caller's universal set stays intact.
            var operand = stack.Pop();
            var remaining = new HashSet<int>(universalSet);
            remaining.ExceptWith(operand);
            stack.Push(remaining);
        }
        else if (item == "&")
        {
            var operand1 = stack.Pop();
            var operand2 = stack.Pop();
            operand1.IntersectWith(operand2);
            stack.Push(operand1);
        }
        else if (item == "|")
        {
            var operand1 = stack.Pop();
            var operand2 = stack.Pop();
            operand1.UnionWith(operand2);
            stack.Push(operand1);
        }
        else if (index.ContainsKey(item))
        {
            // Push a copy: the operators above modify their operands in place, and the
            // set returned by GetDocuments() may be owned by the index (not visible from here).
            stack.Push(new HashSet<int>(index[item].GetDocuments()));
        }
        else
        {
            // A term that is not indexed matches no documents.
            stack.Push(new HashSet<int>());
        }
    }

    var answer = stack.Pop().ToList();
    answer.Sort();
    return answer;
}