Exemplo n.º 1
0
        /// <summary>
        /// Gathers the documents registered for every query term that is present in the index.
        /// </summary>
        /// <param name="wordsVector">Index; the map returned by <c>GetVector()</c> is probed with each query term.</param>
        /// <param name="queryVector">Query whose <c>Dictionary</c> keys are the terms to look up.</param>
        /// <returns>
        /// The list produced by successive <c>AddDocuments</c> calls; an empty list when none of
        /// the query terms is found in the index.
        /// </returns>
        private static List<int> DocumentsOccuredInQuery(WordsVector wordsVector, Vector queryVector)
        {
            List<int> matches = new List<int>();

            foreach (var entry in queryVector.Dictionary)
            {
                var term = entry.Key;

                // Terms unknown to the index contribute nothing.
                if (!wordsVector.GetVector().ContainsKey(term))
                {
                    continue;
                }

                // AddDocuments is defined elsewhere; whatever it returns becomes the running result.
                matches = AddDocuments(matches, wordsVector.GetVector()[term].GetDocuments());
            }

            return matches;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the weighted vector for a query: the count stored for every query term is
        /// multiplied by that term's inverse document frequency, idf = log10(N / df).
        /// </summary>
        /// <param name="query">Raw query text handed to the <c>Vector</c> constructor.</param>
        /// <param name="wordsVector">Index whose entries expose a <c>DocumentCount</c> (df) per term.</param>
        /// <param name="totalDocuments">Number of documents in the collection (N).</param>
        /// <returns>
        /// The query vector with each term count scaled in place. Terms missing from the index,
        /// or with a non-positive document count, get a weight of 0.
        /// </returns>
        private static Vector ComputeQueryTfIdfVector(string query, WordsVector wordsVector, int totalDocuments)
        {
            Vector vector = new Vector(query);
            var index = wordsVector.GetVector();

            // Only the Count of each existing entry is changed; no key is added or removed,
            // so enumerating the dictionary while updating is safe.
            foreach (var pair in vector.Dictionary)
            {
                var key = pair.Key;
                double inverseDocumentFrequency = 0;

                if (index.ContainsKey(key))
                {
                    // Widened to double so the ratio below is never an integer division.
                    double documentFrequency = index[key].DocumentCount;

                    if (documentFrequency > 0 && totalDocuments > 0)
                    {
                        // Rare terms (small df) must weigh more than common ones. The previous
                        // expression, log10(df) / N, grew with df and so ranked common terms higher.
                        // NOTE(review): document-side weights are computed outside this method;
                        // confirm they use the same idf definition.
                        inverseDocumentFrequency = Math.Log10(totalDocuments / documentFrequency);
                    }
                }

                vector.Dictionary[key].Count *= inverseDocumentFrequency;
            }

            return vector;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Evaluates a proximity query of the form <c>word1 word2 / N</c> and returns the documents
        /// in which some position of <c>word1</c> and some position of <c>word2</c> differ by exactly N.
        /// </summary>
        /// <param name="query">Query text; it must match <c>^(\w+)\s(\w+)\s*/\s*(\d+)$</c>.</param>
        /// <param name="wordsVector">Index mapping each word to its documents and in-document positions.</param>
        /// <returns>
        /// Matching document ids in ascending order. An empty list is returned when the query does
        /// not have the expected shape, when N does not fit in an <c>int</c>, or when either word
        /// is absent from the index.
        /// </returns>
        public static IList<int> Parse(string query, WordsVector wordsVector)
        {
            var proximityQueryRegex = new Regex(@"^(\w+)\s(\w+)\s*/\s*(\d+)$");
            var match = proximityQueryRegex.Match(query);

            // Previously the number was converted before the try block, so a malformed query or an
            // oversized N escaped as FormatException/OverflowException instead of yielding no results.
            int displacement;
            if (!match.Success || !int.TryParse(match.Groups[3].Value, out displacement))
            {
                return new List<int>();
            }

            var operand1 = match.Groups[1].Value;
            var operand2 = match.Groups[2].Value;

            var index = wordsVector.GetVector();
            if (!index.ContainsKey(operand1) || !index.ContainsKey(operand2))
            {
                return new List<int>();
            }

            var entry1 = index[operand1];
            var entry2 = index[operand2];

            // Intersect on a private copy: IntersectWith modifies its receiver, and the set handed
            // out by GetDocuments() may belong to the index.
            var commonDocuments = new HashSet<int>(entry1.GetDocuments());
            commonDocuments.IntersectWith(entry2.GetDocuments());

            List<int> answer = new List<int>();

            try
            {
                foreach (int doc in commonDocuments)
                {
                    var positions1 = entry1.DocumentsDictionary[doc].Positions;
                    var positions2 = entry2.DocumentsDictionary[doc].Positions;

                    foreach (var position in positions1)
                    {
                        // The second word may appear N positions after or before the first one.
                        if (positions2.Contains(position + displacement) || positions2.Contains(position - displacement))
                        {
                            answer.Add(doc);
                            break; // one hit is enough for this document
                        }
                    }
                }
            }
            catch (KeyNotFoundException)
            {
                // Safety net kept from the original best-effort behavior: a document missing from
                // DocumentsDictionary produces an empty result rather than an exception.
                return new List<int>();
            }

            answer.Sort();
            return answer;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Evaluates a boolean query. The query is converted to postfix form by
        /// <c>InfixToPostfixConvertor.Convert</c> and evaluated with a stack of document-id sets:
        /// <c>!</c> is complement against <paramref name="universalSet"/>, <c>&amp;</c> is
        /// intersection, <c>|</c> is union, and any other token is looked up as a term in the index.
        /// </summary>
        /// <param name="query">Infix boolean query text.</param>
        /// <param name="universalSet">All document ids; used as the base set for negation. It is copied, never modified.</param>
        /// <param name="wordsVector">Index mapping each term to its documents.</param>
        /// <returns>The ids of the matching documents in ascending order.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown by <c>Stack.Pop</c> when the postfix expression does not leave enough operands
        /// on the stack (for example an operator without operands, or an empty expression).
        /// </exception>
        public static IList<int> Parse(string query, HashSet<int> universalSet, WordsVector wordsVector)
        {
            var postfixQuery = InfixToPostfixConvertor.Convert(query);
            var index = wordsVector.GetVector();

            Stack<HashSet<int>> stack = new Stack<HashSet<int>>();

            foreach (var item in postfixQuery)
            {
                if (item == "!")
                {
                    var operand = stack.Pop();

                    var remaining = new HashSet<int>(universalSet);
                    remaining.ExceptWith(operand);
                    stack.Push(remaining);
                }
                else if (item == "&")
                {
                    // Every set on the stack is owned by this method (see the term branch below),
                    // so combining in place cannot affect the index.
                    var operand1 = stack.Pop();
                    var operand2 = stack.Pop();
                    operand1.IntersectWith(operand2);
                    stack.Push(operand1);
                }
                else if (item == "|")
                {
                    var operand1 = stack.Pop();
                    var operand2 = stack.Pop();
                    operand1.UnionWith(operand2);
                    stack.Push(operand1);
                }
                else if (index.ContainsKey(item))
                {
                    // Push a copy: the operators above mutate their operands, and the set returned
                    // by GetDocuments() may belong to the index.
                    stack.Push(new HashSet<int>(index[item].GetDocuments()));
                }
                else
                {
                    // A term that is not indexed matches no document.
                    stack.Push(new HashSet<int>());
                }
            }

            var answer = new List<int>(stack.Pop());

            answer.Sort();
            return answer;
        }