Exemplo n.º 1
0
        /// <summary>
        /// Gathers the documents associated with every query term that is present in the word index.
        /// </summary>
        /// <param name="wordsVector">Word index; its <c>GetVector()</c> map is probed with each query term.</param>
        /// <param name="queryVector">Query vector whose <c>Dictionary</c> keys are the terms to look up.</param>
        /// <returns>
        /// The list obtained by folding the documents of each matching term through <c>AddDocuments</c>;
        /// an empty list when no query term is indexed.
        /// </returns>
        private static List<int> DocumentsOccuredInQuery(WordsVector wordsVector, Vector queryVector)
        {
            var collected = new List<int>();

            foreach (var entry in queryVector.Dictionary)
            {
                var term = entry.Key;

                // Terms that are not in the index contribute nothing.
                if (!wordsVector.GetVector().ContainsKey(term))
                {
                    continue;
                }

                var termDocuments = wordsVector.GetVector()[term].GetDocuments();
                collected = AddDocuments(collected, termDocuments);
            }

            return collected;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Evaluates a boolean query built from the operators <c>!</c>, <c>&amp;</c> and <c>|</c>
        /// against the word index.
        /// </summary>
        /// <param name="query">Infix boolean query; it is converted to postfix by <c>InfixToPostfixConvertor.Convert</c>.</param>
        /// <param name="universalSet">All document ids; used as the base set when negating an operand.</param>
        /// <param name="wordsVector">Word index whose <c>GetVector()</c> map supplies the documents of each term.</param>
        /// <returns>The matching document ids in ascending order.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The postfix expression is malformed (an operator has too few operands, or the query is empty),
        /// so the operand stack is popped while empty.
        /// </exception>
        public static IList<int> Parse(string query, HashSet<int> universalSet, WordsVector wordsVector)
        {
            var postfixQuery = InfixToPostfixConvertor.Convert(query);
            var stack = new Stack<HashSet<int>>();

            foreach (var item in postfixQuery)
            {
                if (item == "!")
                {
                    var operand = stack.Pop();

                    // Negation: everything in the universe that is not in the operand.
                    var remaining = new HashSet<int>(universalSet);
                    remaining.ExceptWith(operand);
                    stack.Push(remaining);
                }
                else if (item == "&")
                {
                    var operand1 = stack.Pop();
                    var operand2 = stack.Pop();
                    operand1.IntersectWith(operand2);
                    stack.Push(operand1);
                }
                else if (item == "|")
                {
                    var operand1 = stack.Pop();
                    var operand2 = stack.Pop();
                    operand1.UnionWith(operand2);
                    stack.Push(operand1);
                }
                else
                {
                    // Explicit lookup instead of a catch-all: an unknown term is an expected case and
                    // yields an empty set, while genuine failures are no longer silently swallowed.
                    var index = wordsVector.GetVector();
                    if (index.ContainsKey(item))
                    {
                        // Push a copy: the operator branches above modify their operand in place, and
                        // GetDocuments() may hand back the index's own set.
                        // NOTE(review): the copy can be dropped if GetDocuments() already returns a fresh set.
                        stack.Push(new HashSet<int>(index[item].GetDocuments()));
                    }
                    else
                    {
                        stack.Push(new HashSet<int>());
                    }
                }
            }

            var answer = stack.Pop().ToList();
            answer.Sort();
            return answer;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Ranks the documents that share at least one term with a free-text query.
        /// </summary>
        /// <param name="query">Free-text query.</param>
        /// <param name="wordsVector">Word index used to weight the query terms and to find candidate documents.</param>
        /// <param name="documentsVector">Supplies the document count and the vector of each candidate document.</param>
        /// <returns>The result of <c>SortTheDictionaryOnValue</c> applied to the per-document cosine similarities.</returns>
        public static IList<int> Parse(string query, WordsVector wordsVector, DocumentsVector documentsVector)
        {
            var totalDocuments = documentsVector.DocumentsIndex().Count;
            var queryVector = ComputeQueryTfIdfVector(query, wordsVector, totalDocuments);
            var candidates = DocumentsOccuredInQuery(wordsVector, queryVector);

            // Score every candidate document against the weighted query vector.
            var scores = new Dictionary<int, double>();
            foreach (var documentId in candidates)
            {
                var documentVector = documentsVector.GetVectorOfDocumentIndex(documentId);
                scores.Add(documentId, ComputeCosineSimilarity(documentVector, queryVector));
            }

            return SortTheDictionaryOnValue(scores);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a vector from the query text and scales each term's <c>Count</c> by a weight derived
        /// from the term's document count in the word index.
        /// </summary>
        /// <param name="query">Query text handed to the <c>Vector</c> constructor.</param>
        /// <param name="wordsVector">Word index providing each term's <c>DocumentCount</c>.</param>
        /// <param name="totalDocuments">Number of documents in the collection; used as the divisor of the weight.</param>
        /// <returns>The query vector with every term's <c>Count</c> multiplied by its weight.</returns>
        private static Vector ComputeQueryTfIdfVector(string query, WordsVector wordsVector, int totalDocuments)
        {
            var queryVector = new Vector(query);

            foreach (var entry in queryVector.Dictionary)
            {
                var term = entry.Key;

                // Terms missing from the index keep a weight of zero, which zeroes their Count below.
                double weight = 0;
                if (wordsVector.GetVector().ContainsKey(term))
                {
                    var documentFrequency = wordsVector.GetVector()[term].DocumentCount;

                    // NOTE(review): this is log10(df) / N, not the conventional idf log10(N / df); a term
                    // found in exactly one document gets weight 0. Confirm this matches the weighting used
                    // for the document vectors before changing either side.
                    weight = Math.Log10(documentFrequency) / totalDocuments;
                }

                queryVector.Dictionary[term].Count *= weight;
            }

            return queryVector;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Evaluates a proximity query of the form <c>word1 word2 /k</c>: finds the documents in which
        /// some position of <c>word1</c> lies exactly <c>k</c> positions before or after a position of
        /// <c>word2</c>.
        /// </summary>
        /// <param name="query">Proximity query text, e.g. <c>"alpha beta /3"</c>.</param>
        /// <param name="wordsVector">Word index whose <c>GetVector()</c> map supplies per-word documents and positions.</param>
        /// <returns>
        /// The matching document ids in ascending order; an empty list when either word is not indexed.
        /// </returns>
        /// <exception cref="System.FormatException">The query does not have the proximity-query shape.</exception>
        /// <exception cref="System.OverflowException">The distance does not fit in an <see cref="int"/>.</exception>
        public static IList<int> Parse(string query, WordsVector wordsVector)
        {
            // The static Regex.Match overload reuses the framework's cached pattern, so a Regex object
            // is no longer constructed on every call.
            var match = Regex.Match(query, @"^(\w+)\s(\w+)\s*/\s*(\d+)$");
            if (!match.Success)
            {
                // Previously this surfaced as a FormatException from Convert.ToInt32("") with an unrelated
                // message; the exception type is kept, the message now names the real problem.
                throw new FormatException("Query is not a proximity query of the form \"word1 word2 /k\".");
            }

            var operand1 = match.Groups[1].Value;
            var operand2 = match.Groups[2].Value;
            var displacement = Convert.ToInt32(match.Groups[3].Value);

            var answer = new List<int>();

            // Explicit lookups instead of a catch-all: a word missing from the index is an expected case
            // and yields no results, while genuine failures are no longer silently swallowed.
            var index = wordsVector.GetVector();
            if (!index.ContainsKey(operand1) || !index.ContainsKey(operand2))
            {
                return answer;
            }

            var document1 = index[operand1];
            var document2 = index[operand2];

            // Intersect on a copy: IntersectWith modifies its target in place, and GetDocuments() may
            // hand back the index's own set.
            // NOTE(review): the copy can be dropped if GetDocuments() already returns a fresh set.
            var commonDocuments = new HashSet<int>(document1.GetDocuments());
            commonDocuments.IntersectWith(document2.GetDocuments());

            foreach (int doc in commonDocuments)
            {
                var list1 = document1.DocumentsDictionary[doc].Positions;
                var list2 = document2.DocumentsDictionary[doc].Positions;

                // NOTE(review): this matches an exact distance of `displacement`, not "within
                // `displacement`" — confirm that is the intended proximity semantics.
                foreach (var position in list1)
                {
                    if (list2.Contains(position + displacement) || list2.Contains(position - displacement))
                    {
                        answer.Add(doc);
                        break;
                    }
                }
            }

            answer.Sort();
            return answer;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Preprocesses the raw query and dispatches it to the parser that matches its shape.
        /// </summary>
        /// <param name="_query">Raw query text as entered by the user.</param>
        /// <param name="wordsVector">Word index passed through to the selected parser.</param>
        /// <param name="documentsVector">Document vectors; used by the general text parser.</param>
        /// <param name="universalSet">All document ids; used by the boolean parser.</param>
        /// <returns>The document ids returned by the selected parser.</returns>
        /// <exception cref="System.ArgumentException">
        /// The preprocessed query matches none of the supported query shapes.
        /// </exception>
        public IList<int> Parse(string _query, WordsVector wordsVector, DocumentsVector documentsVector, HashSet<int> universalSet)
        {
            var query = Preprocessor.GetInstance().PreprocessQuery(_query);

            // The order of the checks is significant: the first matching pattern wins.
            if (ProximityQueryRegex.IsMatch(query))
            {
                return ProximityQuery.Parse(query, wordsVector);
            }

            if (GeneralTextQueryRegex.IsMatch(query))
            {
                return GeneralTextQueryParser.Parse(query, wordsVector, documentsVector);
            }

            if (BooleanQueryRegex.IsMatch(query))
            {
                return BooleanQueryParser.Parse(query, universalSet, wordsVector);
            }

            // A specific exception type instead of the base Exception; the message is unchanged and
            // existing `catch (Exception)` handlers still catch it.
            throw new ArgumentException("Invalid Query");
        }
Exemplo n.º 7
0
        /// <summary>
        /// Checks that a <c>WordsVector</c> filled through <c>Update</c> from three sentences equals one
        /// constructed from a hand-assembled word-to-documents dictionary.
        /// </summary>
        public void test()
        {
            string[] sentences =
            {
                "Articles Articles the English.",
                "Articles in the Arabic language.",
                "I love Playing cricket."
            };

            // Vector built by the code under test: one Update call per sentence, document id = array index.
            var fromUpdate = new WordsVector();
            for (int documentId = 0; documentId < sentences.Length; documentId++)
            {
                fromUpdate.Update(documentId, new Vector(sentences[documentId]));
            }

            // Hand-assembled counterpart: every word is keyed by its preprocessed form.
            var preprocessor = Preprocessor.GetInstance();
            var handBuilt = new Dictionary<string, WordDocuments>();

            // "Articles": positions 0 and 1 in document 0, position 0 in document 1.
            var articles = preprocessor.Preprocess("Articles");
            var articlesInFirst = new Index { Word = articles };
            articlesInFirst.AddOccurrence(0);
            articlesInFirst.AddOccurrence(1);
            var articlesDocuments = new WordDocuments(0, articlesInFirst);
            var articlesInSecond = new Index { Word = articles };
            articlesInSecond.AddOccurrence(0);
            articlesDocuments.Update(1, articlesInSecond);
            handBuilt.Add(articles, articlesDocuments);

            // "English": position 3 in document 0.
            var english = preprocessor.Preprocess("English");
            var englishIndex = new Index { Word = english };
            englishIndex.AddOccurrence(3);
            handBuilt.Add(english, new WordDocuments(0, englishIndex));

            // "Arabic": position 3 in document 1.
            var arabic = preprocessor.Preprocess("Arabic");
            var arabicIndex = new Index { Word = arabic };
            arabicIndex.AddOccurrence(3);
            handBuilt.Add(arabic, new WordDocuments(1, arabicIndex));

            // "language": position 4 in document 1.
            var language = preprocessor.Preprocess("language");
            var languageIndex = new Index { Word = language };
            languageIndex.AddOccurrence(4);
            handBuilt.Add(language, new WordDocuments(1, languageIndex));

            // "love": position 1 in document 2.
            var love = preprocessor.Preprocess("love");
            var loveIndex = new Index { Word = love };
            loveIndex.AddOccurrence(1);
            handBuilt.Add(love, new WordDocuments(2, loveIndex));

            // "Playing": position 2 in document 2.
            var playing = preprocessor.Preprocess("Playing");
            var playingIndex = new Index { Word = playing };
            playingIndex.AddOccurrence(2);
            handBuilt.Add(playing, new WordDocuments(2, playingIndex));

            // "cricket": position 3 in document 2.
            var cricket = preprocessor.Preprocess("cricket");
            var cricketIndex = new Index { Word = cricket };
            cricketIndex.AddOccurrence(3);
            handBuilt.Add(cricket, new WordDocuments(2, cricketIndex));

            var fromDictionary = new WordsVector(handBuilt);

            // Argument order is unchanged: the Update-built vector first, the hand-built one second.
            Assert.AreEqual(fromUpdate, fromDictionary);
        }
Exemplo n.º 8
0
 /// <summary>
 /// Initializes the storage with an empty FYP lookup table and default-constructed word and
 /// document vectors. The constructor is private, so instances can only be created from inside
 /// the class.
 /// </summary>
 private DataStorage()
 {
     FYP_Data = new Dictionary<int, FYPSearchModel>();
     wordsVector = new WordsVector();
     documentsVector = new DocumentsVector();
 }