Beispiel #1
0
        /// <summary>
        /// Writes each page's URL to the console followed by short text snippets taken
        /// from around every word of the page that equals one of the query terms.
        /// </summary>
        /// <param name="topKPages">Pages to print, in the order they are enumerated.</param>
        /// <param name="userQuery">Individual query terms to look for in each page's text.</param>
        private void PrintTopResults(List<Page> topKPages, string[] userQuery)
        {
            var englishStopWords = StopWord.GetEnglishStopwords();

            // Hash the query terms once so the per-word membership test is O(1)
            // instead of a linear scan of the array for every word of every page.
            var queryTerms = new HashSet<string>(userQuery);

            foreach (var item in topKPages)
            {
                Console.WriteLine("\n" + item.Url);

                // NOTE(review): Enumerable.Except has set semantics, so besides removing
                // stop words it also drops repeated words; snippets are therefore built
                // from the distinct words of the page, not from the running text.
                // Kept as-is to preserve the existing output; confirm this is intended.
                string[] htmlSplit = item.Html.Split(" ").Except(englishStopWords).ToArray();

                // StringBuilder avoids re-allocating the whole snippet on every append.
                var text = new System.Text.StringBuilder();

                for (int i = 0; i < htmlSplit.Length; i++)
                {
                    if (!queryTerms.Contains(htmlSplit[i]))
                    {
                        continue;
                    }

                    text.Append("...");

                    // Window of up to four words before the match, the match itself and
                    // up to three words after it, clamped to the array bounds. The upper
                    // clamp fixes an IndexOutOfRangeException: the previous check
                    // (j > Length) let j == Length through when a match sat near the end.
                    int windowStart = Math.Max(0, i - 4);
                    int windowEnd = Math.Min(htmlSplit.Length, i + 4);

                    for (int j = windowStart; j < windowEnd; j++)
                    {
                        text.Append(htmlSplit[j]).Append(' ');
                    }
                }

                Console.WriteLine("-------------------------------------------");
                Console.WriteLine(text.ToString() + "\n\n");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Runs a user query end to end: lower-cases it, strips English stop words,
        /// scores the candidate pages and prints the top results for three rankings
        /// (cosine score, page rank, and the aggregate of both).
        /// </summary>
        /// <param name="userQuery">Raw query text; terms are separated by single spaces.</param>
        public void HandleUserQuery(string userQuery)
        {
            // Number of results requested from TakeTopKResults for every ranking.
            const int resultCount = 10;

            userQuery = userQuery.ToLower();

            // The printed snippets are matched against every lower-cased query term,
            // stop words included, so split the full query once and reuse it below.
            string[] queryTerms = userQuery.Split(" ");

            var stopWords = StopWord.GetEnglishStopwords();
            var filteredQuery = string.Join(" ", userQuery.Split(" ").Except(stopWords).ToArray());
            var candidatePages = PruneContenders(filteredQuery);

            // NOTE(review): the query vector is not passed between these two calls, so
            // they presumably communicate through instance state - keep them in this order.
            CalculateQueryTfIdf(filteredQuery);
            var queryVector = NormaliseQueryVector();

            var cosineByPage = CalculateCosineSimilarity(queryVector, candidatePages);
            var rankByPage = GetPageRank();

            // Highest score first for each of the three rankings.
            var byCosine = cosineByPage.OrderByDescending(entry => entry.Value);
            var byPageRank = rankByPage.OrderByDescending(entry => entry.Value);
            var byAggregate = AggregateCosineAndPageRank(cosineByPage, rankByPage);

            var topByCosine = TakeTopKResults(byCosine, resultCount);
            var topByPageRank = TakeTopKResults(byPageRank, resultCount);
            var topByAggregate = TakeTopKResults(byAggregate, resultCount);

            PrintTopResults(topByCosine, queryTerms);
            PrintTopResults(topByPageRank, queryTerms);
            PrintTopResults(topByAggregate, queryTerms);
        }