/// <summary>
/// Prints, for every page, its URL followed by a snippet assembled from the
/// words surrounding each query-term hit in the page's HTML text.
/// </summary>
/// <param name="topKPages">Pages to print, in the order given.</param>
/// <param name="userQuery">
/// Query terms. A term matches a space-separated token of the page HTML only
/// on exact (ordinal, case-sensitive) string equality.
/// </param>
private void PrintTopResults(List<Page> topKPages, string[] userQuery)
{
    var englishStopWords = StopWord.GetEnglishStopwords();

    // Hash-based lookup instead of a linear array scan for every token.
    var queryTerms = new HashSet<string>(userQuery);

    foreach (var page in topKPages)
    {
        Console.WriteLine("\n" + page.Url);

        // NOTE: Except is a set difference, so besides removing stop words it
        // also collapses duplicate tokens to their first occurrence.
        string[] words = page.Html.Split(" ").Except(englishStopWords).ToArray();

        // Fully qualified so the block does not depend on a `using System.Text;`.
        var snippet = new System.Text.StringBuilder();

        for (int i = 0; i < words.Length; i++)
        {
            if (!queryTerms.Contains(words[i]))
            {
                continue;
            }

            snippet.Append("...");

            // Context window: up to four words before the hit, the hit itself,
            // and up to three words after it. Both ends are clamped to the
            // array; the previous `j > Length` check let j == Length through
            // and threw IndexOutOfRangeException for hits near the end.
            int windowStart = Math.Max(0, i - 4);
            int windowEnd = Math.Min(words.Length, i + 4); // exclusive

            for (int j = windowStart; j < windowEnd; j++)
            {
                snippet.Append(words[j]).Append(' ');
            }
        }

        Console.WriteLine("-------------------------------------------");
        Console.WriteLine(snippet.ToString() + "\n\n");
    }
}
/// <summary>
/// Runs a user query end to end: lower-cases it, removes English stop words,
/// computes cosine scores, page ranks and an aggregated score via the helper
/// methods, and prints the top results for each of the three rankings.
/// </summary>
/// <param name="userQuery">Raw query text; terms are separated by single spaces.</param>
public void HandleUserQuery(string userQuery)
{
    const int resultCount = 10;

    userQuery = userQuery.ToLower();

    // The full lower-cased term list (stop words included) is what the
    // printing step matches against.
    string[] allQueryTerms = userQuery.Split(" ");

    var stopWords = StopWord.GetEnglishStopwords();
    var filteredTerms = userQuery.Split(" ").Except(stopWords);
    string filteredQuery = string.Join(" ", filteredTerms);

    // Helper calls are kept in their original order in case they share state.
    var candidatePages = PruneContenders(filteredQuery);
    CalculateQueryTfIdf(filteredQuery);
    var queryVector = NormaliseQueryVector();
    var cosineScores = CalculateCosineSimilarity(queryVector, candidatePages);
    var pageRanks = GetPageRank();

    // Three rankings: cosine only, page rank only, and the aggregate of both.
    var rankedByCosine = cosineScores.OrderByDescending(entry => entry.Value);
    var rankedByPageRank = pageRanks.OrderByDescending(entry => entry.Value);
    var rankedByAggregate = AggregateCosineAndPageRank(cosineScores, pageRanks);

    var topByCosine = TakeTopKResults(rankedByCosine, resultCount);
    var topByPageRank = TakeTopKResults(rankedByPageRank, resultCount);
    var topByAggregate = TakeTopKResults(rankedByAggregate, resultCount);

    PrintTopResults(topByCosine, allQueryTerms);
    PrintTopResults(topByPageRank, allQueryTerms);
    PrintTopResults(topByAggregate, allQueryTerms);
}