/// <summary>
/// Scores entries of <c>documents</c> by how many bigrams of <paramref name="query"/>
/// are indexed against them in <c>bigramToSuggestions</c>.
/// </summary>
/// <param name="query">Text that is handed to <c>GetBigrams</c> to obtain the bigrams to look up.</param>
/// <returns>
/// One <c>SuggestionResult</c> per matched document id, highest score first.
/// Entries with equal scores keep the order in which the dictionary yields them.
/// The array is empty when no bigram is found in the index.
/// </returns>
public override SuggestionResult[] GetSuggestions(string query)
{
    var hits = new Dictionary<int, SuggestionResult>();

    foreach (var bigram in GetBigrams(query))
    {
        HashSet<int> matchingDocs;
        if (!bigramToSuggestions.TryGetValue(bigram.Key, out matchingDocs))
        {
            // Nothing is indexed under this bigram.
            continue;
        }

        foreach (var docId in matchingDocs)
        {
            SuggestionResult existing;
            if (hits.TryGetValue(docId, out existing))
            {
                // NOTE(review): this only updates the stored entry if SuggestionResult
                // is a reference type; confirm against its declaration.
                existing.score++;
            }
            else
            {
                // First bigram that points at this document: start its score at 1.
                hits[docId] = new SuggestionResult(documents[docId], 1);
            }
        }
    }

    return hits.Values
               .OrderByDescending(hit => hit.score)
               .ToArray();
}
/// <summary>
/// Finds entries of <c>invertedDict</c> that share bigrams with <paramref name="pollutedToken"/>.
/// Each candidate's score is the number of the token's bigrams that the <c>bigrams</c>
/// index associates with it.
/// </summary>
/// <param name="pollutedToken">Token to find look-alikes for; it is split with <c>GetBigrams</c>.</param>
/// <param name="limit">Maximum number of candidates to return.</param>
/// <returns>At most <paramref name="limit"/> candidates, highest score first.</returns>
public SuggestionResult[] SuggestToken(string pollutedToken, int limit)
{
    var candidates = new Dictionary<int, SuggestionResult>();

    foreach (var bigram in GetBigrams(pollutedToken))
    {
        HashSet<int> tokenIds;
        if (bigrams.TryGetValue(bigram.Key, out tokenIds))
        {
            foreach (var tokenId in tokenIds)
            {
                SuggestionResult candidate;
                if (candidates.TryGetValue(tokenId, out candidate))
                {
                    // Already a candidate: one more shared bigram.
                    candidate.score++;
                }
                else
                {
                    candidates[tokenId] = new SuggestionResult(invertedDict[tokenId], 1);
                }
            }
        }
    }

    return candidates.Values
                     .OrderByDescending(candidate => candidate.score)
                     .Take(limit)
                     .ToArray();
}
/// <summary>
/// Splits <paramref name="query"/> on spaces and scores every suggestion that the inverted
/// index lists for a token. A typed token that is missing from <c>dict</c> is replaced by
/// up to three look-alikes obtained from <c>SuggestToken</c>.
/// </summary>
/// <param name="query">Space-separated search text. May be null, empty or blank.</param>
/// <returns>
/// The ten highest-scoring suggestions (fewer if fewer matched), highest score first.
/// Empty when <paramref name="query"/> is null or contains no tokens.
/// </returns>
public override SuggestionResult[] GetSuggestions(string query)
{
    if (query == null)
    {
        return new SuggestionResult[0];
    }

    // RemoveEmptyEntries keeps repeated spaces from producing "" tokens that would
    // each trigger a pointless SuggestToken lookup.
    var tokens = query.Trim()
                      .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                      .ToList();

    // Tokens at or beyond this index were appended below as replacements.
    int typedTokenCount = tokens.Count;

    var result = new Dictionary<int, SuggestionResult>();

    for (int i = 0; i < tokens.Count; ++i)
    {
        int tokenIndex;
        if (dict.TryGetValue(tokens[i], out tokenIndex))
        {
            foreach (var candidate in invertedIndex[tokenIndex])
            {
                SuggestionResult suggestion;
                if (!result.TryGetValue(candidate, out suggestion))
                {
                    result[candidate] = new SuggestionResult(idToSuggestion[candidate].content, 1);
                }
                else
                {
                    suggestion.score++;
                }
            }

            continue;
        }

        if (i >= typedTokenCount)
        {
            // A replacement that is itself unknown is not expanded again; otherwise
            // the token list could keep growing and the loop would never finish.
            continue;
        }

        // Unknown typed token: queue up to three similar tokens to be scored later in this loop.
        foreach (var replacement in SuggestToken(tokens[i], 3))
        {
            tokens.Add(replacement.value);
        }
    }

    return result.Values
                 .OrderByDescending(x => x.score)
                 .Take(10)
                 .ToArray();
}
/// <summary>
/// Splits <paramref name="query"/> on spaces and scores every suggestion that the inverted
/// index lists for a token. A typed token that is missing from <c>dict</c> is replaced by
/// up to two look-alikes obtained from <c>SuggestToken</c>, and the time spent in that call
/// is written to the console.
/// </summary>
/// <param name="query">Space-separated search text. May be null, empty or blank.</param>
/// <param name="tolerance">Currently not read by this method; kept so existing callers compile.</param>
/// <returns>
/// The ten highest-scoring suggestions (fewer if fewer matched), highest score first.
/// Empty when <paramref name="query"/> is null or contains no tokens.
/// </returns>
public SuggestionResult[] GetFastSuggestions(string query, int tolerance = 100)
{
    if (query == null)
    {
        return new SuggestionResult[0];
    }

    // Trim and drop empty entries so stray or repeated spaces do not produce "" tokens
    // that would each trigger a timed SuggestToken lookup.
    var tokens = query.Trim()
                      .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                      .ToList();

    // Tokens at or beyond this index were appended below as replacements.
    int typedTokenCount = tokens.Count;

    var result = new Dictionary<int, SuggestionResult>();

    for (int i = 0; i < tokens.Count; ++i)
    {
        int tokenIndex;
        if (dict.TryGetValue(tokens[i], out tokenIndex))
        {
            foreach (var candidate in invertedIndex[tokenIndex])
            {
                SuggestionResult suggestion;
                if (!result.TryGetValue(candidate, out suggestion))
                {
                    result[candidate] = new SuggestionResult(idToSuggestion[candidate].content, 1);
                }
                else
                {
                    suggestion.score++;
                }
            }

            continue;
        }

        if (i >= typedTokenCount)
        {
            // A replacement that is itself unknown is not expanded again; otherwise
            // the token list could keep growing and the loop would never finish.
            continue;
        }

        // Unknown typed token: look up similar tokens and time the lookup.
        var watch = Stopwatch.StartNew();
        var suggestions = SuggestToken(tokens[i], 2);
        watch.Stop();

        // NOTE(review): message text (including the "ellapsed" spelling) is kept verbatim
        // in case anything matches on this output.
        Console.WriteLine("Suggestion ellapsed time: " + watch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms");

        foreach (var replacement in suggestions)
        {
            tokens.Add(replacement.value);
        }
    }

    return result.Values
                 .OrderByDescending(x => x.score)
                 .Take(10)
                 .ToArray();
}