Пример #1
0
        /// <summary>
        /// Builds a ranked list of suggestions for <paramref name="query"/> by counting, per
        /// document id, how many of the entries returned by <c>GetBigrams(query)</c> have that
        /// id listed in <c>bigramToSuggestions</c>.
        /// </summary>
        /// <param name="query">Text whose bigrams are looked up in the index.</param>
        /// <returns>
        /// One result per matched document id, built from <c>documents[id]</c>, ordered from
        /// the highest score to the lowest. Empty when no bigram matches.
        /// </returns>
        override public SuggestionResult[] GetSuggestions(string query)
        {
            var scoredByDocId = new Dictionary<int, SuggestionResult>();

            foreach (var bigram in GetBigrams(query))
            {
                HashSet<int> matchingDocIds;
                if (!bigramToSuggestions.TryGetValue(bigram.Key, out matchingDocIds))
                {
                    // Nothing is indexed under this bigram.
                    continue;
                }

                foreach (var id in matchingDocIds)
                {
                    SuggestionResult existing;
                    if (scoredByDocId.TryGetValue(id, out existing))
                    {
                        // NOTE(review): the in-place increment only sticks if SuggestionResult
                        // is a reference type - confirm against its declaration.
                        existing.score++;
                    }
                    else
                    {
                        // First hit for this document: start its score at 1.
                        scoredByDocId[id] = new SuggestionResult(documents[id], 1);
                    }
                }
            }

            // Ascending sort on the negated score yields a descending-by-score order.
            return scoredByDocId.Values.OrderBy(entry => -entry.score).ToArray();
        }
Пример #2
0
        /// <summary>
        /// Collects the indexed tokens that share bigrams with <paramref name="pollutedToken"/>
        /// and ranks them by the number of shared bigram entries.
        /// </summary>
        /// <param name="pollutedToken">Token whose bigrams are looked up in <c>bigrams</c>.</param>
        /// <param name="limit">Maximum number of results to return.</param>
        /// <returns>
        /// At most <paramref name="limit"/> results built from <c>invertedDict</c>, ordered from
        /// the highest score to the lowest.
        /// </returns>
        public SuggestionResult[] SuggestToken(string pollutedToken, int limit)
        {
            var candidates = new Dictionary<int, SuggestionResult>();

            foreach (var bigram in GetBigrams(pollutedToken))
            {
                HashSet<int> tokenIds;
                if (bigrams.TryGetValue(bigram.Key, out tokenIds))
                {
                    foreach (var id in tokenIds)
                    {
                        SuggestionResult known;
                        if (candidates.TryGetValue(id, out known))
                        {
                            // NOTE(review): relies on SuggestionResult being a reference type
                            // for the increment to be visible in the dictionary - confirm.
                            known.score++;
                        }
                        else
                        {
                            // First bigram shared with this token: start its score at 1.
                            candidates[id] = new SuggestionResult(invertedDict[id], 1);
                        }
                    }
                }
            }

            // Negated key => highest score first; then cut to the requested size.
            return candidates.Values
                             .OrderBy(candidate => -candidate.score)
                             .Take(limit)
                             .ToArray();
        }
Пример #3
0
        /// <summary>
        /// Scores suggestions by the number of query tokens they are indexed under. A query
        /// token that is not found in <c>dict</c> is replaced by its closest matches from
        /// <c>SuggestToken</c>, which are then looked up like ordinary tokens.
        /// </summary>
        /// <param name="query">Space-separated query text; surrounding whitespace is ignored.</param>
        /// <returns>
        /// Up to 10 results built from <c>idToSuggestion</c>, ordered from the highest score to
        /// the lowest. Empty when nothing matches.
        /// </returns>
        override public SuggestionResult[] GetSuggestions(string query)
        {
            const int fuzzyMatchesPerToken = 3;
            const int maxResults = 10;

            // Tokenize on single spaces; empty entries (from consecutive spaces or an empty
            // query) are dropped so they never reach the fuzzy lookup.
            var tokens = query.Trim()
                              .Split(' ')
                              .Where(token => token.Length > 0)
                              .ToList();

            // The list grows while it is being walked (fuzzy matches are appended below).
            // Only the tokens that came from the query itself may be expanded; an appended
            // match that is missing from dict is skipped rather than expanded again, which
            // could otherwise keep growing the list without bound.
            int queryTokenCount = tokens.Count;
            var result = new Dictionary<int, SuggestionResult>();

            for (int i = 0; i < tokens.Count; ++i)
            {
                int tokenIndex;
                if (dict.TryGetValue(tokens[i], out tokenIndex))
                {
                    foreach (var candidate in invertedIndex[tokenIndex])
                    {
                        SuggestionResult suggestion;
                        if (!result.TryGetValue(candidate, out suggestion))
                        {
                            result[candidate] = new SuggestionResult(idToSuggestion[candidate].content, 1);
                        }
                        else
                        {
                            // NOTE(review): relies on SuggestionResult being a reference type
                            // for the increment to be visible in the dictionary - confirm.
                            suggestion.score++;
                        }
                    }

                    continue;
                }

                if (i >= queryTokenCount)
                {
                    // An appended fuzzy match that dict does not know: do not expand it again.
                    continue;
                }

                // Unknown query token: queue its closest known tokens for lookup.
                var suggestions = SuggestToken(tokens[i], fuzzyMatchesPerToken);
                for (int j = 0; j < suggestions.Length; ++j)
                {
                    tokens.Add(suggestions[j].value);
                }
            }

            // Ascending sort on the negated score yields a descending-by-score order.
            return result.Values.OrderBy(x => -x.score).Take(maxResults).ToArray();
        }
Пример #4
0
        /// <summary>
        /// Scores suggestions by the number of query tokens they are indexed under. A query
        /// token that is not found in <c>dict</c> is replaced by its two closest matches from
        /// <c>SuggestToken</c>, and the time spent in that lookup is written to the console.
        /// </summary>
        /// <param name="query">Space-separated query text; surrounding whitespace is ignored.</param>
        /// <param name="tolerance">
        /// Not read by this method; kept so existing callers continue to compile.
        /// </param>
        /// <returns>
        /// Up to 10 results built from <c>idToSuggestion</c>, ordered from the highest score to
        /// the lowest. Empty when nothing matches.
        /// </returns>
        public SuggestionResult[] GetFastSuggestions(string query, int tolerance = 100)
        {
            const int fuzzyMatchesPerToken = 2;
            const int maxResults = 10;

            // Tokenize on single spaces; empty entries (from leading, trailing or consecutive
            // spaces, or an empty query) are dropped so they never reach the fuzzy lookup.
            var tokens = query.Trim()
                              .Split(' ')
                              .Where(token => token.Length > 0)
                              .ToList();

            // The list grows while it is being walked (fuzzy matches are appended below).
            // Only the tokens that came from the query itself may be expanded; an appended
            // match that is missing from dict is skipped rather than expanded again, which
            // could otherwise keep growing the list without bound.
            int queryTokenCount = tokens.Count;
            var result = new Dictionary<int, SuggestionResult>();

            for (int i = 0; i < tokens.Count; ++i)
            {
                int tokenIndex;
                if (dict.TryGetValue(tokens[i], out tokenIndex))
                {
                    foreach (var candidate in invertedIndex[tokenIndex])
                    {
                        SuggestionResult suggestion;
                        if (!result.TryGetValue(candidate, out suggestion))
                        {
                            result[candidate] = new SuggestionResult(idToSuggestion[candidate].content, 1);
                        }
                        else
                        {
                            // NOTE(review): relies on SuggestionResult being a reference type
                            // for the increment to be visible in the dictionary - confirm.
                            suggestion.score++;
                        }
                    }

                    continue;
                }

                if (i >= queryTokenCount)
                {
                    // An appended fuzzy match that dict does not know: do not expand it again.
                    continue;
                }

                // Unknown query token: time the fuzzy lookup and queue its matches for lookup.
                var watch = Stopwatch.StartNew();
                var suggestions = SuggestToken(tokens[i], fuzzyMatchesPerToken);
                watch.Stop();
                Console.WriteLine("Suggestion ellapsed time: " + watch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms");
                for (int j = 0; j < suggestions.Length; ++j)
                {
                    tokens.Add(suggestions[j].value);
                }
            }

            // Ascending sort on the negated score yields a descending-by-score order.
            return result.Values.OrderBy(x => -x.score).Take(maxResults).ToArray();
        }