/// <summary>
/// Lazily enumerates scores for the tokens accepted by <paramref name="tokenFilter"/>.
/// Each token is yielded at most once over the whole enumeration.
/// </summary>
/// <remarks>
/// Context databases are walked from the highest index down to index 1. For each accepted
/// token a score is built as: [token, count in database 0, ..., count in database index].
/// Scores that share the same count in the database being walked are buffered and emitted
/// in <c>ScoreReverseComparer</c> order once the count changes.
/// </remarks>
/// <param name="tokenFilter">Predicate deciding whether a token may be returned. It is
/// invoked for every candidate token, including tokens that were already returned.</param>
/// <param name="includeSpeculative">When true, the enumeration continues with two-element
/// scores taken from <c>_contextDatabases[0]</c> and then single-element scores for the
/// tokens returned by <c>_source.GetTokens()</c>.</param>
/// <returns>A deferred sequence of scores.</returns>
internal IEnumerable<Score> GetTopScores(Func<int, bool> tokenFilter, bool includeSpeculative)
        {
            // Tokens already found and returned.
            var foundTokens = new HashSet<int>();

            // True only the first time a token accepted by the filter is seen. The filter is
            // always evaluated first; HashSet.Add reports whether the token was newly added.
            bool IsNewToken(int token)
            {
                return tokenFilter(token) && foundTokens.Add(token);
            }

            // Produce scores with three or more elements - those that may not entirely be ordered
            // by their final ordinal.
            for (var index = _contextDatabases.Length - 1; 1 <= index; index--)
            {
                // Scores whose count in the current database equals 'group', awaiting emission.
                var groupSet = new SortedSet<Score>(ScoreReverseComparer.Instance);
                var group = int.MaxValue;

                foreach (var info in _contextDatabases[index].SortedEnumerable)
                {
                    var token = info.Token;

                    if (!IsNewToken(token))
                    {
                        continue;
                    }

                    // Layout: [token, count in db 0, ..., count in db index - 1, count in db index].
                    var values = new int[1 + 1 + index];
                    values[0] = token;
                    values[1 + index] = info.Count;

                    for (var inbetweenIndex = 0; inbetweenIndex < index; inbetweenIndex++)
                    {
                        if (_contextDatabases[inbetweenIndex].TryGetValue(token, out var inbetweenInfo))
                        {
                            values[1 + inbetweenIndex] = inbetweenInfo.Count;
                        }
                        else
                        {
                            Debug.Fail("Cannot not find an inbetween");
                            values[1 + inbetweenIndex] = 0;
                        }
                    }

                    if (group != info.Count)
                    {
                        // The count changed: flush the previous group before starting a new one.
                        foreach (var s in groupSet)
                        {
                            yield return s;
                        }
                        groupSet.Clear();

                        // The enumeration is expected to deliver counts in descending order.
                        Debug.Assert(info.Count < group);
                        group = info.Count;
                    }

                    groupSet.Add(new Score(values));
                }

                // Flush whatever remains of the last group for this database.
                foreach (var s in groupSet)
                {
                    yield return s;
                }
            }

            if (!includeSpeculative)
            {
                yield break;
            }

            // Produce scores with two ordinals - those guaranteed to be produced in order.
            foreach (var info in _contextDatabases[0].SortedEnumerable)
            {
                var token = info.Token;

                if (IsNewToken(token))
                {
                    yield return new Score(token, info.Count);
                }
            }

            // As a first fallback produce single ordinal results.
            foreach (var token in _source.GetTokens())
            {
                if (IsNewToken(token))
                {
                    yield return new Score(token);
                }
            }

            // TODO: We should now perhaps disregard index position constraints and just yield anything.
        }
Example #2
0
        /// <summary>
        /// Lazily enumerates up to <paramref name="count"/> token indices (as reported by
        /// <c>source.GetTokenIndex</c>) for tokens whose index lies in
        /// [<paramref name="minIndex"/>, <paramref name="limIndex"/>) and that
        /// <paramref name="filter"/> reports as visible.
        /// </summary>
        /// <remarks>
        /// Child databases are requested from <c>_database.GetChild</c> for successive start
        /// positions in <paramref name="context"/>, beginning at
        /// <c>max(0, context.Length - _width + 1)</c> and ending at <c>context.Length</c>.
        /// At each position the highest-count candidates are collected, ranked by
        /// <c>RankSort</c> and yielded. When candidates are rejected by the filter the same
        /// position is scanned again so that other tokens can take their place. If the
        /// requested count is still not reached, the remaining tokens of
        /// <c>source.GetTokens()</c> are used as a fallback. A non-positive
        /// <paramref name="count"/> yields nothing.
        /// </remarks>
        /// <typeparam name="T">Suggestion item type of the vocabulary source.</typeparam>
        /// <param name="source">Vocabulary source used to map tokens to indices and to supply fallback tokens.</param>
        /// <param name="filter">Filter deciding whether a token is visible and may be yielded.</param>
        /// <param name="context">Context token sequence used to select child databases.</param>
        /// <param name="minIndex">Inclusive lower bound of acceptable token indices.</param>
        /// <param name="limIndex">Exclusive upper bound of acceptable token indices.</param>
        /// <param name="count">Maximum number of indices to yield.</param>
        /// <returns>A deferred sequence of token indices.</returns>
        internal IEnumerable<int> GetTopIndices<T>(PredictiveVocabularySource<T> source, ITokenTileFilter filter, int[] context, int minIndex, int limIndex, int count)
            where T : ISuggestionItem
        {
            var toFindCount = count;

            // Tokens already considered (yielded or rejected by the filter); never revisited.
            var foundTokens = new HashSet<int>();

            var contextLimit = context.Length;
            var contextStart = Math.Max(0, contextLimit - _width + 1);

            var scanIndex = contextStart;

            // '0 < toFindCount' (rather than '!= 0') keeps a negative count from entering the
            // loop, where it would index the candidate list with a negative position.
            while (0 < toFindCount && scanIndex <= contextLimit)
            {
                // Stays true unless a candidate was rejected by the filter, in which case the
                // same scan position is revisited to look for replacement candidates.
                var processed = true;
                var database = _database.GetChild(context, scanIndex, contextLimit - scanIndex);

                if (database != null)
                {
                    // Candidates kept in descending count order: the best toFindCount entries
                    // plus any entries tied with the last of them.
                    var candidates = new List<CandidatePair>();

                    // Lowest count still worth accepting once enough candidates are held.
                    var acceptedMin = int.MinValue;
                    foreach (var pair in database)
                    {
                        var token = pair.Key;

                        if (!foundTokens.Contains(token))
                        {
                            var tokenCount = pair.Value.Count;

                            var index = source.GetTokenIndex(token);
                            if (minIndex <= index && index < limIndex && acceptedMin <= tokenCount)
                            {
                                // Find the insertion point: after every entry with an equal or higher count.
                                var candidateLimit = candidates.Count;
                                while (0 < candidateLimit && candidates[candidateLimit - 1].Count < tokenCount)
                                {
                                    candidateLimit--;
                                }

                                candidates.Insert(candidateLimit, new CandidatePair(token, tokenCount));

                                if (toFindCount == candidates.Count)
                                {
                                    acceptedMin = candidates[candidates.Count - 1].Count;
                                }

                                // More than needed and the tail is strictly worse than the last
                                // required entry: drop the tail and raise the acceptance bar.
                                if (toFindCount < candidates.Count &&
                                    candidates[candidates.Count - 1].Count < candidates[toFindCount - 1].Count)
                                {
                                    Debug.Assert(candidates[toFindCount].Count < candidates[toFindCount - 1].Count);
                                    candidates.RemoveRange(toFindCount, candidates.Count - toFindCount);
                                    acceptedMin = candidates[candidates.Count - 1].Count;
                                }
                            }
                        }
                    }

                    // Build one ranking key per candidate:
                    // [count at scanIndex, counts at scanIndex + 1 .. contextLimit, token].
                    var sortableCandidates = new List<int[]>();
                    foreach (var candidate in candidates)
                    {
                        var counts = new int[contextLimit - scanIndex + 2];
                        counts[0] = candidate.Count;
                        counts[contextLimit - scanIndex + 1] = candidate.Token;
                        sortableCandidates.Add(counts);
                    }
                    for (var subIndex = scanIndex + 1; subIndex <= contextLimit; subIndex++)
                    {
                        var dictionary = _database.GetChild(context, subIndex, contextLimit - subIndex);

                        if (dictionary != null)
                        {
                            foreach (var counts in sortableCandidates)
                            {
                                Debug.Assert(counts[subIndex - scanIndex] == 0);

                                // The token is stored in the last slot of the key.
                                if (dictionary.TryGetValue(counts[counts.Length - 1], out var subCount))
                                {
                                    counts[subIndex - scanIndex] = subCount.Count;
                                }
                            }
                        }
                    }
                    sortableCandidates.Sort(RankSort);

                    var sliceCount = Math.Min(sortableCandidates.Count, toFindCount);
                    for (var i = 0; i < sliceCount; i++)
                    {
                        var counts = sortableCandidates[i];
                        var token = counts[counts.Length - 1];
                        var index = source.GetTokenIndex(token);

                        foundTokens.Add(token);

                        if (filter.IsTokenVisible(token))
                        {
                            yield return index;

                            toFindCount--;
                        }
                        else
                        {
                            // A slot went unused; rescan this position for further candidates.
                            processed = false;
                        }
                    }
                }

                if (processed)
                {
                    scanIndex++;
                }
            }

            // Fallback: fill the remaining slots from the source's own token enumeration.
            if (0 < toFindCount)
            {
                var tokens = source.GetTokens();

                using (var enumerator = tokens.GetEnumerator())
                {
                    while (0 < toFindCount && enumerator.MoveNext())
                    {
                        var token = enumerator.Current;

                        if (!foundTokens.Contains(token))
                        {
                            var index = source.GetTokenIndex(token);
                            if (minIndex <= index && index < limIndex)
                            {
                                foundTokens.Add(token);

                                if (filter.IsTokenVisible(token))
                                {
                                    yield return index;

                                    toFindCount--;
                                }
                            }
                        }
                    }
                }
            }

            // Orders ranking keys in descending lexicographic order: the first differing
            // element decides, with the larger value sorting first.
            int RankSort(int[] l, int[] r)
            {
                Debug.Assert(l.Length == r.Length);
                var lim = l.Length;

                var i = 0;

                while (i < lim && l[i] == r[i])
                {
                    i++;
                }
                var comparison = i == lim ? 0 : r[i].CompareTo(l[i]);

                return comparison;
            }
        }