Esempio n. 1
0
        /// <summary>
        ///     Registers every token of a tokenized line in the full-text index. Afterwards, if
        ///     <c>NeedsCleanup()</c> reports true, the single token with the most positions is ignored.
        /// </summary>
        /// <param name="line">The tokenized line whose <c>Tokens</c> are indexed</param>
        /// <param name="lineIndex">Line index stored in the <see cref="LinePointer" /> created for this line</param>
        /// <param name="primaryKey">Primary key stored in the <see cref="LinePointer" /> created for this line</param>
        private void IndexLine(TokenizedLine line, int lineIndex, KeyValue primaryKey)
        {
            var linePointer = new LinePointer(lineIndex, primaryKey);

            foreach (var token in line.Tokens)
            {
                if (PositionsByToken.TryGetValue(token, out var pointers))
                {
                    // A token that is already registered but has no positions is treated as "too frequent"
                    // and is not indexed again (presumably IgnoreToken leaves such an empty set - confirm)
                    if (pointers.Count == 0)
                    {
                        continue;
                    }
                }
                else
                {
                    // First time this token is seen: start a new set of positions for it
                    pointers = new HashSet<LinePointer>();
                    PositionsByToken[token] = pointers;
                }

                // Only a pointer that was not yet present for this token counts as a new entry
                if (!pointers.Add(linePointer))
                {
                    continue;
                }

                Entries += 1;

                AddToSecondaryIndex(linePointer);
            }

            // Remove the most frequent (least discriminant) token from the index if the index is too big.
            // Limit the entries in the index: try to stay under MaxCapacity but without removing more than
            // MaxTokensToIgnore tokens. At most one token is removed per call.
            if (!NeedsCleanup())
            {
                return;
            }

            string tokenToIgnore = null;
            var highestCount = 0;

            // Linear scan for the token with the largest number of positions; on a tie the first one found wins
            foreach (var entry in PositionsByToken)
            {
                var count = entry.Value.Count;

                if (count <= highestCount)
                {
                    continue;
                }

                tokenToIgnore = entry.Key;
                highestCount  = count;
            }

            // Checked in debug builds only: a cleanup request implies at least one non-empty token
            Debug.Assert(tokenToIgnore != null);

            IgnoreToken(tokenToIgnore);

            // All the positions of the ignored token no longer count as index entries
            Entries -= highestCount;

            IgnoredTokens += 1;
        }
Esempio n. 2
0
        /// <summary>
        ///     Records a pointer in the secondary index, which groups pointers by the primary key of
        ///     their document. The secondary index is used when a document is deleted or updated.
        /// </summary>
        /// <param name="pointer">The pointer to record under its <c>PrimaryKey</c></param>
        private void AddToSecondaryIndex(LinePointer pointer)
        {
            var documentKey = pointer.PrimaryKey;

            // Document already known: append to its existing list (duplicates are not filtered)
            if (PositionsByDocument.TryGetValue(documentKey, out var pointers))
            {
                pointers.Add(pointer);
                return;
            }

            // First pointer for this document: create its list with the pointer already in it
            PositionsByDocument.Add(documentKey, new List<LinePointer> { pointer });
        }
Esempio n. 3
0
            /// <summary>
            ///     Gets or sets the score stored for a line pointer in <c>_scoreByPointer</c>.
            ///     Reading a pointer that has no stored score yields 0 instead of throwing.
            /// </summary>
            /// <param name="pointer">The line pointer used as lookup key</param>
            public double this[LinePointer pointer]
            {
                get
                {
                    // Return the stored score when there is one for this pointer
                    if (_scoreByPointer.TryGetValue(pointer, out var score))
                    {
                        return(score);
                    }

                    // No score stored for this pointer: fall back to zero
                    return(0);
                }

                // Store the value under this pointer through the indexer of _scoreByPointer
                set => _scoreByPointer[pointer] = value;
Esempio n. 4
0
 /// <summary>
 ///     Member-wise comparison with another pointer: both are equal when their <c>Line</c>,
 ///     <c>PrimaryKey</c> and <c>Deleted</c> members all compare equal with <c>==</c>.
 /// </summary>
 /// <param name="other">The pointer to compare with; it is dereferenced without a null check</param>
 /// <returns>true if the three members are equal, false otherwise</returns>
 private bool Equals(LinePointer other)
 {
     // Members are compared in the order Line, PrimaryKey, Deleted, stopping at the first mismatch
     if (!(Line == other.Line))
     {
         return false;
     }

     if (!(PrimaryKey == other.PrimaryKey))
     {
         return false;
     }

     return Deleted == other.Deleted;
 }