/// <summary>
/// Add a tokenized line to the full-text index.
/// </summary>
/// <param name="line">Tokenized line whose tokens are indexed.</param>
/// <param name="lineIndex">Index of the line inside its document.</param>
/// <param name="primaryKey">Primary key of the owning document.</param>
private void IndexLine(TokenizedLine line, int lineIndex, KeyValue primaryKey)
{
    var pointer = new LinePointer(lineIndex, primaryKey);

    foreach (var token in line.Tokens)
    {
        if (PositionsByToken.TryGetValue(token, out var positions))
        {
            // An empty position set is the sentinel for a token that was already
            // deemed too frequent and is ignored: skip it entirely
            if (positions.Count == 0)
            {
                continue;
            }
        }
        else
        {
            positions = new HashSet<LinePointer>();
            PositionsByToken[token] = positions;
        }

        if (positions.Add(pointer))
        {
            Entries++;
            AddToSecondaryIndex(pointer);
        }
    }

    // Remove the most frequent (less discriminant) tokens in the index if the index is too big
    // Limit the entries in the index: try to limit to MaxCapacity but without removing more than MaxTokensToIgnore tokens
    if (NeedsCleanup())
    {
        string mostFrequentToken = null;
        var maxFrequency = 0;

        // Linear scan for the token with the largest number of positions
        foreach (var pair in PositionsByToken)
        {
            if (pair.Value.Count > maxFrequency)
            {
                mostFrequentToken = pair.Key;
                maxFrequency = pair.Value.Count;
            }
        }

        Debug.Assert(mostFrequentToken != null);

        IgnoreToken(mostFrequentToken);

        Entries -= maxFrequency;
        IgnoredTokens++;
    }
}
/// <summary>
/// The secondary index is used when a document is deleted or updated.
/// </summary>
/// <param name="pointer">Pointer registered under its document's primary key.</param>
private void AddToSecondaryIndex(LinePointer pointer)
{
    // Lazily create the per-document pointer list on first use
    if (!PositionsByDocument.TryGetValue(pointer.PrimaryKey, out var pointers))
    {
        pointers = new List<LinePointer>();
        PositionsByDocument[pointer.PrimaryKey] = pointers;
    }

    pointers.Add(pointer);
}
/// <summary>
/// Score stored for a line pointer. The getter returns 0 when no score has been
/// recorded for <paramref name="pointer"/>; the setter overwrites any previous score.
/// </summary>
public double this[LinePointer pointer] { get { if (_scoreByPointer.TryGetValue(pointer, out var score)) { return(score); } return(0); } set => _scoreByPointer[pointer] = value;
/// <summary>
/// Member-wise equality: same line index, same primary key and same deletion flag.
/// </summary>
private bool Equals(LinePointer other) =>
    Line == other.Line &&
    PrimaryKey == other.PrimaryKey &&
    Deleted == other.Deleted;