/// <summary>
/// Creates an <see cref="T:IDictionary{string, WeightedSpanTerm}"/> from the given <see cref="Query"/>
/// and <see cref="Analysis.TokenStream"/>. Uses a supplied <see cref="IndexReader"/> to properly
/// weight terms (for gradient highlighting): each extracted term's weight is multiplied by an
/// IDF factor computed from <c>reader.MaxDoc</c> and the term's document frequency.
/// <para/>
/// The internal reader is disposed before this method returns, including when term extraction
/// or scoring throws.
/// </summary>
/// <param name="query"><see cref="Query"/> that caused hit</param>
/// <param name="tokenStream"><see cref="Analysis.TokenStream"/> of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name; may be <c>null</c></param>
/// <param name="reader">to use for scoring</param>
/// <returns>Map of <see cref="WeightedSpanTerm"/>s with quasi tf/idf scores</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public virtual IDictionary<string, WeightedSpanTerm> GetWeightedSpanTermsWithScores(
    Query query, TokenStream tokenStream, string fieldName, IndexReader reader)
{
    this.fieldName = fieldName?.Intern();
    this.tokenStream = tokenStream;

    IDictionary<string, WeightedSpanTerm> terms = new PositionCheckingMap<string>();

    try
    {
        // Extraction happens inside the try block so that internalReader is released even
        // when Extract (or the reader access below) fails, matching GetWeightedSpanTerms.
        Extract(query, terms);

        int totalNumDocs = reader.MaxDoc;

        foreach (var wt in terms.Keys)
        {
            // Only the Weight of the stored value is mutated; the map itself is not modified
            // while its keys are being enumerated.
            terms.TryGetValue(wt, out WeightedSpanTerm weightedSpanTerm);
            int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
            // IDF algorithm taken from DefaultSimilarity class
            float idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
            weightedSpanTerm.Weight *= idf;
        }
    }
    finally
    {
        IOUtils.Dispose(internalReader);
    }

    return terms;
}
/// <summary>
/// Builds an <see cref="T:IDictionary{string, WeightedSpanTerm}"/> for the supplied
/// <see cref="Query"/> and <see cref="Analysis.TokenStream"/>.
/// <para/>
/// The internal reader is disposed once extraction finishes, whether or not it throws.
/// </summary>
/// <param name="query"><see cref="Query"/> that caused hit</param>
/// <param name="tokenStream"><see cref="Analysis.TokenStream"/> of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name; may be <c>null</c></param>
/// <returns>Map containing <see cref="WeightedSpanTerm"/>s</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public virtual IDictionary<string, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream, string fieldName)
{
    // A null field name is stored as null; anything else is interned first.
    this.fieldName = fieldName?.Intern();
    this.tokenStream = tokenStream;

    IDictionary<string, WeightedSpanTerm> result = new PositionCheckingMap<string>();

    try
    {
        Extract(query, result);
    }
    finally
    {
        IOUtils.Dispose(internalReader);
    }

    return result;
}
/// <summary>
/// Builds a Map of <c>WeightedSpanTerms</c> for the supplied <c>Query</c> and <c>TokenStream</c>.
/// <c>CloseReaders</c> is invoked once extraction finishes, whether or not it throws.
/// </summary>
/// <param name="query">query that caused hit</param>
/// <param name="tokenStream">tokenStream of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name; may be <c>null</c></param>
/// <returns>Map containing WeightedSpanTerms</returns>
public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream, String fieldName)
{
    // A null field name is stored as null; anything else is interned first.
    this.fieldName = fieldName != null ? StringHelper.Intern(fieldName) : null;
    this.tokenStream = tokenStream;

    IDictionary<String, WeightedSpanTerm> result = new PositionCheckingMap<String>();

    try
    {
        Extract(query, result);
    }
    finally
    {
        CloseReaders();
    }

    return result;
}
/// <summary>
/// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>. Uses a supplied
/// <c>IndexReader</c> to properly weight terms (for gradient highlighting): each extracted term's weight is
/// multiplied by an IDF factor computed from <c>reader.NumDocs()</c> and the term's document frequency.
/// <c>CloseReaders</c> is invoked before this method returns, including when term extraction or scoring throws.
/// </summary>
/// <param name="query">Query that caused hit</param>
/// <param name="tokenStream">Tokenstream of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name; may be <c>null</c></param>
/// <param name="reader">to use for scoring</param>
/// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, IndexReader reader)
{
    if (fieldName != null)
    {
        this.fieldName = StringHelper.Intern(fieldName);
    }
    else
    {
        this.fieldName = null;
    }

    this.tokenStream = tokenStream;

    IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

    try
    {
        // Extraction happens inside the try block so that CloseReaders runs even when
        // Extract (or the reader access below) fails, matching GetWeightedSpanTerms.
        Extract(query, terms);

        int totalNumDocs = reader.NumDocs();

        foreach (var wt in terms.Keys)
        {
            // Only the Weight of the stored value is mutated; the map itself is not modified
            // while its keys are being enumerated.
            WeightedSpanTerm weightedSpanTerm = terms[wt];
            int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));

            // docFreq counts deletes
            if (totalNumDocs < docFreq)
            {
                docFreq = totalNumDocs;
            }

            // IDF algorithm taken from DefaultSimilarity class
            float idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
            weightedSpanTerm.Weight *= idf;
        }
    }
    finally
    {
        CloseReaders();
    }

    return terms;
}