/// <summary>
/// Pulls the terms out of <paramref name="query"/> and, for each term whose field
/// passes <c>FieldNameComparator</c>, stores a new <see cref="WeightedSpanTerm"/>
/// (weighted with the query's boost) in <paramref name="terms"/> under the term text.
/// </summary>
/// <param name="terms">Map keyed by term text that receives the weighted terms via its indexer.</param>
/// <param name="query">The query whose terms are extracted and whose boost is used as the weight.</param>
private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
{
    var extracted = new HashSet<Term>();
    query.ExtractTerms(extracted);

    foreach (Term term in extracted)
    {
        // Skip terms that belong to a field we are not interested in.
        if (!FieldNameComparator(term.Field))
        {
            continue;
        }

        var weighted = new WeightedSpanTerm(query.Boost, term.Text);
        terms[term.Text] = weighted;
    }
}
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/>. If an entry was
/// already present for the key and that entry was not position sensitive, the
/// newly stored term is marked as not position sensitive as well.
/// </summary>
/// <param name="key">The key to store the term under.</param>
/// <param name="value">The term to store.</param>
public override void Add(K key, WeightedSpanTerm value)
{
    // The previous entry has to be captured BEFORE the new value is stored;
    // reading this[key] after base.Add would simply hand back `value` itself,
    // which made the check below a no-op.
    // NOTE(review): relies on the indexer yielding null for an absent key, as the
    // other null-checked lookups in this file do - confirm for the base map type.
    WeightedSpanTerm prev = this[key];

    base.Add(key, value);

    if (prev == null || value == null)
    {
        return;
    }

    // Once a term has been seen without position constraints, it must stay that way.
    if (!prev.IsPositionSensitive())
    {
        value.SetPositionSensitive(false);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="QueryScorer"/> class from a
/// ready-made set of weighted terms. When the same term text occurs more than
/// once, the entry with the highest weight is kept.
/// </summary>
/// <param name="weightedTerms">The weighted terms to score with.</param>
public QueryScorer(WeightedSpanTerm[] weightedTerms)
{
    this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);

    foreach (WeightedSpanTerm candidate in weightedTerms)
    {
        WeightedSpanTerm current = fieldWeightedSpanTerms[candidate.Term];

        // Keep the entry already stored unless the candidate carries a strictly higher weight.
        if (current != null && !(current.Weight < candidate.Weight))
        {
            continue;
        }

        fieldWeightedSpanTerms[candidate.Term] = candidate;
        maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
    }

    // Terms were supplied directly, so flag that extractor initialization is to be skipped.
    skipInitExtractor = true;
}
/// <summary>
/// Extracts the weighted span terms of <paramref name="query"/> and multiplies
/// each term's weight by an inverse-document-frequency factor computed from
/// <paramref name="reader"/>.
/// </summary>
/// <param name="query">The query to extract terms from.</param>
/// <param name="tokenStream">The token stream, stored on this instance before extraction.</param>
/// <param name="fieldName">Name of the field; interned and stored on this instance when not null.</param>
/// <param name="reader">The reader supplying the document count and per-term document frequencies.</param>
/// <returns>The map from term text to <see cref="WeightedSpanTerm"/> with idf-scaled weights.</returns>
public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, IndexReader reader)
{
    this.fieldName = fieldName != null ? StringHelper.Intern(fieldName) : null;
    this.tokenStream = tokenStream;

    IDictionary<String, WeightedSpanTerm> spanTerms = new PositionCheckingMap<String>();
    Extract(query, spanTerms);

    int numDocs = reader.NumDocs();
    var termTexts = spanTerms.Keys;

    try
    {
        foreach (var termText in termTexts)
        {
            WeightedSpanTerm spanTerm = spanTerms[termText];

            // docFreq counts deletes, so cap it at the number of documents
            int docFreq = Math.Min(reader.DocFreq(new Term(fieldName, spanTerm.Term)), numDocs);

            // IDF algorithm taken from DefaultSimilarity class
            double ratio = (float)numDocs / (double)(docFreq + 1);
            float idf = (float)(Math.Log(ratio) + 1.0);

            spanTerm.Weight *= idf;
        }
    }
    finally
    {
        CloseReaders();
    }

    return spanTerms;
}
/// <summary>
/// Verifies that the best fragment is chosen according to the supplied term
/// weights: first "hello" outweighs "kennedy", then the weights are readjusted
/// so that "kennedy" wins.
/// </summary>
public void TestGetBestSingleFragmentWithWeights()
{
    var runner = new TestHighlightRunner();

    runner.TestAction = () =>
    {
        var helloTerm = new WeightedSpanTerm(10f, "hello");
        helloTerm.AddPositionSpans(new List<PositionSpan> { new PositionSpan(0, 0) });

        var kennedyTerm = new WeightedSpanTerm(1f, "kennedy");
        kennedyTerm.AddPositionSpans(new List<PositionSpan> { new PositionSpan(14, 14) });

        WeightedSpanTerm[] weighted = { helloTerm, kennedyTerm };

        // First pass: "hello" carries the larger weight.
        Highlighter highlighter = runner.GetHighlighter(weighted, this);
        TokenStream stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
        highlighter.TextFragmenter = new SimpleFragmenter(2);

        String best = highlighter.GetBestFragment(stream, texts[0]).Trim();
        Assert.IsTrue("<B>Hello</B>".Equals(best),
                      "Failed to find best section using weighted terms. Found: [" + best + "]");

        // Second pass: readjust weights so that "kennedy" dominates.
        kennedyTerm.Weight = 50f;
        stream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
        highlighter = runner.GetHighlighter(weighted, this);
        highlighter.TextFragmenter = new SimpleFragmenter(2);

        best = highlighter.GetBestFragment(stream, texts[0]).Trim();
        Assert.IsTrue("<B>kennedy</B>".Equals(best),
                      "Failed to find best section using weighted terms. Found: " + best);
    };

    runner.Start();
}
/// <summary>
/// Collects the positions matched by <paramref name="spanQuery"/> across the
/// relevant fields and attaches them to the query's terms in
/// <paramref name="terms"/>. Terms not yet present are created with the span
/// query's boost and flagged as position sensitive; terms already present only
/// receive the additional positions. Nothing is recorded when no spans match.
/// </summary>
/// <param name="terms">Map from term text to weighted term that is updated in place.</param>
/// <param name="spanQuery">The span query to evaluate.</param>
private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    // Decide which fields to inspect: the configured one, or whatever the query references.
    var targetFields = new HashSet<String>();
    if (fieldName == null)
    {
        CollectSpanQueryFields(spanQuery, targetFields);
    }
    else
    {
        targetFields.Add(fieldName);
    }

    // To support the use of the default field name
    if (defaultField != null)
    {
        targetFields.Add(defaultField);
    }

    IDictionary<String, SpanQuery> rewrittenByField = new HashMap<String, SpanQuery>();
    var queryTerms = new HashSet<Term>();
    bool needsRewrite = MustRewriteQuery(spanQuery);

    if (!needsRewrite)
    {
        spanQuery.ExtractTerms(queryTerms);
    }
    else
    {
        // Rewrite once per field against that field's reader and gather terms from each rewrite.
        foreach (String field in targetFields)
        {
            SpanQuery rewritten = (SpanQuery)spanQuery.Rewrite(GetReaderForField(field));
            rewrittenByField[field] = rewritten;
            rewritten.ExtractTerms(queryTerms);
        }
    }

    // Collect span positions for every field.
    var positions = new List<PositionSpan>();
    foreach (String field in targetFields)
    {
        IndexReader fieldReader = GetReaderForField(field);
        Spans spans = needsRewrite
            ? rewrittenByField[field].GetSpans(fieldReader)
            : spanQuery.GetSpans(fieldReader);

        while (spans.Next())
        {
            // Stored end is the span's reported end minus one.
            positions.Add(new PositionSpan(spans.Start(), spans.End() - 1));
        }
    }

    if (positions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term term in queryTerms)
    {
        if (!FieldNameComparator(term.Field))
        {
            continue;
        }

        WeightedSpanTerm existing = terms[term.Text];
        if (existing != null)
        {
            // positions is known to be non-empty here because of the early return above.
            existing.AddPositionSpans(positions);
            continue;
        }

        var created = new WeightedSpanTerm(spanQuery.Boost, term.Text);
        created.AddPositionSpans(positions);
        created.SetPositionSensitive(true);
        terms[term.Text] = created;
    }
}