/// <summary>
/// Gets or sets the <see cref="WeightedSpanTerm"/> stored under <paramref name="key"/>
/// in the wrapped dictionary.
/// </summary>
/// <remarks>
/// When the setter replaces an existing, non-null entry that is not position sensitive,
/// the incoming term is flagged as not position sensitive as well.
/// </remarks>
/// <param name="key">Key of the entry to read or write</param>
public WeightedSpanTerm this[K key]
{
    get => wrapped[key];
    set
    {
        // Remember what was stored before it gets overwritten.
        wrapped.TryGetValue(key, out WeightedSpanTerm previous);
        wrapped[key] = value;

        // A position-insensitive predecessor forces the replacement to be insensitive too.
        if (previous != null && !previous.IsPositionSensitive)
        {
            value.IsPositionSensitive = false;
        }
    }
}
/// <summary>
/// Extracts the terms of <paramref name="query"/> and, for every term whose field is accepted by
/// <c>FieldNameComparer</c>, stores a new <see cref="WeightedSpanTerm"/> in <paramref name="terms"/>.
/// Each entry is keyed by the term text and weighted with the boost of <paramref name="query"/>.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> that receives the created <see cref="WeightedSpanTerm"/>s</param>
/// <param name="query"><see cref="Query"/> whose terms are extracted</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedTerms(IDictionary<string, WeightedSpanTerm> terms, Query query)
{
    var extracted = new JCG.HashSet<Term>();
    query.ExtractTerms(extracted);

    foreach (Term candidate in extracted)
    {
        // Skip terms that belong to a field we are not highlighting.
        if (!FieldNameComparer(candidate.Field))
        {
            continue;
        }

        var weighted = new WeightedSpanTerm(query.Boost, candidate.Text);
        terms[candidate.Text] = weighted;
    }
}
/// <summary>
/// Extracts the terms of <paramref name="query"/> and, for every term whose field is accepted by
/// <c>FieldNameComparer</c>, stores a new <see cref="WeightedSpanTerm"/> in <paramref name="terms"/>.
/// Each entry is keyed by the term text and weighted with the boost of <paramref name="query"/>.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> that receives the created <see cref="WeightedSpanTerm"/>s</param>
/// <param name="query"><see cref="Query"/> whose terms are extracted</param>
/// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedTerms(IDictionary<string, WeightedSpanTerm> terms, Query query)
{
    var extracted = Support.Compatibility.SetFactory.CreateHashSet<Term>();
    query.ExtractTerms(extracted);

    foreach (Term candidate in extracted)
    {
        // Only terms of an accepted field contribute an entry.
        if (FieldNameComparer(candidate.Field))
        {
            var weighted = new WeightedSpanTerm(query.Boost, candidate.Text());
            terms[candidate.Text()] = weighted;
        }
    }
}
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/> via the base implementation.
/// If the entry being replaced was not position sensitive, the new term is marked as
/// not position sensitive as well.
/// </summary>
/// <param name="key">Key to store the term under</param>
/// <param name="value">Term to store</param>
public override void Add(K key, WeightedSpanTerm value)
{
    // The previous entry must be captured BEFORE the new value is stored; reading it
    // afterwards returns the value that was just added, which made the check below a no-op.
    WeightedSpanTerm previous;
    TryGetValue(key, out previous);

    base.Add(key, value);

    if (previous == null)
    {
        // Nothing was replaced, so there is no flag to carry over.
        return;
    }

    if (!previous.IsPositionSensitive())
    {
        value.SetPositionSensitive(false);
    }
}
/// <summary>
/// Creates a map of <see cref="WeightedSpanTerm"/>s from the given <c>Query</c> and <c>TokenStream</c>,
/// then multiplies the weight of every term by an IDF factor computed from <paramref name="reader"/>
/// (for gradient highlighting).
/// </summary>
/// <param name="query">Query that caused hit</param>
/// <param name="tokenStream">Tokenstream of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name; may be <c>null</c></param>
/// <param name="reader">to use for scoring</param>
/// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, IndexReader reader)
{
    this.fieldName = fieldName != null ? StringHelper.Intern(fieldName) : null;
    this.tokenStream = tokenStream;

    IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

    // Extraction and the document count are inside the try block so that CloseReaders()
    // still runs when either of them throws, not only when the scoring loop does.
    try
    {
        Extract(query, terms);

        int totalNumDocs = reader.NumDocs();

        foreach (WeightedSpanTerm weightedSpanTerm in terms.Values)
        {
            int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));

            // docFreq counts deletes
            if (totalNumDocs < docFreq)
            {
                docFreq = totalNumDocs;
            }

            // IDF algorithm taken from DefaultSimilarity class
            float idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
            weightedSpanTerm.Weight *= idf;
        }
    }
    finally
    {
        CloseReaders();
    }

    return terms;
}
/// <summary>
/// Constructs a new <see cref="QueryScorer"/> instance from pre-built terms. When the same term
/// text occurs more than once, the occurrence with the highest weight is kept.
/// </summary>
/// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
public QueryScorer(WeightedSpanTerm[] weightedTerms)
{
    this.fieldWeightedSpanTerms = new HashMap<string, WeightedSpanTerm>(weightedTerms.Length);

    foreach (WeightedSpanTerm candidate in weightedTerms)
    {
        WeightedSpanTerm current = fieldWeightedSpanTerms[candidate.Term];

        // A candidate wins when its text is new or it outweighs the stored term.
        bool replaces = (current == null) || (current.Weight < candidate.Weight);
        if (!replaces)
        {
            continue;
        }

        fieldWeightedSpanTerms[candidate.Term] = candidate;
        maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
    }

    // Terms were supplied directly, so the extractor initialisation step is skipped.
    skipInitExtractor = true;
}
/// <summary>
/// Advances the tracked token position and decides whether the current token starts a new fragment.
/// While the position lies inside a span of a matched term, no new fragment is started.
/// </summary>
/// <returns><c>true</c> if the current token begins a new fragment</returns>
/// <seealso cref="IFragmenter.IsNewFragment()"/>
public virtual bool IsNewFragment()
{
    position += posIncAtt.PositionIncrement;

    if (waitForPos == position)
    {
        // Reached the position we were waiting for; stop waiting.
        waitForPos = -1;
    }
    else if (waitForPos != -1)
    {
        // Still before the awaited position: never break here.
        return false;
    }

    WeightedSpanTerm spanTerm = queryScorer.GetWeightedSpanTerm(termAtt.ToString());
    if (spanTerm != null)
    {
        foreach (PositionSpan span in spanTerm.PositionSpans)
        {
            if (span.Start == position)
            {
                // Wait until just past the end of the first span that starts here.
                waitForPos = span.End + 1;
                break;
            }
        }
    }

    int endOffset = offsetAtt.EndOffset;
    int halfFragment = (int)((uint)fragmentSize >> 1);

    bool reachedFragmentEnd = endOffset >= (fragmentSize * currentNumFrags);
    bool isNewFrag = reachedFragmentEnd && (textSize - endOffset) >= halfFragment;

    if (isNewFrag)
    {
        currentNumFrags++;
    }

    return isNewFrag;
}
/// <summary>
/// Collects the positions of every span matched by <paramref name="spanQuery"/> and attaches them to
/// the <see cref="WeightedSpanTerm"/>s of the query's terms in <paramref name="terms"/>. Terms not yet
/// present are created as position-sensitive entries. Nothing is added when no span is found.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
/// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedSpanTerms(IDictionary<string, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    // Decide which fields to inspect: the configured one, or those named by the query.
    ISet<string> fieldNames;
    if (fieldName != null)
    {
        fieldNames = new JCG.HashSet<string> { fieldName };
    }
    else
    {
        fieldNames = new JCG.HashSet<string>();
        CollectSpanQueryFields(spanQuery, fieldNames);
    }

    // To support the use of the default field name
    if (defaultField != null)
    {
        fieldNames.Add(defaultField);
    }

    IDictionary<string, SpanQuery> queries = new JCG.Dictionary<string, SpanQuery>();
    var nonWeightedTerms = new JCG.HashSet<Term>();
    bool mustRewriteQuery = MustRewriteQuery(spanQuery);

    if (!mustRewriteQuery)
    {
        spanQuery.ExtractTerms(nonWeightedTerms);
    }
    else
    {
        foreach (string field in fieldNames)
        {
            SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
            queries[field] = rewrittenQuery;
            rewrittenQuery.ExtractTerms(nonWeightedTerms);
        }
    }

    List<PositionSpan> spanPositions = new List<PositionSpan>();
    foreach (string field in fieldNames)
    {
        SpanQuery q;
        if (mustRewriteQuery)
        {
            q = queries[field];
        }
        else
        {
            q = spanQuery;
        }

        AtomicReaderContext context = GetLeafContext();

        // Build a TermContext for every term of the query before asking for its spans.
        var termContexts = new JCG.Dictionary<Term, TermContext>();
        ISet<Term> extractedTerms = new JCG.SortedSet<Term>();
        q.ExtractTerms(extractedTerms);
        foreach (Term term in extractedTerms)
        {
            termContexts[term] = TermContext.Build(context, term);
        }

        IBits acceptDocs = context.AtomicReader.LiveDocs;
        Spans.Spans spans = q.GetSpans(context, acceptDocs, termContexts);

        // collect span positions
        while (spans.MoveNext())
        {
            spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
        }
    }

    if (spanPositions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term queryTerm in nonWeightedTerms)
    {
        if (!FieldNameComparer(queryTerm.Field))
        {
            continue;
        }

        bool known = terms.TryGetValue(queryTerm.Text, out WeightedSpanTerm weightedSpanTerm)
            && weightedSpanTerm != null;

        if (known)
        {
            if (spanPositions.Count > 0)
            {
                weightedSpanTerm.AddPositionSpans(spanPositions);
            }
        }
        else
        {
            // First sighting of this term text: create a position-sensitive entry.
            weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
            weightedSpanTerm.AddPositionSpans(spanPositions);
            weightedSpanTerm.IsPositionSensitive = true;
            terms[queryTerm.Text] = weightedSpanTerm;
        }
    }
}
/// <summary>
/// Looks up <paramref name="key"/> in the wrapped dictionary.
/// </summary>
/// <param name="key">Key to look up</param>
/// <param name="value">Receives the result of the wrapped dictionary's lookup</param>
/// <returns>The result of the wrapped dictionary's <c>TryGetValue</c> call</returns>
public bool TryGetValue(K key, out WeightedSpanTerm value) => wrapped.TryGetValue(key, out value);
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/> by delegating to the indexer setter.
/// </summary>
/// <param name="key">Key to store the term under</param>
/// <param name="value">Term to store</param>
public void Add(K key, WeightedSpanTerm value) => this[key] = value;
/// <summary>
/// Collects the positions of every span matched by <paramref name="spanQuery"/> and attaches them to
/// the <see cref="WeightedSpanTerm"/>s of the query's terms in <paramref name="terms"/>. Terms not yet
/// present are created as position-sensitive entries. Nothing is added when no span is found.
/// </summary>
/// <param name="terms">Map to place created WeightedSpanTerms in</param>
/// <param name="spanQuery">SpanQuery to extract Terms from</param>
private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    // Decide which fields to inspect: the configured one, or those named by the query.
    HashSet<String> fieldNames = new HashSet<String>();
    if (fieldName == null)
    {
        CollectSpanQueryFields(spanQuery, fieldNames);
    }
    else
    {
        fieldNames.Add(fieldName);
    }

    // To support the use of the default field name
    if (defaultField != null)
    {
        fieldNames.Add(defaultField);
    }

    IDictionary<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
    var nonWeightedTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
    bool mustRewriteQuery = MustRewriteQuery(spanQuery);
    if (mustRewriteQuery)
    {
        foreach (String field in fieldNames)
        {
            SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetReaderForField(field));
            queries[field] = rewrittenQuery;
            rewrittenQuery.ExtractTerms(nonWeightedTerms);
        }
    }
    else
    {
        spanQuery.ExtractTerms(nonWeightedTerms);
    }

    List<PositionSpan> spanPositions = new List<PositionSpan>();
    foreach (String field in fieldNames)
    {
        IndexReader reader = GetReaderForField(field);
        Spans.Spans spans = mustRewriteQuery
            ? queries[field].GetSpans(reader)
            : spanQuery.GetSpans(reader);

        // collect span positions
        while (spans.Next())
        {
            spanPositions.Add(new PositionSpan(spans.Start(), spans.End() - 1));
        }
    }

    if (spanPositions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term queryTerm in nonWeightedTerms)
    {
        if (!FieldNameComparator(queryTerm.Field))
        {
            continue;
        }

        // TryGetValue instead of the indexer: a term seen for the first time is absent, and
        // an indexer read of an absent key throws on standard IDictionary implementations.
        WeightedSpanTerm weightedSpanTerm;
        if (!terms.TryGetValue(queryTerm.Text, out weightedSpanTerm) || weightedSpanTerm == null)
        {
            weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
            weightedSpanTerm.AddPositionSpans(spanPositions);
            weightedSpanTerm.SetPositionSensitive(true);
            terms[queryTerm.Text] = weightedSpanTerm;
        }
        else
        {
            // spanPositions is known to be non-empty here (see the early return above).
            weightedSpanTerm.AddPositionSpans(spanPositions);
        }
    }
}
/// <summary>
/// Constructs a new QueryScorer instance from pre-built terms. When the same term text occurs
/// more than once, the occurrence with the highest weight is kept.
/// </summary>
/// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
public QueryScorer(WeightedSpanTerm[] weightedTerms)
{
    this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);

    for (int i = 0; i < weightedTerms.Length; i++)
    {
        WeightedSpanTerm incoming = weightedTerms[i];
        WeightedSpanTerm stored = fieldWeightedSpanTerms[incoming.Term];

        // if a term is defined more than once, always use the highest scoring Weight
        if ((stored == null) || (stored.Weight < incoming.Weight))
        {
            fieldWeightedSpanTerms[incoming.Term] = incoming;
            maxTermWeight = Math.Max(maxTermWeight, incoming.Weight);
        }
    }

    // Terms were supplied directly, so the extractor initialisation step is skipped.
    skipInitExtractor = true;
}