Lightweight class that holds a term, its weight, and the positions used for scoring that term.
Inheritance: Lucene.Net.Search.Highlight.WeightedTerm
            /// <summary>
            /// Gets or sets the <see cref="WeightedSpanTerm"/> stored under <paramref name="key"/>
            /// in the wrapped dictionary.
            /// </summary>
            /// <remarks>
            /// When the setter replaces an existing non-null entry that is not position
            /// sensitive, the incoming term is flagged as not position sensitive as well.
            /// </remarks>
            /// <param name="key">Key of the entry to read or write.</param>
            public WeightedSpanTerm this[K key]
            {
                get
                {
                    return wrapped[key];
                }

                set
                {
                    // Fetch the entry that is about to be replaced before overwriting it.
                    WeightedSpanTerm previous;
                    wrapped.TryGetValue(key, out previous);
                    wrapped[key] = value;

                    // Only a replaced, non-position-sensitive entry affects the new term.
                    if (previous != null && !previous.IsPositionSensitive)
                    {
                        value.IsPositionSensitive = false;
                    }
                }
            }
Beispiel #2
0
        /// <summary>
        /// Populates <paramref name="terms"/> with one <see cref="WeightedSpanTerm"/> per term
        /// extracted from <paramref name="query"/> whose field is accepted by
        /// <c>FieldNameComparer</c>. Each entry is keyed by the term text and weighted with
        /// the query's boost; an existing entry under the same text is overwritten.
        /// </summary>
        /// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <param name="query"><see cref="Query"/> to extract Terms from</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void ExtractWeightedTerms(IDictionary <string, WeightedSpanTerm> terms, Query query)
        {
            var extracted = new JCG.HashSet<Term>();
            query.ExtractTerms(extracted);

            foreach (Term candidate in extracted)
            {
                // Skip terms that belong to a field we are not highlighting.
                if (!FieldNameComparer(candidate.Field))
                {
                    continue;
                }

                terms[candidate.Text] = new WeightedSpanTerm(query.Boost, candidate.Text);
            }
        }
        /// <summary>
        /// Populates <paramref name="terms"/> with one <see cref="WeightedSpanTerm"/> per term
        /// extracted from <paramref name="query"/> whose field is accepted by
        /// <c>FieldNameComparer</c>. Each entry is keyed by the term text and weighted with
        /// the query's boost; an existing entry under the same text is overwritten.
        /// </summary>
        /// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <param name="query"><see cref="Query"/> to extract Terms from</param>
        /// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
        protected virtual void ExtractWeightedTerms(IDictionary <string, WeightedSpanTerm> terms, Query query)
        {
            var extracted = Support.Compatibility.SetFactory.CreateHashSet<Term>();
            query.ExtractTerms(extracted);

            foreach (Term candidate in extracted)
            {
                // Skip terms that belong to a field we are not highlighting.
                if (!FieldNameComparer(candidate.Field))
                {
                    continue;
                }

                terms[candidate.Text()] = new WeightedSpanTerm(query.Boost, candidate.Text());
            }
        }
            /// <summary>
            /// Adds <paramref name="value"/> under <paramref name="key"/> via the base class and
            /// carries over the "not position sensitive" state of any entry that was already
            /// stored under the same key.
            /// </summary>
            /// <param name="key">Key to store the term under.</param>
            /// <param name="value">Term to store; marked as not position sensitive when the
            /// previously stored term for <paramref name="key"/> was not position sensitive.</param>
            public override void Add(K key, WeightedSpanTerm value)
            {
                // Capture the existing entry BEFORE delegating to the base class. Reading it
                // afterwards (as the code used to) yields the value that was just stored, so the
                // comparison below would always compare the new term with itself.
                // NOTE(review): assumes the base map exposes TryGetValue (it is used as an
                // IDictionary elsewhere) - confirm against the base class.
                WeightedSpanTerm previous;
                TryGetValue(key, out previous);

                base.Add(key, value);

                if (previous != null && !previous.IsPositionSensitive())
                {
                    value.SetPositionSensitive(false);
                }
            }
        /// <summary>
        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>. Uses a supplied
        /// <c>IndexReader</c> to properly Weight terms (for gradient highlighting).
        /// </summary>
        /// <remarks>
        /// After extraction, every term's weight is multiplied by
        /// <c>log(numDocs / (docFreq + 1)) + 1</c>, where <c>docFreq</c> is capped at
        /// <c>numDocs</c>. <c>CloseReaders()</c> is always invoked before returning, including
        /// when extraction or scoring throws.
        /// </remarks>
        /// <param name="query">Query that caused hit</param>
        /// <param name="tokenStream">Tokenstream of text to be highlighted</param>
        /// <param name="fieldName">restricts Term's used based on field name</param>
        /// <param name="reader">to use for scoring</param>
        /// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
        public IDictionary <String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
                                                                                     String fieldName, IndexReader reader)
        {
            this.fieldName   = fieldName != null ? StringHelper.Intern(fieldName) : null;
            this.tokenStream = tokenStream;

            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

            // Extraction and the document count lookup sit inside the try block so that
            // CloseReaders() still runs if either of them throws.
            try
            {
                Extract(query, terms);

                int totalNumDocs = reader.NumDocs();

                // Iterate the values directly; only the terms' weights are modified, never
                // the dictionary itself.
                foreach (WeightedSpanTerm weightedSpanTerm in terms.Values)
                {
                    int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
                    // docFreq counts deletes
                    if (totalNumDocs < docFreq)
                    {
                        docFreq = totalNumDocs;
                    }
                    // IDF algorithm taken from DefaultSimilarity class
                    float idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
                    weightedSpanTerm.Weight *= idf;
                }
            }
            finally
            {
                CloseReaders();
            }

            return terms;
        }
Beispiel #6
0
        /// <summary>
        /// Constructs a new <see cref="QueryScorer"/> instance from pre-built terms. When the
        /// same term text appears more than once, the highest-weighted instance is kept.
        /// Also tracks the maximum term weight and marks extractor initialization as skipped.
        /// </summary>
        /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
        public QueryScorer(WeightedSpanTerm[] weightedTerms)
        {
            this.fieldWeightedSpanTerms = new HashMap<string, WeightedSpanTerm>(weightedTerms.Length);

            foreach (WeightedSpanTerm candidate in weightedTerms)
            {
                WeightedSpanTerm current = fieldWeightedSpanTerms[candidate.Term];

                // Keep the stored term unless the candidate scores strictly higher.
                bool keepCurrent = current != null && !(current.Weight < candidate.Weight);
                if (keepCurrent)
                {
                    continue;
                }

                fieldWeightedSpanTerms[candidate.Term] = candidate;
                maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
            }

            skipInitExtractor = true;
        }
Beispiel #7
0
        /// <summary>
        /// Advances the current token position and decides whether a new fragment starts at
        /// the current token. No fragment break is reported while the position lies inside a
        /// span that started at an earlier token.
        /// </summary>
        /// <returns><c>true</c> when the token's end offset has reached the current fragment
        /// boundary and at least half a fragment of text remains; otherwise <c>false</c>.</returns>
        /// <seealso cref="IFragmenter.IsNewFragment()"/>
        public virtual bool IsNewFragment()
        {
            position += posIncAtt.PositionIncrement;

            // Still waiting for the end of a span: no fragment break allowed yet.
            bool insideSpan = waitForPos != -1 && waitForPos != position;
            if (insideSpan)
            {
                return false;
            }
            waitForPos = -1;

            WeightedSpanTerm spanTerm = queryScorer.GetWeightedSpanTerm(termAtt.ToString());
            if (spanTerm != null)
            {
                // If a span starts at this position, hold off breaking until just past its end.
                foreach (PositionSpan span in spanTerm.PositionSpans)
                {
                    if (span.Start == position)
                    {
                        waitForPos = span.End + 1;
                        break;
                    }
                }
            }

            int endOffset    = offsetAtt.EndOffset;
            int halfFragment = (int)((uint)fragmentSize >> 1);

            if (endOffset < (fragmentSize * currentNumFrags) || (textSize - endOffset) < halfFragment)
            {
                return false;
            }

            currentNumFrags++;
            return true;
        }
Beispiel #8
0
        /// <summary>
        /// Fills <paramref name="terms"/> with position-sensitive <see cref="WeightedSpanTerm"/>s
        /// built from the terms of the supplied <see cref="SpanQuery"/>. The span positions
        /// matched by the query are collected across all candidate fields and attached to
        /// every created or already-present term. Nothing is added when no spans match.
        /// </summary>
        /// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
        /// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
        /// <exception cref="IOException">If there is a low-level I/O error</exception>
        protected virtual void ExtractWeightedSpanTerms(IDictionary <string, WeightedSpanTerm> terms, SpanQuery spanQuery)
        {
            // Candidate fields: the configured field, or every field the query refers to.
            ISet<string> fieldNames = new JCG.HashSet<string>();
            if (fieldName != null)
            {
                fieldNames.Add(fieldName);
            }
            else
            {
                CollectSpanQueryFields(spanQuery, fieldNames);
            }

            // To support the use of the default field name
            if (defaultField != null)
            {
                fieldNames.Add(defaultField);
            }

            IDictionary<string, SpanQuery> rewrittenByField = new JCG.Dictionary<string, SpanQuery>();
            var nonWeightedTerms = new JCG.HashSet<Term>();
            bool mustRewriteQuery = MustRewriteQuery(spanQuery);

            if (!mustRewriteQuery)
            {
                spanQuery.ExtractTerms(nonWeightedTerms);
            }
            else
            {
                foreach (string field in fieldNames)
                {
                    var rewritten = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
                    rewrittenByField[field] = rewritten;
                    rewritten.ExtractTerms(nonWeightedTerms);
                }
            }

            var spanPositions = new List<PositionSpan>();

            foreach (string field in fieldNames)
            {
                SpanQuery fieldQuery;
                if (mustRewriteQuery)
                {
                    fieldQuery = rewrittenByField[field];
                }
                else
                {
                    fieldQuery = spanQuery;
                }

                AtomicReaderContext context = GetLeafContext();

                // Build a term context for every term of the query before asking for spans.
                var termContexts = new JCG.Dictionary<Term, TermContext>();
                ISet<Term> extractedTerms = new JCG.SortedSet<Term>();
                fieldQuery.ExtractTerms(extractedTerms);
                foreach (Term term in extractedTerms)
                {
                    termContexts[term] = TermContext.Build(context, term);
                }

                IBits liveDocs = context.AtomicReader.LiveDocs;
                Spans.Spans spans = fieldQuery.GetSpans(context, liveDocs, termContexts);

                // collect span positions
                while (spans.MoveNext())
                {
                    spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
                }
            }

            if (spanPositions.Count == 0)
            {
                // no spans found
                return;
            }

            foreach (Term queryTerm in nonWeightedTerms)
            {
                if (!FieldNameComparer(queryTerm.Field))
                {
                    continue;
                }

                if (terms.TryGetValue(queryTerm.Text, out WeightedSpanTerm existing) && existing != null)
                {
                    // spanPositions is known to be non-empty here (see the early return above).
                    existing.AddPositionSpans(spanPositions);
                    continue;
                }

                var created = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                created.AddPositionSpans(spanPositions);
                created.IsPositionSensitive = true;
                terms[queryTerm.Text] = created;
            }
        }
 /// <summary>
 /// Looks up <paramref name="key"/> in the wrapped dictionary.
 /// </summary>
 /// <param name="key">Key to look up.</param>
 /// <param name="value">Receives whatever the wrapped dictionary's lookup produces.</param>
 /// <returns>The result of the wrapped dictionary's <c>TryGetValue</c> call.</returns>
 public bool TryGetValue(K key, out WeightedSpanTerm value) => wrapped.TryGetValue(key, out value);
 /// <summary>
 /// Stores <paramref name="value"/> under <paramref name="key"/> by delegating to this
 /// type's indexer setter.
 /// </summary>
 /// <param name="key">Key to store the term under.</param>
 /// <param name="value">Term to store.</param>
 public void Add(K key, WeightedSpanTerm value) => this[key] = value;
        /// <summary>
        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <c>SpanQuery</c>.
        /// The span positions matched by the query are collected across all candidate fields and
        /// attached to every created or already-present term. Nothing is added when no spans match.
        /// </summary>
        /// <param name="terms">Map to place created WeightedSpanTerms in</param>
        /// <param name="spanQuery">SpanQuery to extract Terms from</param>
        private void ExtractWeightedSpanTerms(IDictionary <String, WeightedSpanTerm> terms, SpanQuery spanQuery)
        {
            // Candidate fields: the configured field, or every field the query refers to.
            HashSet<String> fieldNames = new HashSet<String>();
            if (fieldName == null)
            {
                CollectSpanQueryFields(spanQuery, fieldNames);
            }
            else
            {
                fieldNames.Add(fieldName);
            }
            // To support the use of the default field name
            if (defaultField != null)
            {
                fieldNames.Add(defaultField);
            }

            IDictionary<String, SpanQuery> queries = new HashMap<String, SpanQuery>();

            var  nonWeightedTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
            bool mustRewriteQuery = MustRewriteQuery(spanQuery);

            if (mustRewriteQuery)
            {
                foreach (String field in fieldNames)
                {
                    SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetReaderForField(field));
                    queries[field] = rewrittenQuery;
                    rewrittenQuery.ExtractTerms(nonWeightedTerms);
                }
            }
            else
            {
                spanQuery.ExtractTerms(nonWeightedTerms);
            }

            List<PositionSpan> spanPositions = new List<PositionSpan>();

            foreach (String field in fieldNames)
            {
                IndexReader reader = GetReaderForField(field);
                SpanQuery   source = mustRewriteQuery ? queries[field] : spanQuery;
                Spans.Spans spans  = source.GetSpans(reader);

                // collect span positions
                while (spans.Next())
                {
                    spanPositions.Add(new PositionSpan(spans.Start(), spans.End() - 1));
                }
            }

            if (spanPositions.Count == 0)
            {
                // no spans found
                return;
            }

            foreach (Term queryTerm in nonWeightedTerms)
            {
                if (!FieldNameComparator(queryTerm.Field))
                {
                    continue;
                }

                // TryGetValue instead of the indexer: a standard IDictionary implementation
                // throws KeyNotFoundException from its indexer when the key is absent.
                WeightedSpanTerm weightedSpanTerm;
                if (terms.TryGetValue(queryTerm.Text, out weightedSpanTerm) && weightedSpanTerm != null)
                {
                    // spanPositions is known to be non-empty here (see the early return above).
                    weightedSpanTerm.AddPositionSpans(spanPositions);
                    continue;
                }

                weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                weightedSpanTerm.AddPositionSpans(spanPositions);
                weightedSpanTerm.SetPositionSensitive(true);
                terms[queryTerm.Text] = weightedSpanTerm;
            }
        }
Beispiel #12
0
        /// <summary>
        /// Constructs a new QueryScorer instance from pre-built terms. When the same term text
        /// appears more than once, the highest-weighted instance is kept. Also tracks the
        /// maximum term weight and marks extractor initialization as skipped.
        /// </summary>
        /// <param name="weightedTerms">an array of pre-created <see cref="WeightedSpanTerm"/>s</param>
        public QueryScorer(WeightedSpanTerm[] weightedTerms)
        {
            this.fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);

            for (int i = 0; i < weightedTerms.Length; i++)
            {
                WeightedSpanTerm incoming = weightedTerms[i];
                WeightedSpanTerm stored   = fieldWeightedSpanTerms[incoming.Term];

                // A term defined more than once always ends up with its highest-scoring weight.
                bool replaceStored = stored == null || stored.Weight < incoming.Weight;
                if (replaceStored)
                {
                    fieldWeightedSpanTerms[incoming.Term] = incoming;
                    maxTermWeight = Math.Max(maxTermWeight, incoming.Weight);
                }
            }

            skipInitExtractor = true;
        }