Esempio n. 1
0
        /// <summary>
        /// Pulls the terms out of <paramref name="query"/> and, for every term whose
        /// field is accepted by <c>FieldNameComparator</c>, stores a new
        /// <see cref="WeightedSpanTerm"/> (weighted with the query boost) in
        /// <paramref name="terms"/> under the term text.
        /// </summary>
        /// <param name="terms">Map from term text to weighted term; written through its indexer.</param>
        /// <param name="query">The query whose terms are extracted.</param>
        private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
        {
            var extracted = new HashSet<Term>();
            query.ExtractTerms(extracted);

            foreach (Term candidate in extracted)
            {
                // Skip terms that belong to a field we are not interested in.
                if (!FieldNameComparator(candidate.Field))
                {
                    continue;
                }

                terms[candidate.Text] = new WeightedSpanTerm(query.Boost, candidate.Text);
            }
        }
Esempio n. 2
0
            /// <summary>
            /// Stores <paramref name="value"/> under <paramref name="key"/>. When the entry
            /// being replaced was not position sensitive, the new term is marked as not
            /// position sensitive as well.
            /// </summary>
            /// <param name="key">The key to store the term under.</param>
            /// <param name="value">The term to store.</param>
            public override void Add(K key, WeightedSpanTerm value)
            {
                // The previous entry must be read BEFORE the new one is stored: reading it
                // afterwards returns the value just written, which made the check below
                // compare the new term with itself. This relies on the indexer returning
                // null for a key that is not present, as the null check already assumes.
                WeightedSpanTerm prev = this[key];

                base.Add(key, value);

                if (prev == null || value == null)
                {
                    // Nothing was replaced, or there is no new term to adjust.
                    return;
                }

                if (!prev.IsPositionSensitive())
                {
                    value.SetPositionSensitive(false);
                }
            }
Esempio n. 3
0
        /// <summary>
        /// Initializes a new instance of the <see cref="QueryScorer"/> class from a
        /// ready-made set of weighted terms.
        /// </summary>
        /// <param name="weightedTerms">
        /// The weighted terms. When the same term text occurs more than once, an entry
        /// only replaces the stored one if its weight is strictly higher.
        /// </param>
        public QueryScorer(WeightedSpanTerm[] weightedTerms)
        {
            fieldWeightedSpanTerms = new HashMap<String, WeightedSpanTerm>(weightedTerms.Length);

            foreach (WeightedSpanTerm candidate in weightedTerms)
            {
                WeightedSpanTerm known = fieldWeightedSpanTerms[candidate.Term];

                // Keep the stored entry unless the candidate has a strictly higher weight.
                if (known != null && !(known.Weight < candidate.Weight))
                {
                    continue;
                }

                fieldWeightedSpanTerms[candidate.Term] = candidate;
                maxTermWeight = Math.Max(maxTermWeight, candidate.Weight);
            }

            skipInitExtractor = true;
        }
Esempio n. 4
0
        /// <summary>
        /// Extracts the weighted span terms of <paramref name="query"/> and multiplies
        /// each term's weight by an IDF factor computed from <paramref name="reader"/>.
        /// </summary>
        /// <param name="query">The query to extract terms from.</param>
        /// <param name="tokenStream">The token stream; stored on this instance before extraction.</param>
        /// <param name="fieldName">
        /// Name of the field used for the document-frequency lookup; stored on this
        /// instance in interned form, or as <c>null</c> when no name is given.
        /// </param>
        /// <param name="reader">The reader supplying the document count and document frequencies.</param>
        /// <returns>Map from term text to its weighted span term, with IDF-scaled weights.</returns>
        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
                                                                                    String fieldName, IndexReader reader)
        {
            this.fieldName = fieldName != null ? StringHelper.Intern(fieldName) : null;
            this.tokenStream = tokenStream;

            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

            try
            {
                // Extraction and the document count live inside the try block so that
                // CloseReaders() also runs when either of them throws; previously an
                // exception here skipped the cleanup entirely.
                Extract(query, terms);

                int totalNumDocs = reader.NumDocs();

                foreach (var termText in terms.Keys)
                {
                    WeightedSpanTerm weightedSpanTerm = terms[termText];
                    int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
                    // docFreq counts deletes
                    if (totalNumDocs < docFreq)
                    {
                        docFreq = totalNumDocs;
                    }
                    // IDF algorithm taken from DefaultSimilarity class
                    float idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
                    weightedSpanTerm.Weight *= idf;
                }
            }
            finally
            {
                CloseReaders();
            }

            return terms;
        }
Esempio n. 5
0
        /// <summary>
        /// Checks that the best fragment follows the term weights: with "hello" at 10
        /// and "kennedy" at 1 the fragment must be <c>&lt;B&gt;Hello&lt;/B&gt;</c>; after
        /// raising "kennedy" to 50 it must be <c>&lt;B&gt;kennedy&lt;/B&gt;</c>.
        /// </summary>
        public void TestGetBestSingleFragmentWithWeights()
        {
            var helper = new TestHighlightRunner();

            helper.TestAction = () =>
            {
                var helloTerm = new WeightedSpanTerm(10f, "hello");
                helloTerm.AddPositionSpans(new List<PositionSpan> { new PositionSpan(0, 0) });

                var kennedyTerm = new WeightedSpanTerm(1f, "kennedy");
                kennedyTerm.AddPositionSpans(new List<PositionSpan> { new PositionSpan(14, 14) });

                WeightedSpanTerm[] wTerms = { helloTerm, kennedyTerm };

                // First pass: "hello" carries the larger weight.
                Highlighter highlighter = helper.GetHighlighter(wTerms, this);
                TokenStream tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
                highlighter.TextFragmenter = new SimpleFragmenter(2);

                String result = highlighter.GetBestFragment(tokenStream, texts[0]).Trim();
                bool foundHello = "<B>Hello</B>".Equals(result);
                Assert.IsTrue(foundHello,
                              "Failed to find best section using weighted terms. Found: [" + result + "]");

                // Second pass: readjust the weights so that "kennedy" wins.
                kennedyTerm.Weight = 50f;
                tokenStream = analyzer.TokenStream(FIELD_NAME, new StringReader(texts[0]));
                highlighter = helper.GetHighlighter(wTerms, this);
                highlighter.TextFragmenter = new SimpleFragmenter(2);

                result = highlighter.GetBestFragment(tokenStream, texts[0]).Trim();
                bool foundKennedy = "<B>kennedy</B>".Equals(result);
                Assert.IsTrue(foundKennedy,
                              "Failed to find best section using weighted terms. Found: " + result);
            };

            helper.Start();
        }
Esempio n. 6
0
        /// <summary>
        /// Runs <paramref name="spanQuery"/> against the reader of every relevant field,
        /// collects the matching span positions, and attaches them to the weighted terms
        /// in <paramref name="terms"/>. Terms not yet present are created as position
        /// sensitive and weighted with the query boost. Nothing is changed when no spans
        /// are found.
        /// </summary>
        /// <param name="terms">Map from term text to weighted term; updated in place.</param>
        /// <param name="spanQuery">The span query to evaluate.</param>
        private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
        {
            // Decide which fields to inspect: the configured one, or those of the query.
            var fieldNames = new HashSet<String>();
            if (fieldName != null)
            {
                fieldNames.Add(fieldName);
            }
            else
            {
                CollectSpanQueryFields(spanQuery, fieldNames);
            }

            // To support the use of the default field name
            if (defaultField != null)
            {
                fieldNames.Add(defaultField);
            }

            IDictionary<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
            var nonWeightedTerms = new HashSet<Term>();
            bool mustRewriteQuery = MustRewriteQuery(spanQuery);

            if (!mustRewriteQuery)
            {
                spanQuery.ExtractTerms(nonWeightedTerms);
            }
            else
            {
                // Rewrite once per field and remember the result for the span pass below.
                foreach (String field in fieldNames)
                {
                    var rewritten = (SpanQuery)spanQuery.Rewrite(GetReaderForField(field));
                    queries[field] = rewritten;
                    rewritten.ExtractTerms(nonWeightedTerms);
                }
            }

            var spanPositions = new List<PositionSpan>();

            foreach (String field in fieldNames)
            {
                IndexReader reader = GetReaderForField(field);
                Spans spans = mustRewriteQuery
                                  ? queries[field].GetSpans(reader)
                                  : spanQuery.GetSpans(reader);

                // collect span positions
                while (spans.Next())
                {
                    int start = spans.Start();
                    int end = spans.End() - 1;
                    spanPositions.Add(new PositionSpan(start, end));
                }
            }

            if (spanPositions.Count == 0)
            {
                // no spans found
                return;
            }

            foreach (Term queryTerm in nonWeightedTerms)
            {
                if (!FieldNameComparator(queryTerm.Field))
                {
                    continue;
                }

                WeightedSpanTerm existing = terms[queryTerm.Text];

                if (existing == null)
                {
                    var created = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
                    created.AddPositionSpans(spanPositions);
                    created.SetPositionSensitive(true);
                    terms[queryTerm.Text] = created;
                }
                else if (spanPositions.Count > 0)
                {
                    existing.AddPositionSpans(spanPositions);
                }
            }
        }