/// <summary>
/// Stores the reader, caches its <c>MaxDoc()</c> and positions the counter at -1.
/// </summary>
/// <param name="enclosingInstance">The owning query (not referenced by the visible statements).</param>
/// <param name="reader">Index reader whose <c>MaxDoc()</c> bounds the scorer.</param>
/// <param name="similarity">Forwarded to the base scorer.</param>
internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity) : base(similarity)
{
	// Upper bound first, then the cursor, which starts below the first valid value.
	maxDoc = reader.MaxDoc();
	count = -1;
	this.reader = reader;
}
		private float freq; // phrase frequency in current doc as computed by phraseFreq().

		/// <summary>
		/// Builds the scorer state from one <c>TermPositions</c> enumerator per phrase term.
		/// </summary>
		/// <param name="weight">Weight whose <c>Value</c> is cached in <c>value_Renamed</c>.</param>
		/// <param name="tps">Term position enumerators, one per phrase term.</param>
		/// <param name="offsets">Phrase offset for each entry of <paramref name="tps"/>.</param>
		/// <param name="similarity">Forwarded to the base scorer.</param>
		/// <param name="norms">Field norms stored on the scorer.</param>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.Value;

			// convert tps to a list of phrase positions.
			// note: phrase-position differs from term-position in that its position
			// reflects the phrase offset: pp.pos = tp.pos - offset.
			// this allows to easily identify a matching (exact) phrase
			// when all PhrasePositions have exactly the same position.
			for (int i = 0; i < tps.Length; i++)
			{
				PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
				if (last != null)
				{
					// add next to end of list
					// NOTE(review): assumes PhrasePositions exposes its list link as `next` - confirm.
					last.next = pp;
				}
				else
				{
					// the very first element becomes the head of the list
					first = pp;
				}
				last = pp;
			}

			pq = new PhraseQueue(tps.Length); // construct empty pq
			first.doc = - 1;
		}
			/// <summary>
			/// Captures the norms, opens a <c>TermDocs</c> enumerator on the reader and caches the weight value as the score.
			/// </summary>
			/// <param name="enclosingInstance">The owning query (not referenced by the visible statements).</param>
			/// <param name="reader">Reader that supplies the <c>TermDocs</c> enumerator.</param>
			/// <param name="similarity">Forwarded to the base scorer.</param>
			/// <param name="w">Weight whose <c>Value</c> becomes the score.</param>
			/// <param name="norms">Field norms stored on the scorer.</param>
			internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms):base(similarity)
			{
				this.norms = norms;
				termDocs = reader.TermDocs(null);
				score = w.Value;
			}
			/// <summary>
			/// Resolves the similarity for the searcher and derives the idf explanation and idf value for the query term.
			/// </summary>
			/// <param name="enclosingInstance">The owning query (the visible statements read it through <c>Enclosing_Instance</c>).</param>
			/// <param name="searcher">Searcher used to resolve the similarity and the idf statistics.</param>
			public TermWeight(TermQuery enclosingInstance, Searcher searcher)
			{
				similarity = Enclosing_Instance.GetSimilarity(searcher);

				// The explanation is computed once; the numeric idf is read back from it.
				idfExp = similarity.IdfExplain(Enclosing_Instance.term, searcher);
				idf = idfExp.Idf;
			}
Beispiel #5
 /// <summary>
 /// Captures the owning query, the term states and the searcher's similarity, then computes the weight statistics.
 /// </summary>
 /// <param name="outerInstance">Query supplying the boost and the term.</param>
 /// <param name="searcher">Searcher supplying the similarity and the collection/term statistics.</param>
 /// <param name="termStates">Term context; asserted to be non-null.</param>
 public TermWeight(TermQuery outerInstance, IndexSearcher searcher, TermContext termStates)
 {
     this.OuterInstance = outerInstance;
     Debug.Assert(termStates != null, "TermContext must not be null");
     this.TermStates = termStates;
     this.Similarity = searcher.Similarity;

     // Gather the three inputs of ComputeWeight in the same order the single-expression form evaluated them.
     var boost = outerInstance.Boost;
     var collectionStats = searcher.CollectionStatistics(outerInstance.Term_Renamed.Field());
     var termStats = searcher.TermStatistics(outerInstance.Term_Renamed, termStates);
     this.Stats = Similarity.ComputeWeight(boost, collectionStats, termStats);
 }
 /// <summary>
 /// Stores the query, similarity and iterator; the query weight starts at the query boost and the norm at 1.
 /// </summary>
 /// <param name="query">Query whose <c>Boost</c> seeds the query weight.</param>
 /// <param name="similarity">Similarity stored on the weight.</param>
 /// <param name="iter">Document id iterator stored on the weight.</param>
 internal DocSetIteratorWeight(Query query, Similarity similarity, DocIdSetIterator iter)
 {
     _iter = iter;
     _similarity = similarity;
     _query = query;

     // Initial scoring factors.
     _queryWeight = query.Boost;
     _queryNorm = 1.0f;
 }
			/// <summary>
			/// Stores the reader, derives the last id from <c>MaxDoc()</c>, starts the cursor at -1 and caches the weight value as the score.
			/// </summary>
			/// <param name="enclosingInstance">The owning query (not referenced by the visible statements).</param>
			/// <param name="reader">Reader whose <c>MaxDoc()</c> bounds the ids.</param>
			/// <param name="similarity">Forwarded to the base scorer.</param>
			/// <param name="w">Weight whose value becomes the score.</param>
			internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w):base(similarity)
			{
				this.reader = reader;

				// Highest id is one below MaxDoc(); the cursor begins before id 0.
				maxId = reader.MaxDoc() - 1;
				id = -1;

				score = w.GetValue();
			}
		/// <summary>
		/// Resolves the similarity for the query, collects the query's terms and computes the idf explanation and value over them.
		/// </summary>
		/// <param name="query">Span query being weighted.</param>
		/// <param name="searcher">Searcher used for similarity lookup and idf statistics.</param>
		public SpanWeight(SpanQuery query, Searcher searcher)
		{
			this.similarity = query.GetSimilarity(searcher);
			this.query = query;

			terms = new Support.Set<Lucene.Net.Index.Term>();
			// The set must be filled before idf is computed; otherwise idf is taken over no terms at all.
			// NOTE(review): assumes Query.ExtractTerms accepts this set type - confirm against the Query API in use.
			query.ExtractTerms(terms);

			idfExp = similarity.idfExplain(terms.ToArray(), searcher);
			idf = idfExp.GetIdf();
		}
Beispiel #9
        /// <summary>
        /// Class constructor: clears the index directory and writer, and creates the English Snowball analyzer
        /// (with the stock English stop words) and the custom similarity.
        /// </summary>
        public LuceneIREngine()
        {
            writer = null;
            luceneIndexDirectory = null;

            ISet<string> englishStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            analyzer = new SnowballAnalyzer(VERSION, "English", englishStopWords);

            mySimilarity = new CustomSimilarity();
        }
Beispiel #10
		/// <summary>
		/// Resolves the similarity for the query, collects the query's terms and computes the idf explanation and value over them.
		/// </summary>
		/// <param name="query">Span query being weighted.</param>
		/// <param name="searcher">Searcher used for similarity lookup and idf statistics.</param>
		public SpanWeight(SpanQuery query, Searcher searcher)
		{
			this.similarity = query.GetSimilarity(searcher);
			this.query = query;

			terms = new System.Collections.Hashtable();
			// Fill the table before reading terms.Values; an empty table would yield an idf over no terms.
			// NOTE(review): assumes Query.ExtractTerms accepts a Hashtable in this version - confirm.
			query.ExtractTerms(terms);

			idfExp = similarity.idfExplain(new System.Collections.ArrayList(terms.Values), searcher);
			idf = idfExp.GetIdf();
		}
        /// <summary>
        /// Positions every sub-scorer on its first document, aligns them on the first
        /// document they all agree on, and orders the array for subsequent skipping.
        /// </summary>
        /// <param name="similarity">Similarity used for the coord factor; forwarded to the base scorer.</param>
        /// <param name="scorers">Sub-scorers to intersect; the array is sorted and reordered in place.</param>
        public ConjunctionScorer(Similarity similarity, Scorer[] scorers)
            : base(similarity)
        {
            this.scorers = scorers;
            coord = similarity.Coord(scorers.Length, scorers.Length);

            for (int i = 0; i < scorers.Length; i++)
            {
                if (scorers[i].NextDoc() == NO_MORE_DOCS)
                {
                    // If even one of the sub-scorers does not have any documents, this
                    // scorer should not attempt to do any more work.
                    lastDoc = NO_MORE_DOCS;
                    return;
                }
            }

            // Sort the array the first time...
            // We don't need to sort the array in any future calls because we know
            // it will already start off sorted (all scorers on same doc).

            // note that this comparator is not consistent with equals!
            System.Array.Sort(scorers, new AnonymousClassComparator(this));

            // NOTE: doNext() must be called before the re-sorting of the array later on.
            // The reason is this: assume there are 5 scorers, whose first docs are 1,
            // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling
            // doNext() here advances all the first scorers to 5 (or a larger doc ID
            // they all agree on).
            // However, if we re-sort before doNext() is called, the order will be 5, 3,
            // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's
            // docs equals the last one. So the invariant that after calling doNext()
            // all scorers are on the same doc ID is broken.
            if (DoNext() == NO_MORE_DOCS)
            {
                // The scorers did not agree on any document.
                lastDoc = NO_MORE_DOCS;
                return;
            }

            // If first-time skip distance is any predictor of
            // scorer sparseness, then we should always try to skip first on
            // those scorers.
            // Keep last scorer in its last place (it will be the first
            // to be skipped on), but reverse all of the others so that
            // they will be skipped on in order of original high skip.
            int end = scorers.Length - 1;
            int max = end >> 1;
            for (int i = 0; i < max; i++)
            {
                Scorer tmp = scorers[i];
                int idx = end - i - 1;
                scorers[i] = scorers[idx];
                scorers[idx] = tmp;
            }
        }
Beispiel #12
		/// <summary>Construct a <code>TermScorer</code> and pre-compute its score cache.</summary>
		/// <param name="weight">The weight of the <code>Term</code> in the query.</param>
		/// <param name="td">An iterator over the documents matching the <code>Term</code>.</param>
		/// <param name="similarity">The <code>Similarity</code> implementation to be used for score computations.</param>
		/// <param name="norms">The field norms of the document fields for the <code>Term</code>.</param>
		public /*internal*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms):base(similarity)
		{
			this.weight = weight;
			this.termDocs = td;
			this.norms = norms;
			this.weightValue = weight.GetValue();

			// Each cache slot holds tf(slot) scaled by the weight value.
			int slot = 0;
			while (slot < SCORE_CACHE_SIZE)
			{
				scoreCache[slot] = GetSimilarity().Tf(slot) * weightValue;
				slot++;
			}
		}
 /// <summary>Create a BooleanScorer2.</summary>
 /// <param name="similarity">The similarity to be used.</param>
 /// <param name="minNrShouldMatch">The minimum number of optional added scorers
 /// that should match during the search. In case no required scorers are added,
 /// at least one of the optional scorers will have to match during the search.</param>
 /// <exception cref="System.ArgumentException">When <paramref name="minNrShouldMatch"/> is negative.</exception>
 public BooleanScorer2(Similarity similarity, int minNrShouldMatch)
     : base(similarity)
 {
     bool negativeMinimum = minNrShouldMatch < 0;
     if (negativeMinimum)
     {
         throw new System.ArgumentException("Minimum number of optional scorers should not be negative");
     }

     coordinator = new Coordinator(this);
     this.minNrShouldMatch = minNrShouldMatch;
 }
Beispiel #14
        /// <summary>
        /// Resolves the similarity for the query, collects the query's terms and computes the idf explanation and value over them.
        /// </summary>
        /// <param name="query">Span query being weighted.</param>
        /// <param name="searcher">Searcher used for similarity lookup and idf statistics.</param>
        public SpanWeight(SpanQuery query, Searcher searcher)
        {
            this.similarity = query.GetSimilarity(searcher);
            this.internalQuery = query;

            terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>();
            // Fill the set before computing idf; an empty set would make idf independent of the query.
            // NOTE(review): assumes Query.ExtractTerms accepts this set type - confirm.
            query.ExtractTerms(terms);

            idfExp = similarity.IdfExplain(terms, searcher);
            idf = idfExp.Idf;
        }
 /// <summary>
 /// Initializes the scorer with a fixed score, the document bound and an optional array of deleted document ids.
 /// </summary>
 /// <param name="maxdoc">Stored as <c>maxDoc</c>.</param>
 /// <param name="delDocs">Deleted document ids; may be null.</param>
 /// <param name="similarity">Forwarded to the base scorer.</param>
 /// <param name="score">Score stored on the scorer.</param>
 public FastMatchAllScorer(int maxdoc, int[] delDocs, Similarity similarity, float score)
     : base(similarity)
 {
     this.score = score;
     maxDoc = maxdoc;
     doc = -1;

     deletedDocs = delDocs;
     deletedIndex = 0;

     // A null array counts as "no deletions".
     if (delDocs == null)
     {
         delLen = 0;
     }
     else
     {
         delLen = delDocs.Length;
     }
     moreDeletions = delLen > 0;
 }
        /// <summary>
        /// Resolves the similarity for the query, collects the query's terms and computes idf over them.
        /// </summary>
        /// <param name="query">Span query being weighted.</param>
        /// <param name="searcher">Searcher used for similarity lookup and idf statistics.</param>
        public SpanWeight(SpanQuery query, Searcher searcher)
        {
            this.similarity = query.GetSimilarity(searcher);
            this.query = query;

            terms = new System.Collections.Hashtable();
            // Fill the table before snapshotting its values; otherwise idf is computed over an empty list.
            // NOTE(review): assumes Query.ExtractTerms accepts a Hashtable in this version - confirm.
            query.ExtractTerms(terms);

            System.Collections.ArrayList tmp = new System.Collections.ArrayList(terms.Values);

            idf = this.query.GetSimilarity(searcher).Idf(tmp, searcher);
        }
		/// <summary>Creates a new instance of DisjunctionMaxScorer.</summary>
		/// <param name="tieBreakerMultiplier">Multiplier applied to non-maximum-scoring subqueries for a
		/// document as they are summed into the result.</param>
		/// <param name="similarity">-- not used since our definition involves neither coord nor terms
		/// directly</param>
		/// <param name="subScorers">The sub scorers this Scorer should iterate on.</param>
		/// <param name="numScorers">The actual number of scorers to iterate on. Note that the array's
		/// length may be larger than the actual number of scorers.</param>
		public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity, Scorer[] subScorers, int numScorers):base(similarity)
		{
			// The passed subScorers array includes only scorers which have documents
			// (DisjunctionMaxQuery takes care of that), and their nextDoc() was already
			// called.
			this.numScorers = numScorers;
			this.subScorers = subScorers;

			this.tieBreakerMultiplier = tieBreakerMultiplier;
		}
            /// <summary>
            /// Builds the section-search plan for the query and positions the scorer accordingly.
            /// </summary>
            /// <param name="query">Query to plan.</param>
            /// <param name="similarity">Forwarded to the base scorer.</param>
            /// <param name="score">Score stored in <c>_curScr</c>.</param>
            /// <param name="reader">Reader handed to the plan builder.</param>
            public SectionSearchScorer(Query query, Similarity similarity, float score, IndexReader reader)
                : base(similarity)
            {
                _curScr = score;

                SectionSearchQueryPlanBuilder builder = new SectionSearchQueryPlanBuilder(reader);
                _plan = builder.GetPlan(query);
                if (_plan != null)
                {
                    // A plan exists: start before the first document.
                    _curDoc = -1;
                }
                else
                {
                    // No plan: the scorer is exhausted from the start.
                    _curDoc = DocIdSetIterator.NO_MORE_DOCS;
                }
            }
            /// <summary>
            /// Wraps an inner scorer and collects one doc scorer per boosted facet.
            /// </summary>
            /// <param name="parent">Builder supplying the boost maps and the scoring function factory.</param>
            /// <param name="reader">Reader used to look up facet handlers and create doc scorers.</param>
            /// <param name="similarity">Forwarded to the base scorer.</param>
            /// <param name="innerScorer">Scorer being wrapped.</param>
            /// <exception cref="ArgumentException">When a facet's handler is not an <c>IFacetScoreable</c>.</exception>
            public FacetBasedBoostingScorer(FacetBasedBoostScorerBuilder parent, BoboIndexReader reader, Similarity similarity, Scorer innerScorer)
                : base(similarity)
            {
                _innerScorer = innerScorer;

                List<BoboDocScorer> facetScorers = new List<BoboDocScorer>();
                foreach (var boostEntry in parent._boostMaps)
                {
                    string facetName = boostEntry.Key;

                    // A missing handler and a handler of the wrong kind are rejected the same way.
                    IFacetScoreable facetScoreable = reader.GetFacetHandler(facetName) as IFacetScoreable;
                    if (facetScoreable == null)
                    {
                        throw new ArgumentException(facetName + " does not implement FacetScoreable");
                    }

                    BoboDocScorer docScorer = facetScoreable.GetDocScorer(reader, parent._scoringFunctionFactory, boostEntry.Value);
                    if (docScorer != null)
                    {
                        facetScorers.Add(docScorer);
                    }
                }

                _facetScorers = facetScorers.ToArray();
                _docid = -1;
            }
        /// <summary>
        /// Chains the optional and prohibited sub-scorers that have at least one document
        /// and pre-computes the coord factors.
        /// </summary>
        /// <param name="similarity">Forwarded to the base scorer.</param>
        /// <param name="minNrShouldMatch">Stored on the scorer.</param>
        /// <param name="optionalScorers">Optional scorers; may be null or empty.</param>
        /// <param name="prohibitedScorers">Prohibited scorers; may be null or empty. Each one consumes a bit of the mask.</param>
        public BooleanScorer(Similarity similarity, int minNrShouldMatch, System.Collections.IList optionalScorers, System.Collections.IList prohibitedScorers)
            : base(similarity)
        {
            this.minNrShouldMatch = minNrShouldMatch;

            if (optionalScorers != null && optionalScorers.Count > 0)
            {
                foreach (Scorer optional in optionalScorers)
                {
                    // Only scorers that actually have a document join the chain.
                    if (optional.NextDoc() != NO_MORE_DOCS)
                    {
                        scorers = new SubScorer(optional, false, false, bucketTable.NewCollector(0), scorers);
                    }
                }
            }

            if (prohibitedScorers != null && prohibitedScorers.Count > 0)
            {
                foreach (Scorer prohibited in prohibitedScorers)
                {
                    // Reserve the next mask bit for this scorer even when it has no documents.
                    int mask = nextMask;
                    nextMask <<= 1;
                    prohibitedMask |= mask; // update prohibited mask
                    if (prohibited.NextDoc() != NO_MORE_DOCS)
                    {
                        scorers = new SubScorer(prohibited, false, true, bucketTable.NewCollector(mask), scorers);
                    }
                }
            }

            // NOTE(review): maxCoord is not modified by any statement visible in this constructor.
            coordFactors = new float[maxCoord];
            Similarity sim = GetSimilarity();
            int slot = 0;
            while (slot < maxCoord)
            {
                coordFactors[slot] = sim.Coord(slot, maxCoord - 1);
                slot++;
            }
        }
        /// <summary>
        /// Creates a scorer with the given similarity and lists of required, prohibited and
        /// optional scorers. If no required scorers are added, at least one of the optional
        /// scorers will have to match during the search.
        /// </summary>
        /// <param name="similarity">The similarity to be used.</param>
        /// <param name="minNrShouldMatch">The minimum number of optional added scorers that should match
        /// during the search. In case no required scorers are added, at least
        /// one of the optional scorers will have to match during the search.</param>
        /// <param name="required">The list of required scorers.</param>
        /// <param name="prohibited">The list of prohibited scorers.</param>
        /// <param name="optional">The list of optional scorers.</param>
        /// <exception cref="System.ArgumentException">When <paramref name="minNrShouldMatch"/> is negative.</exception>
        public BooleanScorer2(Similarity similarity, int minNrShouldMatch, System.Collections.IList required, System.Collections.IList prohibited, System.Collections.IList optional)
            : base(similarity)
        {
            bool negativeMinimum = minNrShouldMatch < 0;
            if (negativeMinimum)
            {
                throw new System.ArgumentException("Minimum number of optional scorers should not be negative");
            }

            coordinator = new Coordinator(this);
            this.minNrShouldMatch = minNrShouldMatch;

            // Optional and required scorers both count towards the maximum coordination.
            optionalScorers = optional;
            int optionalCount = optional.Count;
            coordinator.maxCoord += optionalCount;

            requiredScorers = required;
            int requiredCount = required.Count;
            coordinator.maxCoord += requiredCount;

            prohibitedScorers = prohibited;

            countingSumScorer = MakeCountingSumScorer();
        }
Beispiel #22
        /// <summary>
        /// Builds the stop-word set, the two analyzers (whitespace "as is" and standard with stop words),
        /// the two multi-field query parsers over the URL and text fields, and the custom similarity.
        /// </summary>
        public LuceneCore()
        {
            // NOTE(review): the stop-word list was adapted from an external source; the attribution is missing here.
            string STOPWORDS = "a able about across after all almost also am among an and " +
                               "any are as at be because been but by can cannot  could dear " +
                               "did do does either else ever every for from get got had has " +
                               "have he her hers him his how however i if in into is it its " +
                               "just least let like likely may me might most must my " +
                               "neither no nor not of off often on only or other our own " +
                               "rather said say says she should since so some than that the " +
                               "their them then there these they this tis to too twas us " +
                               "wants was we were what when where which while who whom why " +
                               "will with would  yet you your";

            // Split on whitespace and drop the empty tokens produced by the doubled spaces in the list,
            // then load every word into the set handed to the analyzer.
            string[] stopArray = STOPWORDS.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            HashSet<String> stopSet = new HashSet<String>(stopArray);

            luceneIndexDirectory = null;
            writer       = null;
            AsIsanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer();
            analyzer     = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION, stopSet);

            string[] fieldList = new string[] { uRL, qText };
            MultiParser     = new MultiFieldQueryParser(VERSION, fieldList, analyzer);
            MultiAsIsparser = new MultiFieldQueryParser(VERSION, fieldList, AsIsanalyzer);
            newSimilarity   = new NewSimilarity();
        }
Beispiel #23
		/// <summary>
		/// Builds the scorer state from one <c>TermPositions</c> enumerator per phrase term.
		/// </summary>
		/// <param name="weight">Weight whose value is cached in <c>value_Renamed</c>.</param>
		/// <param name="tps">Term position enumerators, one per phrase term.</param>
		/// <param name="positions">Phrase position for each entry of <paramref name="tps"/>.</param>
		/// <param name="similarity">Forwarded to the base scorer.</param>
		/// <param name="norms">Field norms stored on the scorer.</param>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) : base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.GetValue();

			// convert tps to a list
			for (int i = 0; i < tps.Length; i++)
			{
				PhrasePositions pp = new PhrasePositions(tps[i], positions[i]);
				if (last != null)
				{
					// add next to end of list
					// NOTE(review): assumes PhrasePositions exposes its list link as `next` - confirm.
					last.next = pp;
				}
				else
				{
					// the very first element becomes the head of the list
					first = pp;
				}
				last = pp;
			}

			pq = new PhraseQueue(tps.Length); // construct empty pq
		}
Beispiel #24
        /// <summary>
        /// Collects the query's terms, builds a <c>TermContext</c> and term statistics for each,
        /// and computes the weight statistics when the query has a field.
        /// </summary>
        /// <param name="query">Span query being weighted.</param>
        /// <param name="searcher">Searcher supplying the similarity, reader context and statistics.</param>
        public SpanWeight(SpanQuery query, IndexSearcher searcher)
        {
            this.Similarity = searcher.Similarity;
            this.query = query;

            TermContexts = new Dictionary<Term, TermContext>();
            SortedSet<Term> terms = new SortedSet<Term>();
            // Without this the set stays empty and no statistics are ever gathered.
            // NOTE(review): assumes Query.ExtractTerms accepts a SortedSet<Term> - confirm.
            query.ExtractTerms(terms);

            IndexReaderContext context = searcher.TopReaderContext;
            TermStatistics[] termStats = new TermStatistics[terms.Count];
            int i = 0;
            foreach (Term term in terms)
            {
                TermContext state = TermContext.Build(context, term);
                termStats[i] = searcher.TermStatistics(term, state);
                TermContexts[term] = state;
                i++; // advance the slot so each term gets its own entry
            }

            string field = query.Field;
            if (field != null)
            {
                Stats = Similarity.ComputeWeight(query.Boost, searcher.CollectionStatistics(field), termStats);
            }
        }
Beispiel #25
            private int maxDoc;                           // document count

            /// <summary>
            /// Stores the document-frequency map and the document count.
            /// </summary>
            /// <param name="dfMap">Document-frequency map kept by reference.</param>
            /// <param name="maxDoc">Document count.</param>
            /// <param name="similarity">Not referenced by the visible statements.</param>
            public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc, Similarity similarity)
            {
                // NOTE(review): similarity is accepted but unused here - confirm whether it should be applied.
                this.maxDoc = maxDoc;
                this.dfMap = dfMap;
            }
Beispiel #26
			/// <summary>Copies the captured values and the enclosing <c>Similarity</c> into the instance fields.</summary>
			private void InitBlock(int df, int max, float idf, Similarity enclosingInstance)
			{
				this.enclosingInstance = enclosingInstance;
				this.idf = idf;
				this.max = max;
				this.df = df;
			}
Beispiel #27
			/// <summary>Copies the captured idf and the enclosing <c>Similarity</c> into the instance fields.</summary>
			private void InitBlock(float idf, Similarity enclosingInstance)
			{
				this.enclosingInstance = enclosingInstance;
				this.idf = idf;
			}
Beispiel #28
 /// <summary>Takes the similarity directly from the searcher.</summary>
 /// <param name="enclosingInstance">The owning query (not referenced by the visible statement).</param>
 /// <param name="searcher">Searcher whose similarity is stored on the weight.</param>
 public MatchAllDocsWeight(MatchAllDocsQuery enclosingInstance, Searcher searcher)
 {
     similarity = searcher.GetSimilarity();
 }
 /// <summary>
 /// Resolves the similarity for the searcher and walks the clauses of the enclosing boolean query.
 /// </summary>
 /// <param name="enclosingInstance">The owning query (the visible statements read it through <c>Enclosing_Instance</c>).</param>
 /// <param name="searcher">Searcher used to resolve the similarity.</param>
 public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher)
     this.similarity = Enclosing_Instance.GetSimilarity(searcher);
     // Visit every clause of the enclosing query by index.
     for (int i = 0; i < Enclosing_Instance.clauses.Count; i++)
         BooleanClause c = (BooleanClause) Enclosing_Instance.clauses[i];
         // NOTE(review): c is not used by any visible statement; the rest of the loop body appears to be missing - confirm.
Beispiel #30
 /// <summary>Resolves the similarity for the searcher and computes the idf of the query term with it.</summary>
 /// <param name="enclosingInstance">The owning query (the visible statements read it through <c>Enclosing_Instance</c>).</param>
 /// <param name="searcher">Searcher used for similarity lookup and idf statistics.</param>
 public TermWeight(TermQuery enclosingInstance, Searcher searcher)
 {
     similarity = Enclosing_Instance.GetSimilarity(searcher);

     // compute idf
     idf = similarity.Idf(Enclosing_Instance.term, searcher);
 }
Beispiel #31
        /// <summary>
        /// Indexes four fixed documents, then checks per-term frequencies against the stored term vectors,
        /// the ranking of a "chocolate" term query, and the output of the sorted term-vector mappers.
        /// </summary>
        public virtual void  TestKnownSetOfDocuments()
            System.String test1 = "eating chocolate in a computer lab";                                             //6 terms
            System.String test2 = "computer in a computer lab";                                                     //5 terms
            System.String test3 = "a chocolate lab grows old";                                                      //5 terms
            System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
            // Expected per-term frequencies for test4; "computer" is assigned twice below with the same value.
            System.Collections.IDictionary test4Map = new System.Collections.Hashtable();
            test4Map["chocolate"] = 3;
            test4Map["lab"]       = 2;
            test4Map["eating"]    = 1;
            test4Map["computer"]  = 1;
            test4Map["with"]      = 1;
            test4Map["a"]         = 1;
            test4Map["colored"]   = 1;
            test4Map["in"]        = 1;
            test4Map["an"]        = 1;
            test4Map["computer"]  = 1;
            test4Map["old"]       = 1;

            Document testDoc1 = new Document();

            SetupDoc(testDoc1, test1);
            Document testDoc2 = new Document();

            SetupDoc(testDoc2, test2);
            Document testDoc3 = new Document();

            SetupDoc(testDoc3, test3);
            Document testDoc4 = new Document();

            SetupDoc(testDoc4, test4);

            Directory dir = new MockRAMDirectory();

                // NOTE(review): the deeper indentation from here on and the trailing catch suggest an
                // enclosing try block whose opening is not visible - confirm.
                IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
                Assert.IsTrue(writer != null);
                writer.AddDocument(testDoc1, null);
                writer.AddDocument(testDoc2, null);
                writer.AddDocument(testDoc3, null);
                writer.AddDocument(testDoc4, null);
                IndexSearcher knownSearcher = new IndexSearcher(dir, true, null);
                TermEnum      termEnum      = knownSearcher.reader_ForNUnit.Terms(null);
                TermDocs      termDocs      = knownSearcher.reader_ForNUnit.TermDocs(null);
                //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);

                // For every term and every document containing it, the posting frequency must match
                // the frequency recorded in that document's term vector.
                Similarity sim = knownSearcher.Similarity;
                while (termEnum.Next(null) == true)
                    Term term = termEnum.Term;
                    //System.out.println("Term: " + term);
                    termDocs.Seek(term, null);
                    while (termDocs.Next(null))
                        int docId = termDocs.Doc;
                        int freq  = termDocs.Freq;
                        //System.out.println("Doc Id: " + docId + " freq " + freq);
                        ITermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field", null);
                        float           tf     = sim.Tf(freq);
                        float           idf    = sim.Idf(knownSearcher.DocFreq(term, null), knownSearcher.MaxDoc);
                        //float qNorm = sim.queryNorm()
                        //This is fine since we don't have stop words
                        float lNorm = sim.LengthNorm("field", vector.GetTerms().Length);
                        //float coord = sim.coord()
                        //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
                        Assert.IsTrue(vector != null);
                        System.String[] vTerms = vector.GetTerms();
                        int[]           freqs  = vector.GetTermFrequencies();
                        for (int i = 0; i < vTerms.Length; i++)
                            if (term.Text.Equals(vTerms[i]))
                                Assert.IsTrue(freqs[i] == freq);
                Query      query = new TermQuery(new Term("field", "chocolate"));
                ScoreDoc[] hits  = knownSearcher.Search(query, null, 1000, null).ScoreDocs;
                //doc 3 should be the first hit b/c it is the shortest match
                Assert.IsTrue(hits.Length == 3);
                float score = hits[0].Score;

                /*System.out.println("Hit 0: " + + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query,;
                 * System.out.println("Hit 1: " + + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query,;
                 * System.out.println("Hit 2: " + + " Score: " + hits.score(2) + " String: " +  hits.doc(2).toString());
                 * System.out.println("Explain: " + knownSearcher.explain(query,;*/
                // Expected ranking by document id: 2, then 3, then 0.
                Assert.IsTrue(hits[0].Doc == 2);
                Assert.IsTrue(hits[1].Doc == 3);
                Assert.IsTrue(hits[2].Doc == 0);
                // The second hit is document 3 (test4): its vector must hold 10 terms whose frequencies match test4Map.
                ITermFreqVector vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, "field", null);
                Assert.IsTrue(vector2 != null);
                //System.out.println("Vector: " + vector);
                System.String[] terms  = vector2.GetTerms();
                int[]           freqs2 = vector2.GetTermFrequencies();
                Assert.IsTrue(terms != null && terms.Length == 10);
                for (int i = 0; i < terms.Length; i++)
                    System.String term = terms[i];
                    //System.out.println("Term: " + term);
                    int freq = freqs2[i];
                    Assert.IsTrue(test4.IndexOf(term) != -1);
                    System.Int32 freqInt = -1;
                        // NOTE(review): the lookup below appears to sit inside a try whose opening is not visible;
                        // the catch that follows has no visible body - confirm.
                        freqInt = (System.Int32)test4Map[term];
                    catch (Exception)
                    Assert.IsTrue(freqInt == freq);
                // Mapper that collapses all fields and sorts entries by frequency.
                SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, mapper, null);
                var vectorEntrySet = mapper.TermVectorEntrySet;
                Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10);
                TermVectorEntry last = null;
                foreach (TermVectorEntry tve in vectorEntrySet)
                    if (tve != null && last != null)
                        Assert.IsTrue(last.Frequency >= tve.Frequency, "terms are not properly sorted");
                        System.Int32 expectedFreq = (System.Int32)test4Map[tve.Term];
                        //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
                        Assert.IsTrue(tve.Frequency == 2 * expectedFreq, "Frequency is not correct:");
                    last = tve;

                // Mapper that keeps fields separate: two fields are expected, and "field" must hold 10 entries.
                FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
                knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, fieldMapper, null);
                var map = fieldMapper.FieldToTerms;
                Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2);
                vectorEntrySet = map["field"];
                Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be");
                Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10);
            // NOTE(review): no handler body is visible for this catch - confirm how I/O failures are reported.
            catch (System.IO.IOException e)
Beispiel #32
        /// <summary>
        /// Tests that a query matches an expected set of documents using the top-1000 search results,
        /// then runs the generic query checks.
        /// </summary>
        /// <param name="random">Random source handed to the query checks.</param>
        /// <param name="query">The query to test.</param>
        /// <param name="defaultFieldName">Used for displaying the query in assertion messages.</param>
        /// <param name="searcher">The searcher to test the query against.</param>
        /// <param name="results">A list of document ids that must match the query.</param>
        /// <param name="similarity">
        /// LUCENENET specific.
        /// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>.
        /// </param>
        /// <seealso cref="CheckHitCollector"/>
        public static void DoCheckHits(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results, Similarity similarity)
        {
            ScoreDoc[] hits = searcher.Search(query, 1000).ScoreDocs;

            // Expected document ids.
            SortedSet<int?> correct = new SortedSet<int?>();
            foreach (int expectedDoc in results)
            {
                correct.Add(Convert.ToInt32(expectedDoc, CultureInfo.InvariantCulture));
            }

            // Document ids actually returned.
            SortedSet<int?> actual = new SortedSet<int?>();
            foreach (ScoreDoc hit in hits)
            {
                actual.Add(Convert.ToInt32(hit.Doc, CultureInfo.InvariantCulture));
            }

            Assert.AreEqual(correct, actual, query.ToString(defaultFieldName));

            QueryUtils.Check(random, query, searcher, LuceneTestCase.Rarely(random), similarity);
        }
Beispiel #33
        /// <summary>
        /// Tests that a query matches an expected set of documents using a collector,
        /// first against the given searcher and then against three wrapped variants of its reader.
        /// </summary>
        /// <param name="random">Random source handed to the query checks and the reader wrapper.</param>
        /// <param name="query">The query to test.</param>
        /// <param name="defaultFieldName">Used for displaying the query in assertion messages.</param>
        /// <param name="searcher">The searcher to test the query against.</param>
        /// <param name="results">A list of document ids that must match the query.</param>
        /// <param name="similarity">
        /// LUCENENET specific.
        /// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>.
        /// </param>
        /// <seealso cref="DoCheckHits"/>
        public static void CheckHitCollector(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results, Similarity similarity)
        {
            QueryUtils.Check(random, query, searcher, similarity);

            // Expected document ids.
            SortedSet<int?> correct = new SortedSet<int?>();
            foreach (int expectedDoc in results)
            {
                correct.Add(Convert.ToInt32(expectedDoc, CultureInfo.InvariantCulture));
            }

            // The collector writes the ids it sees into this set.
            SortedSet<int?> actual = new SortedSet<int?>();
            ICollector c = new SetCollector(actual);

            searcher.Search(query, c);
            Assert.AreEqual(correct, actual, "Simple: " + query.ToString(defaultFieldName));

            // Repeat with the reader wrapped in modes -1, 0 and 1.
            for (int i = -1; i <= 1; i++)
            {
                IndexSearcher s = QueryUtils.WrapUnderlyingReader(random, searcher, i, similarity);
                s.Search(query, c);
                Assert.AreEqual(correct, actual, "Wrap Reader " + i + ": " + query.ToString(defaultFieldName));
            }
        }
Beispiel #34
 /// <summary>
 /// Caches the weight value as the constant score and obtains the iterator over the filter's doc id set.
 /// </summary>
 /// <param name="enclosingInstance">The owning query (the visible statements read it through <c>Enclosing_Instance</c>).</param>
 /// <param name="similarity">Forwarded to the base scorer.</param>
 /// <param name="reader">Reader the filter is evaluated against.</param>
 /// <param name="w">Weight whose value becomes the score.</param>
 public ConstantScorer(ConstantScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, Weight w) : base(similarity)
 {
     theScore = w.GetValue();

     var matchingDocs = Enclosing_Instance.filter.GetDocIdSet(reader);
     docIdSetIterator = matchingDocs.Iterator();
 }
            /// <summary>
            /// Builds an explanation of the weight for <paramref name="doc"/> as the product of a
            /// query weight (boost * idf * queryNorm) and a field weight (tf * idf * fieldNorm).
            /// </summary>
            /// <param name="reader">Reader used to create the scorer and read the field norms.</param>
            /// <param name="doc">Document id to explain.</param>
            /// <returns>
            /// A "no matching docs" explanation when no scorer is available.
            /// NOTE(review): no other return statement is visible in this block - confirm what is returned otherwise.
            /// </returns>
            public override Explanation Explain(IndexReader reader, int doc)
                Explanation result = new Explanation();

                result.Description = "weight(" + Query + " in " + doc + "), product of:";

                System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
                System.Text.StringBuilder query    = new System.Text.StringBuilder();
                // One pass per phrase term; a space separates consecutive entries.
                // NOTE(review): nothing visible appends to docFreqs or uses term - the loop body looks truncated.
                for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
                    if (i != 0)
                        query.Append(" ");

                    Term term = Enclosing_Instance.terms[i];


                Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ":" + docFreqs + ")");

                // explain query weight
                Explanation queryExpl = new Explanation();

                queryExpl.Description = "queryWeight(" + Query + "), product of:";

                Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");

                // NOTE(review): no statement is visible under this condition - confirm what a non-default boost should add.
                if (Enclosing_Instance.Boost != 1.0f)

                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");


                queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;


                // explain field weight
                Explanation fieldExpl = new Explanation();

                fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:";

                PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false);

                if (scorer == null)
                    return(new Explanation(0.0f, "no matching docs"));
                Explanation tfExplanation = new Explanation();
                // The phrase frequency counts only when the scorer lands exactly on the requested document.
                int         d             = scorer.Advance(doc);
                float       phraseFreq    = (d == doc) ? scorer.CurrentFreq() : 0.0f;

                tfExplanation.Value       = similarity.Tf(phraseFreq);
                tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";


                Explanation fieldNormExpl = new Explanation();

                // A field without norms is treated as having a norm of 1.
                byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
                float  fieldNorm  = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;

                fieldNormExpl.Value       = fieldNorm;
                fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")";

                fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value;


                // combine them
                result.Value = queryExpl.Value * fieldExpl.Value;

                // NOTE(review): no statement is visible under this condition - confirm the intended shortcut.
                if (queryExpl.Value == 1.0f)

Beispiel #36
            /// <summary>
            /// Builds an explanation of the score for <paramref name="doc"/> as the product of a
            /// query-weight part (boost * idf * queryNorm) and a field-weight part
            /// (tf * idf * fieldNorm).
            /// </summary>
            /// <param name="reader">Reader used to obtain the scorer and the field norms.</param>
            /// <param name="doc">Document number being explained.</param>
            public virtual Explanation Explain(IndexReader reader, int doc)
                Explanation result = new Explanation();

                result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

                // Text buffers used in the idf and fieldWeight descriptions below.
                System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
                System.Text.StringBuilder query    = new System.Text.StringBuilder();
                for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
                    // Separate consecutive entries with a single space in both buffers.
                    if (i != 0)
                        docFreqs.Append(" ");
                        query.Append(" ");

                    Term term = (Term)Enclosing_Instance.terms[i];



                Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + docFreqs + ")");

                // explain query weight
                Explanation queryExpl = new Explanation();

                queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

                Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (Enclosing_Instance.GetBoost() != 1.0f)

                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");


                queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());


                // explain field weight
                Explanation fieldExpl = new Explanation();

                fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:");

                // The tf part is delegated to the scorer's own explanation for this doc.
                Explanation tfExpl = Scorer(reader).Explain(doc);


                Explanation fieldNormExpl = new Explanation();

                // NOTE(review): falls back to 0.0f when the reader has no norms for the field;
                // confirm a zero (rather than neutral 1.0f) factor is intended here.
                byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
                float  fieldNorm  = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;

                fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");

                fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());


                // combine them
                result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (queryExpl.GetValue() == 1.0f)

Beispiel #37
			// Stores the three supplied values (idf, explanation text buffer, enclosing Similarity)
			// in the same-named instance fields.
			private void  InitBlock(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance)
				this.fIdf = fIdf;
				this.exp = exp;
				this.enclosingInstance = enclosingInstance;
Beispiel #38
 // Creates the weight and records the similarity the enclosing query resolves for the given searcher.
 public ConstantWeight(ConstantScoreQuery enclosingInstance, Searcher searcher)
     this.similarity = Enclosing_Instance.GetSimilarity(searcher);
Beispiel #39
 // Forwards weight, term positions, offsets, similarity and norms to the base scorer constructor
 // and keeps the slop value for this scorer.
 internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms) : base(weight, tps, offsets, similarity, norms)
     this.slop = slop;
 // Passes the similarity straight to the base constructor; no further initialization is visible in this excerpt.
 public ConjunctionScorer(Similarity similarity) : base(similarity)
Beispiel #41
 // Forwards every argument unchanged to the base scorer constructor.
 internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms) : base(weight, tps, offsets, similarity, norms)
Beispiel #42
            /// <summary>
            /// Builds an explanation of the term score for <paramref name="doc"/> as the product of a
            /// query-weight part (boost * idf * queryNorm) and a field-weight part
            /// (tf * idf * fieldNorm).
            /// </summary>
            /// <param name="reader">Reader used to look up term docs and field norms.</param>
            /// <param name="doc">Document number being explained.</param>
            /// <param name="state">Passed through unchanged to the reader and term-docs calls.</param>
            public override Explanation Explain(IndexReader reader, int doc, IState state)
                ComplexExplanation result = new ComplexExplanation();

                result.Description = "weight(" + Query + " in " + doc + "), product of:";

                // idf value paired with the text produced by the stored idf explanation.
                Explanation expl = new Explanation(idf, idfExp.Explain());

                // explain query weight
                Explanation queryExpl = new Explanation();

                queryExpl.Description = "queryWeight(" + Query + "), product of:";

                Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (Enclosing_Instance.Boost != 1.0f)

                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");


                queryExpl.Value = boostExpl.Value * expl.Value * queryNormExpl.Value;


                // explain field weight
                System.String      field     = Enclosing_Instance.term.Field;
                ComplexExplanation fieldExpl = new ComplexExplanation();

                fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:";

                Explanation tfExplanation = new Explanation();
                // Term frequency stays 0 unless the term-docs enumeration lands exactly on the requested doc.
                int         tf            = 0;
                TermDocs    termDocs      = reader.TermDocs(enclosingInstance.term, state);

                if (termDocs != null)
                        if (termDocs.SkipTo(doc, state) && termDocs.Doc == doc)
                            tf = termDocs.Freq;
                    tfExplanation.Value       = similarity.Tf(tf);
                    tfExplanation.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + tf + ")";
                    // NOTE(review): the two assignments below presumably belong to the branch taken when
                    // termDocs is null; the branch structure is not visible in this excerpt.
                    tfExplanation.Value       = 0.0f;
                    tfExplanation.Description = "no matching term";

                Explanation fieldNormExpl = new Explanation();

                // When the reader has no norms for the field, a neutral factor of 1.0 is used.
                byte[] fieldNorms = reader.Norms(field, state);
                float  fieldNorm  = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;

                fieldNormExpl.Value       = fieldNorm;
                fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")";

                // The field explanation takes its match flag from the tf explanation.
                fieldExpl.Match = tfExplanation.IsMatch;
                fieldExpl.Value = tfExplanation.Value * expl.Value * fieldNormExpl.Value;

                // Propagate the field-level match flag to the overall result.
                System.Boolean?tempAux = fieldExpl.Match;
                result.Match = tempAux;

                // combine them
                result.Value = queryExpl.Value * fieldExpl.Value;

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (queryExpl.Value == 1.0f)

Beispiel #43
 // Passes the given Similarity to the base constructor and keeps a reference to the owning FuzzyTermQuery.
 public AnonymousSimilarityDelegator(FuzzyTermQuery parent, Similarity result) : base(result)
     this.parent = parent;
        /// <summary>
        /// Compares scores from two searchers over the same reader: one with the default similarity
        /// and one with a per-field wrapper that boosts field "foo" from the "foo_boost" doc value.
        /// </summary>
        public virtual void TestSimple()
            Directory         dir   = NewDirectory();
            RandomIndexWriter iw    = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc   = new Document();
            Field             field = NewTextField("foo", "", Field.Store.NO);

            // Doc-values field holding the per-document boost.
            Field dvField = new SingleDocValuesField("foo_boost", 0.0F);

            Field field2 = NewTextField("bar", "", Field.Store.NO);


            // Field values for the first document.
            field.SetStringValue("quick brown fox");
            field2.SetStringValue("quick brown fox");
            dvField.SetSingleValue(2f); // boost x2
            // Field values for the second document.
            field.SetStringValue("jumps over lazy brown dog");
            field2.SetStringValue("jumps over lazy brown dog");
            dvField.SetSingleValue(4f); // boost x4
            IndexReader ir = iw.Reader;


            // no boosting
            IndexSearcher searcher1 = NewSearcher(ir, false, Similarity);
            Similarity    @base     = searcher1.Similarity;
            // boosting
            IndexSearcher searcher2 = NewSearcher(ir, false, Similarity);

            // Replace the second searcher's similarity with the per-field wrapper built around the base similarity.
            searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base);

            // in this case, we searched on field "foo". first document should have 2x the score.
            TermQuery tq = new TermQuery(new Term("foo", "quick"));

            QueryUtils.Check(Random(), tq, searcher1, Similarity);
            QueryUtils.Check(Random(), tq, searcher2, Similarity);

            TopDocs noboost = searcher1.Search(tq, 10);
            TopDocs boost   = searcher2.Search(tq, 10);

            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc));
            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON);

            // this query matches only the second document, which should have 4x the score.
            tq = new TermQuery(new Term("foo", "jumps"));
            QueryUtils.Check(Random(), tq, searcher1, Similarity);
            QueryUtils.Check(Random(), tq, searcher2, Similarity);

            noboost = searcher1.Search(tq, 10);
            boost   = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON);

            // search on field "bar" just for kicks; nothing should change, since we set up
            // our sim provider to only use foo_boost for field foo.
            tq = new TermQuery(new Term("bar", "quick"));
            QueryUtils.Check(Random(), tq, searcher1, Similarity);
            QueryUtils.Check(Random(), tq, searcher2, Similarity);

            noboost = searcher1.Search(tq, 10);
            boost   = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            // Both searchers must produce the same score for the unboosted field.
            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON);

Beispiel #45
 // Builds the scorer: the similarity goes to the base constructor, and the term-docs enumeration,
 // the weight's value and the norms array are stored in fields.
 internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms) : base(similarity)
     // TermDocs is requested with a null term — presumably enumerating all documents; verify against the reader API.
     this.termDocs = reader.TermDocs(null);
     score         = w.GetValue();
     this.norms    = norms;
 // Stores the wrapped similarity and the name of the field that carries the boost value.
 public BoostingSimilarity(Similarity sim, string boostField)
     this.Sim        = sim;
     this.BoostField = boostField;
Beispiel #47
 // Returns the stored score as-is when no norms are present; otherwise multiplies it by the
 // decoded norm of the current document.
 public override float Score()
     return(norms == null?score:score *Similarity.DecodeNorm(norms[DocID()]));
 // Captures the outer test instance, the field and the base similarity, and builds the boosting
 // similarity that reads its boost from "foo_boost".
 public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestDocValuesScoring outerInstance, Field field, Similarity @base)
     this.OuterInstance = outerInstance;
     this.Field         = field;
     this.@base         = @base;
     fooSim             = new BoostingSimilarity(@base, "foo_boost");
Beispiel #49
			// Delegates all initialization to InitBlock, passing the arguments through unchanged.
			public AnonymousClassIDFExplanation1(int df, int max, float idf, Similarity enclosingInstance)
				InitBlock(df, max, idf, enclosingInstance);
 /// <summary>Construct a <see cref="Similarity"/> that delegates all methods to another.
 /// </summary>
 /// <param name="delegee">the Similarity implementation to delegate to
 /// </param>
 public SimilarityDelegator(Similarity delegee)
     this.delegee = delegee;
Beispiel #51
			// Delegates all initialization to InitBlock, passing the arguments through unchanged.
			public AnonymousClassIDFExplanation2(float idf, Similarity enclosingInstance)
				InitBlock(idf, enclosingInstance);
Beispiel #52
            /// <summary>
            /// Builds an explanation of the term score for <paramref name="doc"/> as the product of a
            /// query-weight part (boost * idf * queryNorm) and a field-weight part
            /// (tf * idf * fieldNorm).
            /// </summary>
            /// <param name="reader">Reader used to obtain the scorer and the field norms.</param>
            /// <param name="doc">Document number being explained.</param>
            public override Explanation Explain(IndexReader reader, int doc)
                ComplexExplanation result = new ComplexExplanation();

                result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

                // idf value paired with the text produced by the stored idf explanation.
                Explanation expl = new Explanation(idf, idfExp.Explain());

                // explain query weight
                Explanation queryExpl = new Explanation();

                queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

                Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (Enclosing_Instance.GetBoost() != 1.0f)

                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");


                queryExpl.SetValue(boostExpl.GetValue() * expl.GetValue() * queryNormExpl.GetValue());


                // explain field weight
                System.String      field     = Enclosing_Instance.term.Field();
                ComplexExplanation fieldExpl = new ComplexExplanation();

                fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:");

                // The tf part is delegated to the scorer's own explanation for this doc.
                Explanation tfExpl = Scorer(reader, true, false).Explain(doc);


                Explanation fieldNormExpl = new Explanation();

                // When the reader has no norms for the field, a neutral factor of 1.0 is used.
                byte[] fieldNorms = reader.Norms(field);
                float  fieldNorm  = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;

                fieldNormExpl.SetDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");

                fieldExpl.SetValue(tfExpl.GetValue() * expl.GetValue() * fieldNormExpl.GetValue());

                // NOTE(review): tempAux is read here but its use is not visible in this excerpt.
                System.Boolean?tempAux = fieldExpl.GetMatch();

                // combine them
                result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

                // NOTE(review): the body of this conditional is not visible in this excerpt.
                if (queryExpl.GetValue() == 1.0f)

Beispiel #53
			// Delegates all initialization to InitBlock, passing the arguments through unchanged.
			public AnonymousClassIDFExplanation3(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance)
				InitBlock(fIdf, exp, enclosingInstance);
Beispiel #54
 // Keeps a reference to the enclosing query and records the searcher's similarity.
 public MatchNoDocsWeight(Searcher searcher, MatchNoDocsQuery enclosingInstance)
     this.enclosingInstance = enclosingInstance;
     this.similarity        = searcher.Similarity;
Beispiel #55
		/// <summary>Set the default Similarity implementation used by indexing and search
		/// code. The value is stored in the static <c>defaultImpl</c> field.
		/// </summary>
		/// <param name="similarity">the Similarity implementation to install as the default
		/// </param>
		/// <seealso cref="Searcher.SetSimilarity(Similarity)">
		/// </seealso>
		/// <seealso cref="Lucene.Net.Index.IndexWriter.SetSimilarity(Similarity)">
		/// </seealso>
		public static void  SetDefault(Similarity similarity)
			Similarity.defaultImpl = similarity;
Beispiel #56
            public override Explanation Explain(IndexReader reader, int doc)
                ComplexExplanation result = new ComplexExplanation();

                result.Description = "weight(" + Query + " in " + doc + "), product of:";

                Explanation idfExpl = new Explanation(idf, "idf(" + Query + ")");

                // explain query weight
                Explanation queryExpl = new Explanation();

                queryExpl.Description = "queryWeight(" + Query + "), product of:";

                Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost");

                if (Enclosing_Instance.Boost != 1.0f)


                Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");


                queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value;


                // explain field weight
                ComplexExplanation fieldExpl = new ComplexExplanation();

                fieldExpl.Description = "fieldWeight(" + Query + " in " + doc + "), product of:";

                PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false);

                if (scorer == null)
                    return(new Explanation(0.0f, "no matching docs"));
                Explanation tfExplanation = new Explanation();
                int         d             = scorer.Advance(doc);
                float       phraseFreq    = (d == doc) ? scorer.CurrentFreq() : 0.0f;

                tfExplanation.Value       = similarity.Tf(phraseFreq);
                tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")";

                Explanation fieldNormExpl = new Explanation();

                byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
                float  fieldNorm  = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f;

                fieldNormExpl.Value       = fieldNorm;
                fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")";

                fieldExpl.Match = tfExplanation.IsMatch;
                fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value;

                System.Boolean?tempAux = fieldExpl.Match;
                result.Match = tempAux;

                // combine them
                result.Value = queryExpl.Value * fieldExpl.Value;

                if (queryExpl.Value == 1.0f)
