internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity) : base(similarity) { InitBlock(enclosingInstance); this.reader = reader; count = -1; maxDoc = reader.MaxDoc(); }
private float freq; //phrase frequency in current doc as computed by phraseFreq(). internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms) : base(similarity) { this.norms = norms; this.weight = weight; this.value_Renamed = weight.Value; // convert tps to a list of phrase positions. // note: phrase-position differs from term-position in that its position // reflects the phrase offset: pp.pos = tp.pos - offset. // this makes it easy to identify a matching (exact) phrase // when all PhrasePositions have exactly the same position. for (int i = 0; i < tps.Length; i++) { PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]); if (last != null) { // add next to end of list last.next = pp; } else { first = pp; } last = pp; } pq = new PhraseQueue(tps.Length); // construct empty pq first.doc = -1; }
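A worked example of the offset normalization in the constructor above (illustration only, not part of the source): for a three-term phrase the offsets are 0, 1, 2. If the phrase starts at term position 7 in a document, the raw term positions are 7, 8, 9, and pp.pos = tp.pos - offset yields 7, 7, 7. Identical normalized positions across all PhrasePositions are exactly what signals an exact phrase match at that document position.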
internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms):base(similarity) { InitBlock(enclosingInstance); this.termDocs = reader.TermDocs(null); score = w.Value; this.norms = norms; }
public TermWeight(TermQuery enclosingInstance, Searcher searcher) { InitBlock(enclosingInstance); this.similarity = Enclosing_Instance.GetSimilarity(searcher); idfExp = similarity.IdfExplain(Enclosing_Instance.term, searcher); idf = idfExp.Idf; }
public TermWeight(TermQuery outerInstance, IndexSearcher searcher, TermContext termStates) { this.OuterInstance = outerInstance; Debug.Assert(termStates != null, "TermContext must not be null"); this.TermStates = termStates; this.Similarity = searcher.Similarity; this.Stats = Similarity.ComputeWeight(outerInstance.Boost, searcher.CollectionStatistics(outerInstance.Term_Renamed.Field()), searcher.TermStatistics(outerInstance.Term_Renamed, termStates)); }
internal DocSetIteratorWeight(Query query, Similarity similarity, DocIdSetIterator iter) { _query = query; _similarity = similarity; _iter = iter; _queryNorm = 1.0f; _queryWeight = _query.Boost; }
internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w) : base(similarity) { InitBlock(enclosingInstance); this.reader = reader; id = -1; maxId = reader.MaxDoc() - 1; score = w.GetValue(); }
public SpanWeight(SpanQuery query, Searcher searcher) { this.similarity = query.GetSimilarity(searcher); this.query = query; terms = new Support.Set<Lucene.Net.Index.Term>(); query.ExtractTerms(terms); idfExp = similarity.idfExplain(terms.ToArray(), searcher); idf = idfExp.GetIdf(); }
/// <summary>Class constructor.</summary> public LuceneIREngine() { luceneIndexDirectory = null; writer = null; ISet<string> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; analyzer = new SnowballAnalyzer(VERSION, "English", stopWords); mySimilarity = new CustomSimilarity(); }
public SpanWeight(SpanQuery query, Searcher searcher) { this.similarity = query.GetSimilarity(searcher); this.query = query; terms = new System.Collections.Hashtable(); query.ExtractTerms(terms); idfExp = similarity.idfExplain(new System.Collections.ArrayList(terms.Values), searcher); idf = idfExp.GetIdf(); }
public ConjunctionScorer(Similarity similarity, Scorer[] scorers) : base(similarity) { this.scorers = scorers; coord = similarity.Coord(scorers.Length, scorers.Length); for (int i = 0; i < scorers.Length; i++) { if (scorers[i].NextDoc() == NO_MORE_DOCS) { // If even one of the sub-scorers does not have any documents, this // scorer should not attempt to do any more work. lastDoc = NO_MORE_DOCS; return; } } // Sort the array the first time... // We don't need to sort the array in any future calls because we know // it will already start off sorted (all scorers on same doc). // note that this comparator is not consistent with equals! System.Array.Sort(scorers, new AnonymousClassComparator(this)); // NOTE: doNext() must be called before the re-sorting of the array later on. // The reason is this: assume there are 5 scorers, whose first docs are 1, // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling // doNext() here advances all the first scorers to 5 (or a larger doc ID // they all agree on). // However, if we re-sort before doNext() is called, the order will be 5, 3, // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's // doc equals the last one's. So the invariant that after calling doNext() // all scorers are on the same doc ID is broken. if (DoNext() == NO_MORE_DOCS) { // The scorers did not agree on any document. lastDoc = NO_MORE_DOCS; return; } // If first-time skip distance is any predictor of // scorer sparseness, then we should always try to skip first on // those scorers. // Keep last scorer in its last place (it will be the first // to be skipped on), but reverse all of the others so that // they will be skipped on in order of original high skip. int end = scorers.Length - 1; int max = end >> 1; for (int i = 0; i < max; i++) { Scorer tmp = scorers[i]; int idx = end - i - 1; scorers[i] = scorers[idx]; scorers[idx] = tmp; } }
/// <summary> Construct a <code>TermScorer</code>. /// /// </summary> /// <param name="weight">The weight of the <code>Term</code> in the query. /// </param> /// <param name="td">An iterator over the documents matching the <code>Term</code>. /// </param> /// <param name="similarity">The <code>Similarity</code> implementation to be used for score /// computations. /// </param> /// <param name="norms">The field norms of the document fields for the <code>Term</code>. /// </param> public /*internal*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms):base(similarity) { this.weight = weight; this.termDocs = td; this.norms = norms; this.weightValue = weight.GetValue(); for (int i = 0; i < SCORE_CACHE_SIZE; i++) scoreCache[i] = GetSimilarity().Tf(i) * weightValue; }
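The loop above precomputes tf(freq) * weightValue for small term frequencies. A minimal sketch of how a scorer can consult such a cache (hypothetical helper for illustration, not the library's Score() implementation; the per-document field-norm factor is omitted):

static float CachedScore(int freq, float[] scoreCache, float weightValue, System.Func<float, float> tf)
{
    // Small, common frequencies hit the precomputed table; larger ones fall back
    // to computing tf(freq) * weightValue on demand.
    return freq < scoreCache.Length ? scoreCache[freq] : tf(freq) * weightValue;
}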
/// <summary>Create a BooleanScorer2.</summary> /// <param name="similarity">The similarity to be used. /// </param> /// <param name="minNrShouldMatch">The minimum number of optional added scorers /// that should match during the search. /// In case no required scorers are added, /// at least one of the optional scorers will have to /// match during the search. /// </param> public BooleanScorer2(Similarity similarity, int minNrShouldMatch) : base(similarity) { if (minNrShouldMatch < 0) { throw new System.ArgumentException("Minimum number of optional scorers should not be negative"); } coordinator = new Coordinator(this); this.minNrShouldMatch = minNrShouldMatch; }
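For example, with three optional scorers and minNrShouldMatch = 2, a document that matches only one optional clause is not counted as a hit; with minNrShouldMatch = 0 and no required scorers, matching any single optional clause is enough.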
public SpanWeight(SpanQuery query, Searcher searcher) { this.similarity = query.GetSimilarity(searcher); this.internalQuery = query; terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<Term>(); query.ExtractTerms(terms); idfExp = similarity.IdfExplain(terms, searcher); idf = idfExp.Idf; }
public FastMatchAllScorer(int maxdoc, int[] delDocs, Similarity similarity, float score) : base(similarity) { doc = -1; deletedDocs = delDocs; deletedIndex = 0; moreDeletions = deletedDocs != null && deletedDocs.Length > 0; delLen = deletedDocs != null ? deletedDocs.Length : 0; this.score = score; maxDoc = maxdoc; }
public SpanWeight(SpanQuery query, Searcher searcher) { this.similarity = query.GetSimilarity(searcher); this.query = query; terms = new System.Collections.Hashtable(); query.ExtractTerms(terms); System.Collections.ArrayList tmp = new System.Collections.ArrayList(terms.Values); idf = this.query.GetSimilarity(searcher).Idf(tmp, searcher); }
/// <summary> Creates a new instance of DisjunctionMaxScorer /// /// </summary> /// <param name="tieBreakerMultiplier">Multiplier applied to non-maximum-scoring subqueries for a /// document as they are summed into the result. /// </param> /// <param name="similarity">-- not used since our definition involves neither coord nor terms /// directly /// </param> /// <param name="subScorers">The sub scorers this Scorer should iterate on /// </param> /// <param name="numScorers">The actual number of scorers to iterate on. Note that the array's /// length may be larger than the actual number of scorers. /// </param> public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity, Scorer[] subScorers, int numScorers):base(similarity) { this.tieBreakerMultiplier = tieBreakerMultiplier; // The passed subScorers array includes only scorers which have documents // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already // called. this.subScorers = subScorers; this.numScorers = numScorers; Heapify(); }
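As a sketch of the combination that tieBreakerMultiplier controls (an illustrative helper under the assumption of non-negative sub-scores, not the scorer's actual code): the best sub-score dominates, and the remaining sub-scores contribute only in proportion to the multiplier.

static float DisjunctionMaxCombine(float[] subScores, float tieBreakerMultiplier)
{
    float max = 0.0f, sum = 0.0f;
    foreach (float s in subScores) { sum += s; if (s > max) max = s; }
    // tieBreakerMultiplier = 0 keeps only the maximum; 1 degenerates to a plain sum.
    return max + (sum - max) * tieBreakerMultiplier;
}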
public SectionSearchScorer(Query query, Similarity similarity, float score, IndexReader reader) : base(similarity) { _curScr = score; SectionSearchQueryPlanBuilder builder = new SectionSearchQueryPlanBuilder(reader); _plan = builder.GetPlan(query); if (_plan != null) { _curDoc = -1; //_more = true; // NOT USED } else { _curDoc = DocIdSetIterator.NO_MORE_DOCS; //_more = false; // NOT USED } }
public FacetBasedBoostingScorer(FacetBasedBoostScorerBuilder parent, BoboIndexReader reader, Similarity similarity, Scorer innerScorer) : base(similarity) { _innerScorer = innerScorer; List<BoboDocScorer> list = new List<BoboDocScorer>(); foreach (var boostEntry in parent._boostMaps) { string facetName = boostEntry.Key; IFacetHandler handler = reader.GetFacetHandler(facetName); if (!(handler is IFacetScoreable)) throw new ArgumentException(facetName + " does not implement FacetScoreable"); IFacetScoreable facetScoreable = (IFacetScoreable)handler; BoboDocScorer scorer = facetScoreable.GetDocScorer(reader, parent._scoringFunctionFactory, boostEntry.Value); if (scorer != null) list.Add(scorer); } _facetScorers = list.ToArray(); _docid = -1; }
/*internal*/ public BooleanScorer(Similarity similarity, int minNrShouldMatch, System.Collections.IList optionalScorers, System.Collections.IList prohibitedScorers) : base(similarity) { InitBlock(); this.minNrShouldMatch = minNrShouldMatch; if (optionalScorers != null && optionalScorers.Count > 0) { for (System.Collections.IEnumerator si = optionalScorers.GetEnumerator(); si.MoveNext(); ) { Scorer scorer = (Scorer) si.Current; maxCoord++; if (scorer.NextDoc() != NO_MORE_DOCS) { scorers = new SubScorer(scorer, false, false, bucketTable.NewCollector(0), scorers); } } } if (prohibitedScorers != null && prohibitedScorers.Count > 0) { for (System.Collections.IEnumerator si = prohibitedScorers.GetEnumerator(); si.MoveNext(); ) { Scorer scorer = (Scorer) si.Current; int mask = nextMask; nextMask = nextMask << 1; prohibitedMask |= mask; // update prohibited mask if (scorer.NextDoc() != NO_MORE_DOCS) { scorers = new SubScorer(scorer, false, true, bucketTable.NewCollector(mask), scorers); } } } coordFactors = new float[maxCoord]; Similarity sim = GetSimilarity(); for (int i = 0; i < maxCoord; i++) { coordFactors[i] = sim.Coord(i, maxCoord - 1); } }
/// <summary> Creates a <see cref="Scorer"/> with the given similarity and lists of required, /// prohibited and optional scorers. If no required scorers are added, at least /// one of the optional scorers will have to match during the search. /// /// </summary> /// <param name="similarity">The similarity to be used. /// </param> /// <param name="minNrShouldMatch">The minimum number of optional added scorers that should match /// during the search. In case no required scorers are added, at least /// one of the optional scorers will have to match during the search. /// </param> /// <param name="required">the list of required scorers. /// </param> /// <param name="prohibited">the list of prohibited scorers. /// </param> /// <param name="optional">the list of optional scorers. /// </param> public BooleanScorer2(Similarity similarity, int minNrShouldMatch, System.Collections.IList required, System.Collections.IList prohibited, System.Collections.IList optional) : base(similarity) { if (minNrShouldMatch < 0) { throw new System.ArgumentException("Minimum number of optional scorers should not be negative"); } coordinator = new Coordinator(this); this.minNrShouldMatch = minNrShouldMatch; optionalScorers = optional; coordinator.maxCoord += optional.Count; requiredScorers = required; coordinator.maxCoord += required.Count; prohibitedScorers = prohibited; coordinator.Init(); countingSumScorer = MakeCountingSumScorer(); }
public LuceneCore() { //the stop word list is adapted from https://github.com/pyelasticsearch/pyelasticsearch string STOPWORDS = "a able about across after all almost also am among an and " + "any are as at be because been but by can cannot could dear " + "did do does either else ever every for from get got had has " + "have he her hers him his how however i if in into is it its " + "just least let like likely may me might most must my " + "neither no nor not of off often on only or other our own " + "rather said say says she should since so some than that the " + "their them then there these they this tis to too twas us " + "wants was we were what when where which while who whom why " + "will with would yet you your"; string[] stopArray = STOPWORDS.Split(); HashSet<String> stopSet = new HashSet<String>(); foreach (var word in stopArray) { stopSet.Add(word); } luceneIndexDirectory = null; writer = null; AsIsanalyzer = new Lucene.Net.Analysis.WhitespaceAnalyzer(); analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(VERSION, stopSet); //analyzer = new Lucene.Net.Analysis.SimpleAnalyzer(); //analyzer = new SnowballAnalyzer(VERSION, "English", stopArray); //parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, TEXT_FN, analyzer); string[] fieldList = new string[] { uRL, qText }; MultiParser = new MultiFieldQueryParser(VERSION, fieldList, analyzer); MultiAsIsparser = new MultiFieldQueryParser(VERSION, fieldList, AsIsanalyzer); newSimilarity = new NewSimilarity(); }
internal PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) : base(similarity) { this.norms = norms; this.weight = weight; this.value_Renamed = weight.GetValue(); // convert tps to a list for (int i = 0; i < tps.Length; i++) { PhrasePositions pp = new PhrasePositions(tps[i], positions[i]); if (last != null) { // add next to end of list last.next = pp; } else first = pp; last = pp; } pq = new PhraseQueue(tps.Length); // construct empty pq }
public SpanWeight(SpanQuery query, IndexSearcher searcher) { this.Similarity = searcher.Similarity; this.query = query; TermContexts = new Dictionary<Term, TermContext>(); SortedSet<Term> terms = new SortedSet<Term>(); query.ExtractTerms(terms); IndexReaderContext context = searcher.TopReaderContext; TermStatistics[] termStats = new TermStatistics[terms.Count]; int i = 0; foreach (Term term in terms) { TermContext state = TermContext.Build(context, term); termStats[i] = searcher.TermStatistics(term, state); TermContexts[term] = state; i++; } string field = query.Field; if (field != null) { Stats = Similarity.ComputeWeight(query.Boost, searcher.CollectionStatistics(query.Field), termStats); } }
private int maxDoc; // document count public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc, Similarity similarity) { this.dfMap = dfMap; this.maxDoc = maxDoc; SetSimilarity(similarity); }
private void InitBlock(int df, int max, float idf, Similarity enclosingInstance) { this.df = df; this.max = max; this.idf = idf; this.enclosingInstance = enclosingInstance; }
private void InitBlock(float idf, Similarity enclosingInstance) { this.idf = idf; this.enclosingInstance = enclosingInstance; }
public MatchAllDocsWeight(MatchAllDocsQuery enclosingInstance, Searcher searcher) { InitBlock(enclosingInstance); this.similarity = searcher.GetSimilarity(); }
public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher) { InitBlock(enclosingInstance); this.similarity = Enclosing_Instance.GetSimilarity(searcher); for (int i = 0; i < Enclosing_Instance.clauses.Count; i++) { BooleanClause c = (BooleanClause) Enclosing_Instance.clauses[i]; weights.Add(c.GetQuery().CreateWeight(searcher)); } }
public TermWeight(TermQuery enclosingInstance, Searcher searcher) { InitBlock(enclosingInstance); this.similarity = Enclosing_Instance.GetSimilarity(searcher); idf = similarity.Idf(Enclosing_Instance.term, searcher); // compute idf }
public virtual void TestKnownSetOfDocuments() { System.String test1 = "eating chocolate in a computer lab"; //6 terms System.String test2 = "computer in a computer lab"; //5 terms System.String test3 = "a chocolate lab grows old"; //5 terms System.String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms System.Collections.IDictionary test4Map = new System.Collections.Hashtable(); test4Map["chocolate"] = 3; test4Map["lab"] = 2; test4Map["eating"] = 1; test4Map["computer"] = 1; test4Map["with"] = 1; test4Map["a"] = 1; test4Map["colored"] = 1; test4Map["in"] = 1; test4Map["an"] = 1; test4Map["computer"] = 1; test4Map["old"] = 1; Document testDoc1 = new Document(); SetupDoc(testDoc1, test1); Document testDoc2 = new Document(); SetupDoc(testDoc2, test2); Document testDoc3 = new Document(); SetupDoc(testDoc3, test3); Document testDoc4 = new Document(); SetupDoc(testDoc4, test4); Directory dir = new MockRAMDirectory(); try { IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null); Assert.IsTrue(writer != null); writer.AddDocument(testDoc1, null); writer.AddDocument(testDoc2, null); writer.AddDocument(testDoc3, null); writer.AddDocument(testDoc4, null); writer.Close(); IndexSearcher knownSearcher = new IndexSearcher(dir, true, null); TermEnum termEnum = knownSearcher.reader_ForNUnit.Terms(null); TermDocs termDocs = knownSearcher.reader_ForNUnit.TermDocs(null); //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length); Similarity sim = knownSearcher.Similarity; while (termEnum.Next(null) == true) { Term term = termEnum.Term; //System.out.println("Term: " + term); termDocs.Seek(term, null); while (termDocs.Next(null)) { int docId = termDocs.Doc; int freq = termDocs.Freq; //System.out.println("Doc Id: " + docId + " freq " + freq); ITermFreqVector vector = knownSearcher.reader_ForNUnit.GetTermFreqVector(docId, "field", null); float tf = sim.Tf(freq); float idf = sim.Idf(knownSearcher.DocFreq(term, null), knownSearcher.MaxDoc); //float qNorm = sim.queryNorm() //This is fine since we don't have stop words float lNorm = sim.LengthNorm("field", vector.GetTerms().Length); //float coord = sim.coord() //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm); Assert.IsTrue(vector != null); System.String[] vTerms = vector.GetTerms(); int[] freqs = vector.GetTermFrequencies(); for (int i = 0; i < vTerms.Length; i++) { if (term.Text.Equals(vTerms[i])) { Assert.IsTrue(freqs[i] == freq); } } } //System.out.println("--------"); } Query query = new TermQuery(new Term("field", "chocolate")); ScoreDoc[] hits = knownSearcher.Search(query, null, 1000, null).ScoreDocs; //doc 3 should be the first hit b/c it is the shortest match Assert.IsTrue(hits.Length == 3); float score = hits[0].Score; /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0))); * System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1))); * System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString()); * System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/ Assert.IsTrue(hits[0].Doc == 2); Assert.IsTrue(hits[1].Doc == 3); Assert.IsTrue(hits[2].Doc == 0); ITermFreqVector 
vector2 = knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, "field", null); Assert.IsTrue(vector2 != null); //System.out.println("Vector: " + vector); System.String[] terms = vector2.GetTerms(); int[] freqs2 = vector2.GetTermFrequencies(); Assert.IsTrue(terms != null && terms.Length == 10); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; //System.out.println("Term: " + term); int freq = freqs2[i]; Assert.IsTrue(test4.IndexOf(term) != -1); System.Int32 freqInt = -1; try { freqInt = (System.Int32)test4Map[term]; } catch (Exception) { Assert.IsTrue(false); } Assert.IsTrue(freqInt == freq); } SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, mapper, null); var vectorEntrySet = mapper.TermVectorEntrySet; Assert.IsTrue(vectorEntrySet.Count == 10, "mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.Count + " is not: " + 10); TermVectorEntry last = null; foreach (TermVectorEntry tve in vectorEntrySet) { if (tve != null && last != null) { Assert.IsTrue(last.Frequency >= tve.Frequency, "terms are not properly sorted"); System.Int32 expectedFreq = (System.Int32)test4Map[tve.Term]; //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields Assert.IsTrue(tve.Frequency == 2 * expectedFreq, "Frequency is not correct:"); } last = tve; } FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); knownSearcher.reader_ForNUnit.GetTermFreqVector(hits[1].Doc, fieldMapper, null); var map = fieldMapper.FieldToTerms; Assert.IsTrue(map.Count == 2, "map Size: " + map.Count + " is not: " + 2); vectorEntrySet = map["field"]; Assert.IsTrue(vectorEntrySet != null, "vectorEntrySet is null and it shouldn't be"); Assert.IsTrue(vectorEntrySet.Count == 10, "vectorEntrySet Size: " + vectorEntrySet.Count + " is not: " + 10); knownSearcher.Close(); } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } }
/// <summary> /// Tests that a query matches an expected set of documents using Hits. /// /// <p> /// Note that when using the Hits API, documents will only be returned /// if they have a positive normalized score. /// </p> </summary> /// <param name="query"> the query to test </param> /// <param name="searcher"> the searcher to test the query against </param> /// <param name="defaultFieldName"> used for displaying the query in assertion messages </param> /// <param name="results"> a list of documentIds that must match the query </param> /// <param name="similarity"> /// LUCENENET specific /// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/> /// </param> /// <seealso cref="CheckHitCollector"/> public static void DoCheckHits(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results, Similarity similarity) { ScoreDoc[] hits = searcher.Search(query, 1000).ScoreDocs; SortedSet<int?> correct = new SortedSet<int?>(); for (int i = 0; i < results.Length; i++) { correct.Add(Convert.ToInt32(results[i], CultureInfo.InvariantCulture)); } SortedSet<int?> actual = new SortedSet<int?>(); for (int i = 0; i < hits.Length; i++) { actual.Add(Convert.ToInt32(hits[i].Doc, CultureInfo.InvariantCulture)); } Assert.AreEqual(correct, actual, query.ToString(defaultFieldName)); QueryUtils.Check(random, query, searcher, LuceneTestCase.Rarely(random), similarity); }
/// <summary> /// Tests that a query matches an expected set of documents using a /// HitCollector. /// /// <p> /// Note that when using the HitCollector API, documents will be collected /// if they "match" regardless of what their score is. /// </p> </summary> /// <param name="query"> the query to test </param> /// <param name="searcher"> the searcher to test the query against </param> /// <param name="defaultFieldName"> used for displaying the query in assertion messages </param> /// <param name="results"> a list of documentIds that must match the query </param> /// <param name="similarity"> /// LUCENENET specific /// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/> /// </param> /// <seealso cref="CheckHits"/> public static void CheckHitCollector(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results, Similarity similarity) { QueryUtils.Check(random, query, searcher, similarity); Trace.TraceInformation("Checked"); SortedSet<int?> correct = new SortedSet<int?>(); for (int i = 0; i < results.Length; i++) { correct.Add(Convert.ToInt32(results[i], CultureInfo.InvariantCulture)); } SortedSet<int?> actual = new SortedSet<int?>(); ICollector c = new SetCollector(actual); searcher.Search(query, c); Assert.AreEqual(correct, actual, "Simple: " + query.ToString(defaultFieldName)); for (int i = -1; i < 2; i++) { actual.Clear(); IndexSearcher s = QueryUtils.WrapUnderlyingReader(random, searcher, i, similarity); s.Search(query, c); Assert.AreEqual(correct, actual, "Wrap Reader " + i + ": " + query.ToString(defaultFieldName)); } }
public ConstantScorer(ConstantScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, Weight w) : base(similarity) { InitBlock(enclosingInstance); theScore = w.GetValue(); docIdSetIterator = Enclosing_Instance.filter.GetDocIdSet(reader).Iterator(); }
public override Explanation Explain(IndexReader reader, int doc) { Explanation result = new Explanation(); result.Description = "weight(" + Query + " in " + doc + "), product of:"; System.Text.StringBuilder docFreqs = new System.Text.StringBuilder(); System.Text.StringBuilder query = new System.Text.StringBuilder(); query.Append('\"'); docFreqs.Append(idfExp.Explain()); for (int i = 0; i < Enclosing_Instance.terms.Count; i++) { if (i != 0) { query.Append(" "); } Term term = Enclosing_Instance.terms[i]; query.Append(term.Text); } query.Append('\"'); Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ":" + docFreqs + ")"); // explain query weight Explanation queryExpl = new Explanation(); queryExpl.Description = "queryWeight(" + Query + "), product of:"; Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); if (Enclosing_Instance.Boost != 1.0f) { queryExpl.AddDetail(boostExpl); } queryExpl.AddDetail(idfExpl); Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); queryExpl.AddDetail(queryNormExpl); queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value; result.AddDetail(queryExpl); // explain field weight Explanation fieldExpl = new Explanation(); fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:"; PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false); if (scorer == null) { return(new Explanation(0.0f, "no matching docs")); } Explanation tfExplanation = new Explanation(); int d = scorer.Advance(doc); float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f; tfExplanation.Value = similarity.Tf(phraseFreq); tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")"; fieldExpl.AddDetail(tfExplanation); fieldExpl.AddDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.Norms(Enclosing_Instance.field); float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f; fieldNormExpl.Value = fieldNorm; fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")"; fieldExpl.AddDetail(fieldNormExpl); fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value; result.AddDetail(fieldExpl); // combine them result.Value = queryExpl.Value * fieldExpl.Value; if (queryExpl.Value == 1.0f) { return(fieldExpl); } return(result); }
public virtual Explanation Explain(IndexReader reader, int doc) { Explanation result = new Explanation(); result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:"); System.Text.StringBuilder docFreqs = new System.Text.StringBuilder(); System.Text.StringBuilder query = new System.Text.StringBuilder(); query.Append('\"'); for (int i = 0; i < Enclosing_Instance.terms.Count; i++) { if (i != 0) { docFreqs.Append(" "); query.Append(" "); } Term term = (Term)Enclosing_Instance.terms[i]; docFreqs.Append(term.Text()); docFreqs.Append("="); docFreqs.Append(reader.DocFreq(term)); query.Append(term.Text()); } query.Append('\"'); Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + docFreqs + ")"); // explain query weight Explanation queryExpl = new Explanation(); queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:"); Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost"); if (Enclosing_Instance.GetBoost() != 1.0f) { queryExpl.AddDetail(boostExpl); } queryExpl.AddDetail(idfExpl); Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); queryExpl.AddDetail(queryNormExpl); queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue()); result.AddDetail(queryExpl); // explain field weight Explanation fieldExpl = new Explanation(); fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:"); Explanation tfExpl = Scorer(reader).Explain(doc); fieldExpl.AddDetail(tfExpl); fieldExpl.AddDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.Norms(Enclosing_Instance.field); float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f; fieldNormExpl.SetValue(fieldNorm); fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")"); fieldExpl.AddDetail(fieldNormExpl); fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue()); result.AddDetail(fieldExpl); // combine them result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue()); if (queryExpl.GetValue() == 1.0f) { return(fieldExpl); } return(result); }
private void InitBlock(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance) { this.fIdf = fIdf; this.exp = exp; this.enclosingInstance = enclosingInstance; }
public ConstantWeight(ConstantScoreQuery enclosingInstance, Searcher searcher) { InitBlock(enclosingInstance); this.similarity = Enclosing_Instance.GetSimilarity(searcher); }
internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms) : base(weight, tps, offsets, similarity, norms) { this.slop = slop; }
public ConjunctionScorer(Similarity similarity) : base(similarity) { }
internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms) : base(weight, tps, offsets, similarity, norms) { }
public override Explanation Explain(IndexReader reader, int doc, IState state) { ComplexExplanation result = new ComplexExplanation(); result.Description = "weight(" + Query + " in " + doc + "), product of:"; Explanation expl = new Explanation(idf, idfExp.Explain()); // explain query weight Explanation queryExpl = new Explanation(); queryExpl.Description = "queryWeight(" + Query + "), product of:"; Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); if (Enclosing_Instance.Boost != 1.0f) { queryExpl.AddDetail(boostExpl); } queryExpl.AddDetail(expl); Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); queryExpl.AddDetail(queryNormExpl); queryExpl.Value = boostExpl.Value * expl.Value * queryNormExpl.Value; result.AddDetail(queryExpl); // explain field weight System.String field = Enclosing_Instance.term.Field; ComplexExplanation fieldExpl = new ComplexExplanation(); fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:"; Explanation tfExplanation = new Explanation(); int tf = 0; TermDocs termDocs = reader.TermDocs(enclosingInstance.term, state); if (termDocs != null) { try { if (termDocs.SkipTo(doc, state) && termDocs.Doc == doc) { tf = termDocs.Freq; } } finally { termDocs.Close(); } tfExplanation.Value = similarity.Tf(tf); tfExplanation.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + tf + ")"; } else { tfExplanation.Value = 0.0f; tfExplanation.Description = "no matching term"; } fieldExpl.AddDetail(tfExplanation); fieldExpl.AddDetail(expl); Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.Norms(field, state); float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f; fieldNormExpl.Value = fieldNorm; fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")"; fieldExpl.AddDetail(fieldNormExpl); fieldExpl.Match = tfExplanation.IsMatch; fieldExpl.Value = tfExplanation.Value * expl.Value * fieldNormExpl.Value; result.AddDetail(fieldExpl); System.Boolean?tempAux = fieldExpl.Match; result.Match = tempAux; // combine them result.Value = queryExpl.Value * fieldExpl.Value; if (queryExpl.Value == 1.0f) { return(fieldExpl); } return(result); }
public AnonymousSimilarityDelegator(FuzzyTermQuery parent, Similarity result) : base(result) { this.parent = parent; }
public virtual void TestSimple() { Directory dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); Document doc = new Document(); Field field = NewTextField("foo", "", Field.Store.NO); doc.Add(field); Field dvField = new SingleDocValuesField("foo_boost", 0.0F); doc.Add(dvField); Field field2 = NewTextField("bar", "", Field.Store.NO); doc.Add(field2); field.SetStringValue("quick brown fox"); field2.SetStringValue("quick brown fox"); dvField.SetSingleValue(2f); // boost x2 iw.AddDocument(doc); field.SetStringValue("jumps over lazy brown dog"); field2.SetStringValue("jumps over lazy brown dog"); dvField.SetSingleValue(4f); // boost x4 iw.AddDocument(doc); IndexReader ir = iw.Reader; iw.Dispose(); // no boosting IndexSearcher searcher1 = NewSearcher(ir, false, Similarity); Similarity @base = searcher1.Similarity; // boosting IndexSearcher searcher2 = NewSearcher(ir, false, Similarity); searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base); // in this case, we searched on field "foo". first document should have 2x the score. TermQuery tq = new TermQuery(new Term("foo", "quick")); QueryUtils.Check(Random(), tq, searcher1, Similarity); QueryUtils.Check(Random(), tq, searcher2, Similarity); TopDocs noboost = searcher1.Search(tq, 10); TopDocs boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc)); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON); // this query matches only the second document, which should have 4x the score. tq = new TermQuery(new Term("foo", "jumps")); QueryUtils.Check(Random(), tq, searcher1, Similarity); QueryUtils.Check(Random(), tq, searcher2, Similarity); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON); // search on field bar just for kicks, nothing should happen, since we set up // our sim provider to only use foo_boost for field foo. tq = new TermQuery(new Term("bar", "quick")); QueryUtils.Check(Random(), tq, searcher1, Similarity); QueryUtils.Check(Random(), tq, searcher2, Similarity); noboost = searcher1.Search(tq, 10); boost = searcher2.Search(tq, 10); Assert.AreEqual(1, noboost.TotalHits); Assert.AreEqual(1, boost.TotalHits); Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON); ir.Dispose(); dir.Dispose(); }
internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms) : base(similarity) { InitBlock(enclosingInstance); this.termDocs = reader.TermDocs(null); score = w.GetValue(); this.norms = norms; }
public BoostingSimilarity(Similarity sim, string boostField) { this.Sim = sim; this.BoostField = boostField; }
public override float Score() { return norms == null ? score : score * Similarity.DecodeNorm(norms[DocID()]); }
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestDocValuesScoring outerInstance, Field field, Similarity @base) { this.OuterInstance = outerInstance; this.Field = field; this.@base = @base; fooSim = new BoostingSimilarity(@base, "foo_boost"); }
public AnonymousClassIDFExplanation1(int df, int max, float idf, Similarity enclosingInstance) { InitBlock(df, max, idf, enclosingInstance); }
/// <summary>Construct a <see cref="Similarity"/> that delegates all methods to another. /// /// </summary> /// <param name="delegee">the Similarity implementation to delegate to /// </param> public SimilarityDelegator(Similarity delegee) { this.delegee = delegee; }
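A hedged usage sketch (assuming Tf(float) remains overridable through the delegator, as in the Lucene.Net versions these snippets come from): wrap an existing Similarity and override a single factor while delegating everything else.

class CappedTfSimilarity : SimilarityDelegator
{
    public CappedTfSimilarity(Similarity delegee) : base(delegee) { }
    // Flatten term-frequency saturation; all other factors come from the wrapped Similarity.
    public override float Tf(float freq) { return (float)System.Math.Min(2.0, System.Math.Sqrt(freq)); }
}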
public AnonymousClassIDFExplanation2(float idf, Similarity enclosingInstance) { InitBlock(idf, enclosingInstance); }
public override Explanation Explain(IndexReader reader, int doc) { ComplexExplanation result = new ComplexExplanation(); result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:"); Explanation expl = new Explanation(idf, idfExp.Explain()); // explain query weight Explanation queryExpl = new Explanation(); queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:"); Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost"); if (Enclosing_Instance.GetBoost() != 1.0f) { queryExpl.AddDetail(boostExpl); } queryExpl.AddDetail(expl); Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); queryExpl.AddDetail(queryNormExpl); queryExpl.SetValue(boostExpl.GetValue() * expl.GetValue() * queryNormExpl.GetValue()); result.AddDetail(queryExpl); // explain field weight System.String field = Enclosing_Instance.term.Field(); ComplexExplanation fieldExpl = new ComplexExplanation(); fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:"); Explanation tfExpl = Scorer(reader, true, false).Explain(doc); fieldExpl.AddDetail(tfExpl); fieldExpl.AddDetail(expl); Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.Norms(field); float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f; fieldNormExpl.SetValue(fieldNorm); fieldNormExpl.SetDescription("fieldNorm(field=" + field + ", doc=" + doc + ")"); fieldExpl.AddDetail(fieldNormExpl); fieldExpl.SetMatch(tfExpl.IsMatch()); fieldExpl.SetValue(tfExpl.GetValue() * expl.GetValue() * fieldNormExpl.GetValue()); result.AddDetail(fieldExpl); System.Boolean?tempAux = fieldExpl.GetMatch(); result.SetMatch(tempAux); // combine them result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue()); if (queryExpl.GetValue() == 1.0f) { return(fieldExpl); } return(result); }
public AnonymousClassIDFExplanation3(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance) { InitBlock(fIdf, exp, enclosingInstance); }
public MatchNoDocsWeight(Searcher searcher, MatchNoDocsQuery enclosingInstance) { this.enclosingInstance = enclosingInstance; this.similarity = searcher.Similarity; }
/// <summary>Set the default Similarity implementation used by indexing and search /// code. /// /// </summary> /// <seealso cref="Searcher.SetSimilarity(Similarity)"> /// </seealso> /// <seealso cref="Lucene.Net.Index.IndexWriter.SetSimilarity(Similarity)"> /// </seealso> public static void SetDefault(Similarity similarity) { Similarity.defaultImpl = similarity; }
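A minimal usage sketch, reusing the CustomSimilarity type constructed elsewhere in these snippets:

// Install the custom implementation once, before creating writers and searchers;
// both pick it up as their default unless a Similarity is set on them explicitly.
Similarity.SetDefault(new CustomSimilarity());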
public override Explanation Explain(IndexReader reader, int doc) { ComplexExplanation result = new ComplexExplanation(); result.Description = "weight(" + Query + " in " + doc + "), product of:"; Explanation idfExpl = new Explanation(idf, "idf(" + Query + ")"); // explain query weight Explanation queryExpl = new Explanation(); queryExpl.Description = "queryWeight(" + Query + "), product of:"; Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); if (Enclosing_Instance.Boost != 1.0f) { queryExpl.AddDetail(boostExpl); } queryExpl.AddDetail(idfExpl); Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); queryExpl.AddDetail(queryNormExpl); queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value; result.AddDetail(queryExpl); // explain field weight ComplexExplanation fieldExpl = new ComplexExplanation(); fieldExpl.Description = "fieldWeight(" + Query + " in " + doc + "), product of:"; PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false); if (scorer == null) { return(new Explanation(0.0f, "no matching docs")); } Explanation tfExplanation = new Explanation(); int d = scorer.Advance(doc); float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f; tfExplanation.Value = similarity.Tf(phraseFreq); tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")"; fieldExpl.AddDetail(tfExplanation); fieldExpl.AddDetail(idfExpl); Explanation fieldNormExpl = new Explanation(); byte[] fieldNorms = reader.Norms(Enclosing_Instance.field); float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]) : 1.0f; fieldNormExpl.Value = fieldNorm; fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")"; fieldExpl.AddDetail(fieldNormExpl); fieldExpl.Match = tfExplanation.IsMatch; fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value; result.AddDetail(fieldExpl); System.Boolean?tempAux = fieldExpl.Match; result.Match = tempAux; // combine them result.Value = queryExpl.Value * fieldExpl.Value; if (queryExpl.Value == 1.0f) { return(fieldExpl); } return(result); }