internal ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer) : base(weight) { this.DocScorer = docScorer; ChunkStates = new ChunkState[postings.Length]; EndMinus1 = postings.Length - 1; // min(cost) Cost_Renamed = postings[0].Postings.Cost(); for (int i = 0; i < postings.Length; i++) { // Coarse optimization: advance(target) is fairly // costly, so, if the relative freq of the 2nd // rarest term is not that much (> 1/5th) rarer than // the first term, then we just use .nextDoc() when // ANDing. this buys ~15% gain for phrases where // freq of rarest 2 terms is close: bool useAdvance = postings[i].DocFreq > 5 * postings[0].DocFreq; ChunkStates[i] = new ChunkState(postings[i].Postings, -postings[i].Position, useAdvance); if (i > 0 && postings[i].Postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS) { NoDocs = true; return; } } }
protected internal SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer) : base(weight) { this.DocScorer = docScorer; this.Spans = spans; Doc = -1; More = spans.Next(); }
public override Explanation Explain(AtomicReaderContext context, int doc) { PayloadTermSpanScorer scorer = (PayloadTermSpanScorer)Scorer(context, (context.AtomicReader).LiveDocs); if (scorer != null) { int newDoc = scorer.Advance(doc); if (newDoc == doc) { float freq = scorer.SloppyFreq(); Similarity.SimScorer docScorer = Similarity.DoSimScorer(Stats, context); Explanation expl = new Explanation(); expl.Description = "weight(" + Query + " in " + doc + ") [" + Similarity.GetType().Name + "], result of:"; Explanation scoreExplanation = docScorer.Explain(doc, new Explanation(freq, "phraseFreq=" + freq)); expl.AddDetail(scoreExplanation); expl.Value = scoreExplanation.Value; // now the payloads part // QUESTION: Is there a way to avoid this skipTo call? We need to know // whether to load the payload or not // GSI: I suppose we could toString the payload, but I don't think that // would be a good idea string field = ((SpanQuery)Query).Field; Explanation payloadExpl = OuterInstance.Function.Explain(doc, field, scorer.PayloadsSeen, scorer.PayloadScore_Renamed); payloadExpl.Value = scorer.PayloadScore; // combined ComplexExplanation result = new ComplexExplanation(); if (OuterInstance.IncludeSpanScore) { result.AddDetail(expl); result.AddDetail(payloadExpl); result.Value = expl.Value * payloadExpl.Value; result.Description = "btq, product of:"; } else { result.AddDetail(payloadExpl); result.Value = payloadExpl.Value; result.Description = "btq(includeSpanScore=false), result of:"; } result.Match = true; // LUCENE-1303 return(result); } } return(new ComplexExplanation(false, 0.0f, "no matching term")); }
protected internal virtual void ProcessPayload(Similarity similarity) { if (termSpans.IsPayloadAvailable) { DocsAndPositionsEnum postings = termSpans.Postings; m_payload = postings.GetPayload(); if (m_payload != null) { m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(m_doc, outerInstance.outerInstance.Term.Field, m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore, m_docScorer.ComputePayloadFactor(m_doc, m_spans.Start, m_spans.End, m_payload)); } else { m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(m_doc, outerInstance.outerInstance.Term.Field, m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore, 1F); } m_payloadsSeen++; } else { // zero out the payload? } }
protected internal virtual void ProcessPayload(Similarity similarity) { if (TermSpans.PayloadAvailable) { DocsAndPositionsEnum postings = TermSpans.Postings; Payload = postings.Payload; if (Payload != null) { PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, DocScorer.ComputePayloadFactor(Doc, Spans.Start(), Spans.End(), Payload)); } else { PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, 1F); } PayloadsSeen++; } else { // zero out the payload? } }
/// <summary> /// Sets up a RAMDirectory, and adds documents (using English.IntToEnglish()) with two fields: field and multiField /// and analyzes them using the PayloadAnalyzer </summary> /// <param name="similarity"> The Similarity class to use in the Searcher </param> /// <param name="numDocs"> The num docs to add </param> /// <returns> An IndexSearcher </returns> // TODO: randomize public virtual IndexSearcher SetUp(Random random, Similarity similarity, int numDocs) { Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); PayloadAnalyzer analyzer = new PayloadAnalyzer(this); // TODO randomize this IndexWriter writer = new IndexWriter(directory, (new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity)); // writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new TextField(FIELD, English.IntToEnglish(i), Field.Store.YES)); doc.Add(new TextField(MULTI_FIELD, English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES)); doc.Add(new TextField(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES)); writer.AddDocument(doc); } Reader = DirectoryReader.Open(writer, true); writer.Dispose(); IndexSearcher searcher = LuceneTestCase.NewSearcher(Reader); searcher.Similarity = similarity; return searcher; }
/// <summary> /// Expert: set the <seealso cref="Similarity"/> implementation used by this IndexWriter. /// <p> /// <b>NOTE:</b> the similarity cannot be null. /// /// <p>Only takes effect when IndexWriter is first created. /// </summary> public IndexWriterConfig SetSimilarity(Similarity similarity) { if (similarity == null) { throw new System.ArgumentException("similarity must not be null"); } this.similarity = similarity; return this; }
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance) { this.outerInstance = outerInstance; sim = new DefaultSimilarity(); }
/// <summary> /// Writes the document to the directory using the analyzer /// and the similarity score; returns the SegmentInfo /// describing the new segment /// </summary> public static SegmentCommitInfo WriteDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) { IndexWriter writer = new IndexWriter(dir, (new IndexWriterConfig(Util.LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity ?? IndexSearcher.DefaultSimilarity)); // LuceneTestCase.newIndexWriterConfig(random, //writer.SetNoCFSRatio(0.0); writer.AddDocument(doc); writer.Commit(); SegmentCommitInfo info = writer.NewestSegment(); writer.Dispose(); return info; }
public PayloadTermSpanScorer(PayloadTermQuery.PayloadTermWeight outerInstance, TermSpans spans, Weight weight, Similarity.SimScorer docScorer) : base(spans, weight, docScorer) { this.OuterInstance = outerInstance; TermSpans = spans; }
protected internal virtual void ProcessPayload(Similarity similarity) { if (TermSpans.PayloadAvailable) { DocsAndPositionsEnum postings = TermSpans.Postings; Payload = postings.Payload; if (Payload != null) { PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, DocScorer.ComputePayloadFactor(Doc, Spans.Start(), Spans.End(), Payload)); } else { PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, 1F); } PayloadsSeen++; } else { // zero out the payload? } }
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestDocValuesScoring outerInstance, Field field, Similarity @base) { this.OuterInstance = outerInstance; this.Field = field; this.@base = @base; fooSim = new BoostingSimilarity(@base, "foo_boost"); }
public BoostingSimilarity(Similarity sim, string boostField) { this.Sim = sim; this.BoostField = boostField; }
/// <summary> /// Creates a new config that that handles the live <seealso cref="IndexWriter"/> /// settings. /// </summary> internal LiveIndexWriterConfig(IndexWriterConfig config) { maxBufferedDeleteTerms = config.MaxBufferedDeleteTerms; maxBufferedDocs = config.MaxBufferedDocs; mergedSegmentWarmer = config.MergedSegmentWarmer; RamBufferSizeMB = config.RAMBufferSizeMB; readerTermsIndexDivisor = config.ReaderTermsIndexDivisor; termIndexInterval = config.TermIndexInterval; MatchVersion = config.MatchVersion; analyzer = config.Analyzer; delPolicy = config.DelPolicy; Commit = config.IndexCommit; openMode = config.OpenMode; similarity = config.Similarity; mergeScheduler = config.MergeScheduler; writeLockTimeout = config.WriteLockTimeout; indexingChain = config.IndexingChain; codec = config.Codec; infoStream = config.InfoStream; mergePolicy = config.MergePolicy; indexerThreadPool = config.IndexerThreadPool; readerPooling = config.ReaderPooling; flushPolicy = config.FlushPolicy; PerThreadHardLimitMB = config.RAMPerThreadHardLimitMB; useCompoundFile = config.UseCompoundFile; checkIntegrityAtMerge = config.CheckIntegrityAtMerge; }
private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here #endregion Fields #region Constructors // used by IndexWriterConfig internal LiveIndexWriterConfig(Analyzer analyzer, LuceneVersion matchVersion) { this.analyzer = analyzer; this.MatchVersion = matchVersion; RamBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS; readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR; mergedSegmentWarmer = null; termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here delPolicy = new KeepOnlyLastCommitDeletionPolicy(); Commit = null; useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; openMode = OpenMode_e.CREATE_OR_APPEND; similarity = IndexSearcher.DefaultSimilarity; mergeScheduler = new ConcurrentMergeScheduler(); writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT; indexingChain = DocumentsWriterPerThread.defaultIndexingChain; codec = Codec.Default; if (codec == null) { throw new System.NullReferenceException(); } infoStream = Util.InfoStream.Default; mergePolicy = new TieredMergePolicy(); flushPolicy = new FlushByRamOrCountsPolicy(); readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING; indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); PerThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; }
/// <summary> /// Construct a <code>TermScorer</code>. /// </summary> /// <param name="weight"> /// The weight of the <code>Term</code> in the query. </param> /// <param name="td"> /// An iterator over the documents matching the <code>Term</code>. </param> /// <param name="docScorer"> /// The </code>Similarity.SimScorer</code> implementation /// to be used for score computations. </param> internal TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) : base(weight) { this.DocScorer = docScorer; this.DocsEnum = td; }
public ExampleSimilarityProvider(TestSimilarityProvider outerInstance) { this.OuterInstance = outerInstance; Sim1 = new Sim1(OuterInstance); Sim2 = new Sim2(OuterInstance); }
protected internal JustCompileSpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer) : base(spans, weight, docScorer) { }
public TermWeight(TermQuery outerInstance, IndexSearcher searcher, TermContext termStates) { this.OuterInstance = outerInstance; Debug.Assert(termStates != null, "TermContext must not be null"); this.TermStates = termStates; this.Similarity = searcher.Similarity; this.Stats = Similarity.ComputeWeight(outerInstance.Boost, searcher.CollectionStatistics(outerInstance._term.Field), searcher.TermStatistics(outerInstance._term, termStates)); }
public TestRuleSetupAndRestoreClassEnv() { /*// if verbose: print some debugging stuff about which codecs are loaded. if (LuceneTestCase.VERBOSE) { ISet<string> codecs = Codec.AvailableCodecs(); foreach (string codec in codecs) { Console.WriteLine("Loaded codec: '" + codec + "': " + Codec.ForName(codec).GetType().Name); } ISet<string> postingsFormats = PostingsFormat.AvailablePostingsFormats(); foreach (string postingsFormat in postingsFormats) { Console.WriteLine("Loaded postingsFormat: '" + postingsFormat + "': " + PostingsFormat.ForName(postingsFormat).GetType().Name); } } SavedInfoStream = InfoStream.Default; Random random = RandomizedContext.Current.Random; bool v = random.NextBoolean(); if (LuceneTestCase.INFOSTREAM) { InfoStream.Default = new ThreadNameFixingPrintStreamInfoStream(Console.Out); } else if (v) { InfoStream.Default = new NullInfoStream(); } Type targetClass = RandomizedContext.Current.GetTargetType; AvoidCodecs = new HashSet<string>(); // set back to default LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; SavedCodec = Codec.Default; int randomVal = random.Next(10); if ("Lucene3x".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 3 && !ShouldAvoidCodec("Lucene3x"))) // preflex-only setup { Codec = Codec.ForName("Lucene3x"); Debug.Assert((Codec is PreFlexRWCodec), "fix your classpath to have tests-framework.jar before lucene-core.jar"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; } else if ("Lucene40".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && randomVal == 0 && !ShouldAvoidCodec("Lucene40"))) // 4.0 setup { Codec = Codec.ForName("Lucene40"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; Debug.Assert(Codec is Lucene40RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar"); Debug.Assert((PostingsFormat.ForName("Lucene40") is Lucene40RWPostingsFormat), "fix your classpath to have tests-framework.jar before lucene-core.jar"); } else if ("Lucene41".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 1 && !ShouldAvoidCodec("Lucene41"))) { Codec = Codec.ForName("Lucene41"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; Debug.Assert(Codec is Lucene41RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar"); } else if ("Lucene42".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 2 && !ShouldAvoidCodec("Lucene42"))) { Codec = Codec.ForName("Lucene42"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; Debug.Assert(Codec is Lucene42RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar"); } else if ("Lucene45".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 5 && !ShouldAvoidCodec("Lucene45"))) { Codec = Codec.ForName("Lucene45"); LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; Debug.Assert(Codec is Lucene45RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar"); } else if (("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) == false) || ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) == false)) { // the user wired postings or DV: this is messy // refactor into RandomCodec.... PostingsFormat format; if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT)) { format = PostingsFormat.ForName("Lucene41"); } else { format = PostingsFormat.ForName(LuceneTestCase.TEST_POSTINGSFORMAT); } DocValuesFormat dvFormat; if ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT)) { dvFormat = DocValuesFormat.ForName("Lucene45"); } else { dvFormat = DocValuesFormat.ForName(LuceneTestCase.TEST_DOCVALUESFORMAT); } Codec = new Lucene46CodecAnonymousInnerClassHelper(this, format, dvFormat); } else if ("Asserting".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && randomVal == 6 && !ShouldAvoidCodec("Asserting"))) { Codec = new AssertingCodec(); } else if ("Compressing".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && randomVal == 5 && !ShouldAvoidCodec("Compressing"))) { Codec = CompressingCodec.RandomInstance(random); } else if (!"random".Equals(LuceneTestCase.TEST_CODEC)) { Codec = Codec.ForName(LuceneTestCase.TEST_CODEC); } else if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT)) { Codec = new RandomCodec(random, AvoidCodecs); } else { Debug.Assert(false); } Codec.Default = Codec; */ Random random = new Random(1); Similarity = random.NextBoolean() ? (Similarity)new DefaultSimilarity() : new RandomSimilarityProvider(new Random(1)); /* // Check codec restrictions once at class level. try { CheckCodecRestrictions(Codec); } catch (Exception e) { Console.Error.WriteLine("NOTE: " + e.Message + " Suppressed codecs: " + Arrays.ToString(AvoidCodecs.ToArray())); throw e; }*/ }
public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs) { return(new PayloadTermSpanScorer(this, (TermSpans)query.GetSpans(context, acceptDocs, TermContexts), this, Similarity.DoSimScorer(Stats, context))); }