Ejemplo n.º 1
0
        internal ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer)
            : base(weight)
        {
            this.DocScorer = docScorer;

            ChunkStates = new ChunkState[postings.Length];

            EndMinus1 = postings.Length - 1;

            // min(cost)
            Cost_Renamed = postings[0].Postings.Cost();

            for (int i = 0; i < postings.Length; i++)
            {
                // Coarse optimization: advance(target) is fairly
                // costly, so, if the relative freq of the 2nd
                // rarest term is not that much (> 1/5th) rarer than
                // the first term, then we just use .nextDoc() when
                // ANDing.  this buys ~15% gain for phrases where
                // freq of rarest 2 terms is close:
                bool useAdvance = postings[i].DocFreq > 5 * postings[0].DocFreq;
                ChunkStates[i] = new ChunkState(postings[i].Postings, -postings[i].Position, useAdvance);
                if (i > 0 && postings[i].Postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    NoDocs = true;
                    return;
                }
            }
        }
Ejemplo n.º 2
0
        protected internal SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
            : base(weight)
        {
            this.DocScorer = docScorer;
            this.Spans = spans;

            Doc = -1;
            More = spans.Next();
        }
Ejemplo n.º 3
0
            public override Explanation Explain(AtomicReaderContext context, int doc)
            {
                PayloadTermSpanScorer scorer = (PayloadTermSpanScorer)Scorer(context, (context.AtomicReader).LiveDocs);

                if (scorer != null)
                {
                    int newDoc = scorer.Advance(doc);
                    if (newDoc == doc)
                    {
                        float freq = scorer.SloppyFreq();
                        Similarity.SimScorer docScorer = Similarity.DoSimScorer(Stats, context);
                        Explanation          expl      = new Explanation();
                        expl.Description = "weight(" + Query + " in " + doc + ") [" + Similarity.GetType().Name + "], result of:";
                        Explanation scoreExplanation = docScorer.Explain(doc, new Explanation(freq, "phraseFreq=" + freq));
                        expl.AddDetail(scoreExplanation);
                        expl.Value = scoreExplanation.Value;
                        // now the payloads part
                        // QUESTION: Is there a way to avoid this skipTo call? We need to know
                        // whether to load the payload or not
                        // GSI: I suppose we could toString the payload, but I don't think that
                        // would be a good idea
                        string      field       = ((SpanQuery)Query).Field;
                        Explanation payloadExpl = OuterInstance.Function.Explain(doc, field, scorer.PayloadsSeen, scorer.PayloadScore_Renamed);
                        payloadExpl.Value = scorer.PayloadScore;
                        // combined
                        ComplexExplanation result = new ComplexExplanation();
                        if (OuterInstance.IncludeSpanScore)
                        {
                            result.AddDetail(expl);
                            result.AddDetail(payloadExpl);
                            result.Value       = expl.Value * payloadExpl.Value;
                            result.Description = "btq, product of:";
                        }
                        else
                        {
                            result.AddDetail(payloadExpl);
                            result.Value       = payloadExpl.Value;
                            result.Description = "btq(includeSpanScore=false), result of:";
                        }
                        result.Match = true; // LUCENE-1303
                        return(result);
                    }
                }

                return(new ComplexExplanation(false, 0.0f, "no matching term"));
            }
Ejemplo n.º 4
0
 protected internal virtual void ProcessPayload(Similarity similarity)
 {
     if (termSpans.IsPayloadAvailable)
     {
         DocsAndPositionsEnum postings = termSpans.Postings;
         m_payload = postings.GetPayload();
         if (m_payload != null)
         {
             m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(m_doc, outerInstance.outerInstance.Term.Field, m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore, m_docScorer.ComputePayloadFactor(m_doc, m_spans.Start, m_spans.End, m_payload));
         }
         else
         {
             m_payloadScore = outerInstance.outerInstance.m_function.CurrentScore(m_doc, outerInstance.outerInstance.Term.Field, m_spans.Start, m_spans.End, m_payloadsSeen, m_payloadScore, 1F);
         }
         m_payloadsSeen++;
     }
     else
     {
         // zero out the payload?
     }
 }
Ejemplo n.º 5
0
 protected internal virtual void ProcessPayload(Similarity similarity)
 {
     if (TermSpans.PayloadAvailable)
     {
         DocsAndPositionsEnum postings = TermSpans.Postings;
         Payload = postings.Payload;
         if (Payload != null)
         {
             PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, DocScorer.ComputePayloadFactor(Doc, Spans.Start(), Spans.End(), Payload));
         }
         else
         {
             PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, 1F);
         }
         PayloadsSeen++;
     }
     else
     {
         // zero out the payload?
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Sets up a RAMDirectory, and adds documents (using English.IntToEnglish()) with two fields: field and multiField
        /// and analyzes them using the PayloadAnalyzer </summary>
        /// <param name="similarity"> The Similarity class to use in the Searcher </param>
        /// <param name="numDocs"> The num docs to add </param>
        /// <returns> An IndexSearcher </returns>
        // TODO: randomize
        public virtual IndexSearcher SetUp(Random random, Similarity similarity, int numDocs)
        {
            Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
            PayloadAnalyzer analyzer = new PayloadAnalyzer(this);

            // TODO randomize this
            IndexWriter writer = new IndexWriter(directory, (new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity));
            // writer.infoStream = System.out;
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new TextField(FIELD, English.IntToEnglish(i), Field.Store.YES));
                doc.Add(new TextField(MULTI_FIELD, English.IntToEnglish(i) + "  " + English.IntToEnglish(i), Field.Store.YES));
                doc.Add(new TextField(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES));
                writer.AddDocument(doc);
            }
            Reader = DirectoryReader.Open(writer, true);
            writer.Dispose();

            IndexSearcher searcher = LuceneTestCase.NewSearcher(Reader);
            searcher.Similarity = similarity;
            return searcher;
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Expert: set the <seealso cref="Similarity"/> implementation used by this IndexWriter.
 /// <p>
 /// <b>NOTE:</b> the similarity cannot be null.
 ///
 /// <p>Only takes effect when IndexWriter is first created.
 /// </summary>
 public IndexWriterConfig SetSimilarity(Similarity similarity)
 {
     if (similarity == null)
     {
         throw new System.ArgumentException("similarity must not be null");
     }
     this.similarity = similarity;
     return this;
 }
Ejemplo n.º 8
0
 public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
 {
     this.outerInstance = outerInstance;
     sim = new DefaultSimilarity();
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Writes the document to the directory using the analyzer
 /// and the similarity score; returns the SegmentInfo
 /// describing the new segment
 /// </summary>
 public static SegmentCommitInfo WriteDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
 {
     IndexWriter writer = new IndexWriter(dir, (new IndexWriterConfig(Util.LuceneTestCase.TEST_VERSION_CURRENT, analyzer)).SetSimilarity(similarity ?? IndexSearcher.DefaultSimilarity)); // LuceneTestCase.newIndexWriterConfig(random,
     //writer.SetNoCFSRatio(0.0);
     writer.AddDocument(doc);
     writer.Commit();
     SegmentCommitInfo info = writer.NewestSegment();
     writer.Dispose();
     return info;
 }
Ejemplo n.º 10
0
 public PayloadTermSpanScorer(PayloadTermQuery.PayloadTermWeight outerInstance, TermSpans spans, Weight weight, Similarity.SimScorer docScorer)
     : base(spans, weight, docScorer)
 {
     this.OuterInstance = outerInstance;
     TermSpans = spans;
 }
Ejemplo n.º 11
0
 protected internal virtual void ProcessPayload(Similarity similarity)
 {
     if (TermSpans.PayloadAvailable)
     {
         DocsAndPositionsEnum postings = TermSpans.Postings;
         Payload = postings.Payload;
         if (Payload != null)
         {
             PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, DocScorer.ComputePayloadFactor(Doc, Spans.Start(), Spans.End(), Payload));
         }
         else
         {
             PayloadScore_Renamed = OuterInstance.OuterInstance.Function.CurrentScore(Doc, OuterInstance.OuterInstance.Term.Field(), Spans.Start(), Spans.End(), PayloadsSeen, PayloadScore_Renamed, 1F);
         }
         PayloadsSeen++;
     }
     else
     {
         // zero out the payload?
     }
 }
Ejemplo n.º 12
0
 public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestDocValuesScoring outerInstance, Field field, Similarity @base)
 {
     this.OuterInstance = outerInstance;
     this.Field = field;
     this.@base = @base;
     fooSim = new BoostingSimilarity(@base, "foo_boost");
 }
Ejemplo n.º 13
0
 public BoostingSimilarity(Similarity sim, string boostField)
 {
     this.Sim = sim;
     this.BoostField = boostField;
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Creates a new config that that handles the live <seealso cref="IndexWriter"/>
 /// settings.
 /// </summary>
 internal LiveIndexWriterConfig(IndexWriterConfig config)
 {
     maxBufferedDeleteTerms = config.MaxBufferedDeleteTerms;
     maxBufferedDocs = config.MaxBufferedDocs;
     mergedSegmentWarmer = config.MergedSegmentWarmer;
     RamBufferSizeMB = config.RAMBufferSizeMB;
     readerTermsIndexDivisor = config.ReaderTermsIndexDivisor;
     termIndexInterval = config.TermIndexInterval;
     MatchVersion = config.MatchVersion;
     analyzer = config.Analyzer;
     delPolicy = config.DelPolicy;
     Commit = config.IndexCommit;
     openMode = config.OpenMode;
     similarity = config.Similarity;
     mergeScheduler = config.MergeScheduler;
     writeLockTimeout = config.WriteLockTimeout;
     indexingChain = config.IndexingChain;
     codec = config.Codec;
     infoStream = config.InfoStream;
     mergePolicy = config.MergePolicy;
     indexerThreadPool = config.IndexerThreadPool;
     readerPooling = config.ReaderPooling;
     flushPolicy = config.FlushPolicy;
     PerThreadHardLimitMB = config.RAMPerThreadHardLimitMB;
     useCompoundFile = config.UseCompoundFile;
     checkIntegrityAtMerge = config.CheckIntegrityAtMerge;
 }
Ejemplo n.º 15
0
        private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here

        #endregion Fields

        #region Constructors

        // used by IndexWriterConfig
        internal LiveIndexWriterConfig(Analyzer analyzer, LuceneVersion matchVersion)
        {
            this.analyzer = analyzer;
            this.MatchVersion = matchVersion;
            RamBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
            maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
            maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
            readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
            mergedSegmentWarmer = null;
            termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
            delPolicy = new KeepOnlyLastCommitDeletionPolicy();
            Commit = null;
            useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
            openMode = OpenMode_e.CREATE_OR_APPEND;
            similarity = IndexSearcher.DefaultSimilarity;
            mergeScheduler = new ConcurrentMergeScheduler();
            writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
            indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
            codec = Codec.Default;
            if (codec == null)
            {
                throw new System.NullReferenceException();
            }
            infoStream = Util.InfoStream.Default;
            mergePolicy = new TieredMergePolicy();
            flushPolicy = new FlushByRamOrCountsPolicy();
            readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
            indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
            PerThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
        }
Ejemplo n.º 16
0
 /// <summary>
 /// Construct a <code>TermScorer</code>.
 /// </summary>
 /// <param name="weight">
 ///          The weight of the <code>Term</code> in the query. </param>
 /// <param name="td">
 ///          An iterator over the documents matching the <code>Term</code>. </param>
 /// <param name="docScorer">
 ///          The </code>Similarity.SimScorer</code> implementation
 ///          to be used for score computations. </param>
 internal TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer)
     : base(weight)
 {
     this.DocScorer = docScorer;
     this.DocsEnum = td;
 }
Ejemplo n.º 17
0
 public ExampleSimilarityProvider(TestSimilarityProvider outerInstance)
 {
     this.OuterInstance = outerInstance;
     Sim1 = new Sim1(OuterInstance);
     Sim2 = new Sim2(OuterInstance);
 }
 protected internal JustCompileSpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
     : base(spans, weight, docScorer)
 {
 }
Ejemplo n.º 19
0
 public TermWeight(TermQuery outerInstance, IndexSearcher searcher, TermContext termStates)
 {
     this.OuterInstance = outerInstance;
     Debug.Assert(termStates != null, "TermContext must not be null");
     this.TermStates = termStates;
     this.Similarity = searcher.Similarity;
     this.Stats = Similarity.ComputeWeight(outerInstance.Boost, searcher.CollectionStatistics(outerInstance._term.Field), searcher.TermStatistics(outerInstance._term, termStates));
 }
        public TestRuleSetupAndRestoreClassEnv()
        {
            /*// if verbose: print some debugging stuff about which codecs are loaded.
            if (LuceneTestCase.VERBOSE)
            {
                ISet<string> codecs = Codec.AvailableCodecs();
                foreach (string codec in codecs)
                {
                    Console.WriteLine("Loaded codec: '" + codec + "': " + Codec.ForName(codec).GetType().Name);
                }

                ISet<string> postingsFormats = PostingsFormat.AvailablePostingsFormats();
                foreach (string postingsFormat in postingsFormats)
                {
                    Console.WriteLine("Loaded postingsFormat: '" + postingsFormat + "': " + PostingsFormat.ForName(postingsFormat).GetType().Name);
                }
            }

            SavedInfoStream = InfoStream.Default;
            Random random = RandomizedContext.Current.Random;
            bool v = random.NextBoolean();
            if (LuceneTestCase.INFOSTREAM)
            {
                InfoStream.Default = new ThreadNameFixingPrintStreamInfoStream(Console.Out);
            }
            else if (v)
            {
                InfoStream.Default = new NullInfoStream();
            }

            Type targetClass = RandomizedContext.Current.GetTargetType;
            AvoidCodecs = new HashSet<string>();

            // set back to default
            LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;

            SavedCodec = Codec.Default;
            int randomVal = random.Next(10);
            if ("Lucene3x".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 3 && !ShouldAvoidCodec("Lucene3x"))) // preflex-only setup
            {
                Codec = Codec.ForName("Lucene3x");
                Debug.Assert((Codec is PreFlexRWCodec), "fix your classpath to have tests-framework.jar before lucene-core.jar");
                LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
            }
            else if ("Lucene40".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && randomVal == 0 && !ShouldAvoidCodec("Lucene40"))) // 4.0 setup
            {
                Codec = Codec.ForName("Lucene40");
                LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
                Debug.Assert(Codec is Lucene40RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar");
                Debug.Assert((PostingsFormat.ForName("Lucene40") is Lucene40RWPostingsFormat), "fix your classpath to have tests-framework.jar before lucene-core.jar");
            }
            else if ("Lucene41".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 1 && !ShouldAvoidCodec("Lucene41")))
            {
                Codec = Codec.ForName("Lucene41");
                LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
                Debug.Assert(Codec is Lucene41RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar");
            }
            else if ("Lucene42".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 2 && !ShouldAvoidCodec("Lucene42")))
            {
                Codec = Codec.ForName("Lucene42");
                LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
                Debug.Assert(Codec is Lucene42RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar");
            }
            else if ("Lucene45".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && "random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) && "random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) && randomVal == 5 && !ShouldAvoidCodec("Lucene45")))
            {
                Codec = Codec.ForName("Lucene45");
                LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
                Debug.Assert(Codec is Lucene45RWCodec, "fix your classpath to have tests-framework.jar before lucene-core.jar");
            }
            else if (("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT) == false) || ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT) == false))
            {
                // the user wired postings or DV: this is messy
                // refactor into RandomCodec....

                PostingsFormat format;
                if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT))
                {
                    format = PostingsFormat.ForName("Lucene41");
                }
                else
                {
                    format = PostingsFormat.ForName(LuceneTestCase.TEST_POSTINGSFORMAT);
                }

                DocValuesFormat dvFormat;
                if ("random".Equals(LuceneTestCase.TEST_DOCVALUESFORMAT))
                {
                    dvFormat = DocValuesFormat.ForName("Lucene45");
                }
                else
                {
                    dvFormat = DocValuesFormat.ForName(LuceneTestCase.TEST_DOCVALUESFORMAT);
                }

                Codec = new Lucene46CodecAnonymousInnerClassHelper(this, format, dvFormat);
            }
            else if ("Asserting".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && randomVal == 6 && !ShouldAvoidCodec("Asserting")))
            {
                Codec = new AssertingCodec();
            }
            else if ("Compressing".Equals(LuceneTestCase.TEST_CODEC) || ("random".Equals(LuceneTestCase.TEST_CODEC) && randomVal == 5 && !ShouldAvoidCodec("Compressing")))
            {
                Codec = CompressingCodec.RandomInstance(random);
            }
            else if (!"random".Equals(LuceneTestCase.TEST_CODEC))
            {
                Codec = Codec.ForName(LuceneTestCase.TEST_CODEC);
            }
            else if ("random".Equals(LuceneTestCase.TEST_POSTINGSFORMAT))
            {
                Codec = new RandomCodec(random, AvoidCodecs);
            }
            else
            {
                Debug.Assert(false);
            }
            Codec.Default = Codec;
            */
            Random random = new Random(1);
            Similarity = random.NextBoolean() ? (Similarity)new DefaultSimilarity() : new RandomSimilarityProvider(new Random(1));
            /*
            // Check codec restrictions once at class level.
            try
            {
                CheckCodecRestrictions(Codec);
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("NOTE: " + e.Message + " Suppressed codecs: " + Arrays.ToString(AvoidCodecs.ToArray()));
                throw e;
            }*/
        }
Ejemplo n.º 21
0
 public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
 {
     return(new PayloadTermSpanScorer(this, (TermSpans)query.GetSpans(context, acceptDocs, TermContexts), this, Similarity.DoSimScorer(Stats, context)));
 }