예제 #1
0
        /// <summary>
        /// Moves this scorer forward to the next entry whose document number is
        /// greater than or equal to <paramref name="target"/>. <br/>
        /// The locally buffered documents are examined first; when none of them
        /// qualifies, the search is delegated to <see cref="TermDocs.SkipTo(int)" />.
        /// </summary>
        /// <param name="target">The lowest acceptable document number.</param>
        /// <returns>
        /// The document number that was reached, or <c>NO_MORE_DOCS</c> when the
        /// underlying stream has no entry at or after <paramref name="target"/>.
        /// </returns>
        public override int Advance(int target)
        {
            // Step past the current entry and walk the remaining buffered documents.
            pointer++;
            while (pointer < pointerMax)
            {
                int buffered = docs[pointer];
                if (buffered >= target)
                {
                    doc = buffered;
                    return(doc);
                }
                pointer++;
            }

            // Buffer exhausted: ask the underlying stream to skip ahead.
            if (!termDocs.SkipTo(target))
            {
                doc = NO_MORE_DOCS;
                return(doc);
            }

            // Re-seed the buffer so it holds exactly the entry the stream landed on.
            pointerMax = 1;
            pointer    = 0;
            doc        = termDocs.Doc;
            docs[0]    = doc;
            freqs[0]   = termDocs.Freq;
            return(doc);
        }
예제 #2
0
 /// <summary>
 /// Skips the wrapped term-docs stream to <paramref name="target"/> and then keeps
 /// stepping forward until it reaches a document that is set in <c>_docset</c>.
 /// </summary>
 /// <param name="target">The lowest acceptable document number.</param>
 /// <returns>
 /// The first accepted document reached, or <c>DocIdSetIterator.NO_MORE_DOCS</c>
 /// when the stream runs out first.
 /// </returns>
 public override int Advance(int target)
 {
     // Nothing at or after the target: the iterator is exhausted.
     if (!_td.SkipTo(target))
     {
         doc = DocIdSetIterator.NO_MORE_DOCS;
         return(doc);
     }

     doc = _td.Doc;

     // Walk forward until a document passes the doc-set filter.
     while (!_docset.Get(doc))
     {
         if (!_td.Next())
         {
             doc = DocIdSetIterator.NO_MORE_DOCS;
             break;
         }
         doc = _td.Doc;
     }

     return(doc);
 }
예제 #3
0
            /// <summary>
            /// Forwards the skip request to the wrapped <c>termDocs</c>, if there is one.
            /// </summary>
            /// <param name="target">The document number to skip to.</param>
            /// <returns>
            /// <c>false</c> when no term-docs instance is attached; otherwise whatever
            /// the wrapped instance's <c>SkipTo</c> returns.
            /// </returns>
            public virtual bool SkipTo(int target)
            {
                // Short-circuit keeps the null case from ever touching the wrapped instance.
                return(termDocs != null && termDocs.SkipTo(target));
            }
예제 #4
0
 /// <summary>
 /// Skips the wrapped term-docs stream to <paramref name="target"/> and records
 /// where it landed.
 /// </summary>
 /// <param name="target">The lowest acceptable document number.</param>
 /// <returns>
 /// The document number reached, or <c>DocIdSetIterator.NO_MORE_DOCS</c> when the
 /// skip fails; in that case the stream is also disposed.
 /// </returns>
 public override int Advance(int target)
 {
     if (!_td.SkipTo(target))
     {
         // Exhausted: release the stream before reporting the sentinel.
         _td.Dispose();
         _doc = DocIdSetIterator.NO_MORE_DOCS;
         return(_doc);
     }

     _doc = _td.Doc;
     return(_doc);
 }
        // TODO (follow-up): use this function to filter the search results.
        /// <summary>
        /// Sums how often <paramref name="searchTerm"/> occurs in the "LineText" field
        /// across the given rows.
        /// </summary>
        /// <param name="listFoundDocs">
        /// Rows to inspect. Each row's <c>LineNumber</c> is used as the document number
        /// handed to <c>TermDocs.SkipTo</c>. The postings cursor only moves forward, so
        /// rows must be supplied in ascending <c>LineNumber</c> order; a row whose number
        /// lies behind the cursor contributes nothing.
        /// </param>
        /// <param name="searchTerm">The term text to look up in the "LineText" field.</param>
        /// <returns>
        /// The total term frequency over those rows whose document actually contains the term.
        /// </returns>
        public int GetMatchWordCount(IEnumerable <SampleDataFileRow> listFoundDocs, string searchTerm)
        {
            int totalFreq = 0;

            // Both the reader and the postings cursor hold index resources; release them
            // even if enumeration or a Lucene call throws.
            using (IndexReader reader = IndexReader.Open(DirectoryFs, true))
            using (TermDocs termDocs = reader.TermDocs())
            {
                termDocs.Seek(new Term("LineText", searchTerm));

                // Document the cursor currently sits on; -1 means "not positioned yet".
                int currentDoc = -1;

                foreach (SampleDataFileRow singleRow in listFoundDocs)
                {
                    int targetDoc = singleRow.LineNumber;

                    // SkipTo always moves past the current entry, so only call it when
                    // the cursor is still behind the requested document.
                    if (currentDoc < targetDoc)
                    {
                        if (!termDocs.SkipTo(targetDoc))
                        {
                            // No postings at or after this document: Doc/Freq are no
                            // longer valid and later rows cannot match either.
                            break;
                        }
                        currentDoc = termDocs.Doc;
                    }

                    // SkipTo may land on a later document when the term is absent from
                    // the requested one; count the frequency only on an exact hit.
                    if (currentDoc == targetDoc)
                    {
                        totalFreq += termDocs.Freq;
                    }
                }
            }

            return(totalFreq);
        }
예제 #6
0
 /// <summary>
 /// Skips to <paramref name="target"/> across the sequence of sub-readers.
 /// The current sub-reader is tried first, with the target shifted by
 /// <c>base_Renamed</c>; if it cannot satisfy the request, the next sub-reader
 /// is opened and the skip is retried.
 /// </summary>
 /// <param name="target">The document number to skip to.</param>
 /// <returns>
 /// <c>true</c> when some sub-reader accepted the skip; <c>false</c> once every
 /// sub-reader has been tried.
 /// </returns>
 public virtual bool SkipTo(int target)
 {
     // Try the sub-reader that is already open.
     if (current != null && current.SkipTo(target - base_Renamed))
     {
         return(true);
     }

     // No sub-readers left to try.
     if (pointer >= readers.Length)
     {
         return(false);
     }

     // Move on to the next sub-reader, then retry against it.
     int readerIndex = pointer;
     base_Renamed = starts [readerIndex];
     pointer      = readerIndex + 1;
     current      = TermDocs(readerIndex);
     return(SkipTo(target));
 }
예제 #7
0
 /// <summary>
 /// Delegates the skip request unchanged to the wrapped <c>in_Renamed</c> instance.
 /// </summary>
 /// <param name="i">The document number to skip to.</param>
 /// <returns>The value returned by the wrapped instance's <c>SkipTo</c>.</returns>
 public virtual bool SkipTo(int i)
 {
     bool skipped = in_Renamed.SkipTo(i);
     return(skipped);
 }
        /// <summary>
        /// Builds a small in-memory index with three terms and checks the positions
        /// reported by <c>TermDocs.Next</c> and <c>TermDocs.SkipTo</c> for each of them,
        /// both after stepping with <c>Next</c> and when skipping straight after <c>Seek</c>.
        /// </summary>
        /// <param name="indexDivisor">Passed through as the last argument of <c>IndexReader.Open</c>.</param>
        public virtual void  testSkipTo(int indexDivisor)
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            // "aaa" documents: the assertions below expect them at doc numbers 0..9
            // (presumably AddDoc adds exactly one document per call).
            Term ta = new Term("content", "aaa");

            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, "aaa aaa aaa aaa");
            }

            // "bbb" documents: expected at doc numbers 10..25.
            Term tb = new Term("content", "bbb");

            for (int i = 0; i < 16; i++)
            {
                AddDoc(writer, "bbb bbb bbb bbb");
            }

            // "ccc" documents: expected at doc numbers 26..75.
            Term tc = new Term("content", "ccc");

            for (int i = 0; i < 50; i++)
            {
                AddDoc(writer, "ccc ccc ccc ccc");
            }

            // assure that we deal with a single segment
            writer.Optimize();
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, null, true, indexDivisor);

            TermDocs tdocs = reader.TermDocs();

            // without optimization (assumption skipInterval == 16)

            // with next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(0, tdocs.Doc());
            // Each document text repeats its term four times, hence a frequency of 4.
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(1, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // The cursor is on doc 1; a target behind it is expected to still advance
            // to the next entry (doc 2) rather than stay put or move backwards.
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(2, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            // Doc 9 is the last "aaa" document, so skipping further must fail.
            Assert.IsFalse(tdocs.SkipTo(10));

            // without next
            tdocs.Seek(ta);
            Assert.IsTrue(tdocs.SkipTo(0));
            Assert.AreEqual(0, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(4));
            Assert.AreEqual(4, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(9));
            Assert.AreEqual(9, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(10));

            // exactly skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tb);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(10, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(11, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // Target 5 is behind the cursor (doc 11): expected to land on the next entry, doc 12.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(12, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            // Doc 25 is the last "bbb" document.
            Assert.IsFalse(tdocs.SkipTo(26));

            // without next
            tdocs.Seek(tb);
            // Target 5 precedes every "bbb" document: expected to land on the first one, doc 10.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(10, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(15));
            Assert.AreEqual(15, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(24));
            Assert.AreEqual(24, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(25));
            Assert.AreEqual(25, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(26));

            // much more than skipInterval documents and therefore with optimization

            // with next
            tdocs.Seek(tc);
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(26, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            Assert.IsTrue(tdocs.Next());
            Assert.AreEqual(27, tdocs.Doc());
            Assert.AreEqual(4, tdocs.Freq());
            // Target 5 is behind the cursor (doc 27): expected to land on the next entry, doc 28.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(28, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            // Doc 75 is the last "ccc" document.
            Assert.IsFalse(tdocs.SkipTo(76));

            //without next
            tdocs.Seek(tc);
            // Target 5 precedes every "ccc" document: expected to land on the first one, doc 26.
            Assert.IsTrue(tdocs.SkipTo(5));
            Assert.AreEqual(26, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(40));
            Assert.AreEqual(40, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(57));
            Assert.AreEqual(57, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(74));
            Assert.AreEqual(74, tdocs.Doc());
            Assert.IsTrue(tdocs.SkipTo(75));
            Assert.AreEqual(75, tdocs.Doc());
            Assert.IsFalse(tdocs.SkipTo(76));

            // Release the cursor, the reader and the directory, in that order.
            tdocs.Close();
            reader.Close();
            dir.Close();
        }
예제 #9
0
 /// <summary>
 /// Skips the wrapped term-docs stream to <paramref name="target"/> and stores
 /// the resulting position in <c>_docid</c>.
 /// </summary>
 /// <param name="target">The lowest acceptable document number.</param>
 /// <returns>
 /// The document number reached, or <c>NO_MORE_DOCS</c> when the skip fails.
 /// </returns>
 public override int Advance(int target)
 {
     if (_termDocs.SkipTo(target))
     {
         _docid = _termDocs.Doc;
     }
     else
     {
         _docid = NO_MORE_DOCS;
     }

     return(_docid);
 }
예제 #10
0
 /// <summary>
 /// Delegates the skip request, together with the supplied state object,
 /// unchanged to the wrapped <c>in_Renamed</c> instance.
 /// </summary>
 /// <param name="i">The document number to skip to.</param>
 /// <param name="state">State object forwarded as-is to the wrapped instance.</param>
 /// <returns>The value returned by the wrapped instance's <c>SkipTo</c>.</returns>
 public virtual bool SkipTo(int i, IState state)
 {
     bool skipped = in_Renamed.SkipTo(i, state);
     return(skipped);
 }
예제 #11
0
 /// <summary>
 /// Delegates the skip request unchanged to the wrapped <c>termDocs</c> instance.
 /// </summary>
 /// <param name="target">The document number to skip to.</param>
 /// <returns>The value returned by the wrapped instance's <c>SkipTo</c>.</returns>
 public virtual bool SkipTo(int target)
 {
     bool skipped = termDocs.SkipTo(target);
     return(skipped);
 }
예제 #12
0
            /// <summary>
            /// Builds an explanation tree for this weight on document <paramref name="doc"/>.
            /// The tree has a query-weight part (optional boost, idf, queryNorm) and a
            /// field-weight part (tf, idf, fieldNorm); the overall value is their product.
            /// </summary>
            /// <param name="reader">Reader used to look up the term's postings and the field norms.</param>
            /// <param name="doc">The document number being explained.</param>
            /// <returns>
            /// The field-weight explanation alone when the query-weight value equals 1.0;
            /// otherwise the combined explanation.
            /// </returns>
            public override Explanation Explain(IndexReader reader, int doc)
            {
                ComplexExplanation weightExpl = new ComplexExplanation();

                weightExpl.Description = "weight(" + Query + " in " + doc + "), product of:";

                // The idf detail is shared by the query-weight and field-weight parts.
                Explanation idfExpl = new Explanation(idf, idfExp.Explain());

                // ---- query weight: [boost] * idf * queryNorm ----
                Explanation queryWeightExpl = new Explanation();

                queryWeightExpl.Description = "queryWeight(" + Query + "), product of:";

                Explanation boostDetail = new Explanation(Enclosing_Instance.Boost, "boost");

                // A boost of exactly 1 is left out of the detail list, but it still
                // takes part in the product computed below.
                if (Enclosing_Instance.Boost != 1.0f)
                {
                    queryWeightExpl.AddDetail(boostDetail);
                }
                queryWeightExpl.AddDetail(idfExpl);

                Explanation queryNormDetail = new Explanation(queryNorm, "queryNorm");

                queryWeightExpl.AddDetail(queryNormDetail);
                queryWeightExpl.Value = boostDetail.Value * idfExpl.Value * queryNormDetail.Value;

                weightExpl.AddDetail(queryWeightExpl);

                // ---- field weight: tf * idf * fieldNorm ----
                string             fieldName       = Enclosing_Instance.term.Field;
                ComplexExplanation fieldWeightExpl = new ComplexExplanation();

                fieldWeightExpl.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:";

                Explanation tfDetail = new Explanation();
                int         termFreq = 0;
                TermDocs    postings = reader.TermDocs(enclosingInstance.term);

                if (postings == null)
                {
                    tfDetail.Value       = 0.0f;
                    tfDetail.Description = "no matching term";
                }
                else
                {
                    try
                    {
                        // SkipTo may land beyond the requested document, so the
                        // frequency is only taken on an exact hit.
                        if (postings.SkipTo(doc) && postings.Doc == doc)
                        {
                            termFreq = postings.Freq;
                        }
                    }
                    finally
                    {
                        postings.Close();
                    }
                    tfDetail.Value       = similarity.Tf(termFreq);
                    tfDetail.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + termFreq + ")";
                }
                fieldWeightExpl.AddDetail(tfDetail);
                fieldWeightExpl.AddDetail(idfExpl);

                Explanation fieldNormDetail = new Explanation();

                // Fall back to a neutral norm of 1 when the reader returns no norms for the field.
                byte[] norms       = reader.Norms(fieldName);
                float  decodedNorm = 1.0f;

                if (norms != null)
                {
                    decodedNorm = Similarity.DecodeNorm(norms[doc]);
                }

                fieldNormDetail.Value       = decodedNorm;
                fieldNormDetail.Description = "fieldNorm(field=" + fieldName + ", doc=" + doc + ")";
                fieldWeightExpl.AddDetail(fieldNormDetail);

                fieldWeightExpl.Match = tfDetail.IsMatch;
                fieldWeightExpl.Value = tfDetail.Value * idfExpl.Value * fieldNormDetail.Value;

                weightExpl.AddDetail(fieldWeightExpl);
                weightExpl.Match = fieldWeightExpl.Match;

                // ---- combine both parts ----
                weightExpl.Value = queryWeightExpl.Value * fieldWeightExpl.Value;

                // A query weight of exactly 1 adds nothing, so only the field part is reported.
                return(queryWeightExpl.Value == 1.0f ? fieldWeightExpl : weightExpl);
            }