Example #1
 public override bool Next()
 {
     // all positions of the current document consumed: advance to the next doc
     if (count == freq)
     {
         if (!positions.Next())
         {
             doc = System.Int32.MaxValue;   // sentinel: no more documents
             return false;
         }
         doc   = positions.Doc();
         freq  = positions.Freq();
         count = 0;
     }
     // consume one more position within the current document
     position = positions.NextPosition();
     count++;
     return true;
 }
Example #2
 public override bool Next()
 {
     if (count == freq)
     {
         if (!internalPositions.Next())
         {
             internalDoc = int.MaxValue;
             return false;
         }
         internalDoc = internalPositions.Doc;
         freq        = internalPositions.Freq;
         count       = 0;
     }
     position = internalPositions.NextPosition();
     count++;
     return true;
 }
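
Both examples implement the same two-level cursor: the outer positions.Next() advances to the next matching document and caches its frequency, and each call to the wrapper's Next() then consumes one position of the current document before moving on. A minimal consumption sketch is shown below; the tps variable and its Doc/Position accessors are hypothetical stand-ins for an instance of the enclosing wrapper class, not part of the listings above.

 // Hypothetical driver loop: each Next() yields one (doc, position) pair and
 // silently crosses a document boundary once the current doc's freq is used up.
 while (tps.Next())
 {
     System.Console.WriteLine("doc=" + tps.Doc + " pos=" + tps.Position);
 }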
        public virtual void  TestCaching()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();
            TokenStream stream = new AnonymousClassTokenStream(this);

            stream = new CachingTokenFilter(stream);

            doc.Add(new Field("preanalyzed", stream, TermVector.NO));

            // 1) we consume all tokens twice before we add the doc to the index
            checkTokens(stream);
            stream.Reset();
            checkTokens(stream);

            // 2) now add the document to the index and verify if all tokens are indexed
            //    don't reset the stream here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);
            writer.Close();

            IndexReader   reader        = IndexReader.Open(dir);
            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
            reader.Close();

            // 3) reset stream and consume tokens again
            stream.Reset();
            checkTokens(stream);
        }
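
The AnonymousClassTokenStream and checkTokens helpers referenced above are not part of this listing. A minimal sketch of such a verification helper, assuming the attribute-based TokenStream API of Lucene.Net 3.x (IncrementToken / ITermAttribute) and an assumed tokens field holding the expected terms in order (for the position assertions above that would be "term1", "term2", "term3", "term2"):

        // Sketch only: "tokens" is an assumed field holding the expected terms;
        // assumes using Lucene.Net.Analysis and Lucene.Net.Analysis.Tokenattributes.
        private void checkTokens(TokenStream stream)
        {
            ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();
            int count = 0;
            while (stream.IncrementToken())
            {
                Assert.IsTrue(count < tokens.Length);
                Assert.AreEqual(tokens[count], termAtt.Term);
                count++;
            }
            Assert.AreEqual(tokens.Length, count);
        }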
        public virtual void TestPayloadsPos0()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true,
                                                 IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();

            System.IO.MemoryStream ms = new System.IO.MemoryStream();
            System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
            sw.Write("a a b c d e a f g h i j a b k k");
            // flush to the stream & reset its position so it can be read
            sw.Flush();
            ms.Position = 0;
            doc.Add(new Field("content", new System.IO.StreamReader(ms)));
            writer.AddDocument(doc);

            IndexReader r = writer.GetReader();

            TermPositions tp    = r.TermPositions(new Term("content", "a"));
            int           count = 0;

            Assert.IsTrue(tp.Next());
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq);
            int expected = 0;

            Assert.AreEqual(expected, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.IsFalse(tp.Next());

            IndexSearcher searcher = new IndexSearcher(r);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;

            //System.out.println("\ngetPayloadSpans test");
            Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(searcher.IndexReader);
            while (pspans.Next())
            {
                //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
                System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
                sawZero |= pspans.Start() == 0;
                foreach (byte[] payload in payloads)
                {
                    count++;
                    //System.Console.WriteLine(System.Text.Encoding.UTF8.GetString(payload));
                }
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);

            //System.out.println("\ngetSpans test");
            Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(searcher.IndexReader);
            count   = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            //System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(searcher.IndexReader);

            System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
            count = pls.Count;
            foreach (byte[] bytes in pls)
            {
                System.String s = System.Text.Encoding.UTF8.GetString(bytes);
                //System.Console.WriteLine(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Close();
            searcher.IndexReader.Close();
            dir.Close();
        }
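
The TestPayloadAnalyzer used above is not shown, but the final loop expects every payload to decode to a string such as "pos: 0". One plausible sketch of a filter producing payloads of that shape, assuming the Lucene.Net 3.x attribute API (the class name and details are assumptions, not the actual analyzer):

        // Sketch: tags each token with its absolute position as a UTF-8 "pos: N" payload.
        // Assumes using Lucene.Net.Analysis, Lucene.Net.Analysis.Tokenattributes, Lucene.Net.Index.
        sealed class PositionPayloadFilter : TokenFilter
        {
            private readonly IPayloadAttribute payloadAtt;
            private readonly IPositionIncrementAttribute posIncrAtt;
            private int pos = -1;

            internal PositionPayloadFilter(TokenStream input) : base(input)
            {
                payloadAtt = AddAttribute<IPayloadAttribute>();
                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            }

            public override bool IncrementToken()
            {
                if (!input.IncrementToken())
                {
                    return false;
                }
                pos += posIncrAtt.PositionIncrement;
                payloadAtt.Payload = new Payload(System.Text.Encoding.UTF8.GetBytes("pos: " + pos));
                return true;
            }
        }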
        public virtual void  TestSetPosition()
        {
            Analyzer    analyzer = new AnonymousClassAnalyzer(this);
            Directory   store    = new MockRAMDirectory();
            IndexWriter writer   = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            Document    d        = new Document();

            d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d);
            writer.Optimize();
            writer.Close();


            IndexSearcher searcher = new IndexSearcher(store, true);

            TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));

            pos.Next();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
            pos.Next();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitly.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for a non-existing term,
            // even if another searched term exists at the same position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succeed for a non-existing term,
            // because another searched term exists at the same position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // should not find "1 2" because there is a gap of 1 in the index
            QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));

            q    = (PhraseQuery)qp.Parse("\"1 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // omitted stop word cannot help because stop filter swallows the increments.
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // query parser alone won't help, because stop filter swallows the increments.
            qp.EnablePositionIncrements = true;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // stop filter alone won't help, because query parser swallows the increments.
            qp.EnablePositionIncrements = false;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // when both qp and stopFilter propagate increments, we should find the doc.
            qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
            qp.EnablePositionIncrements = true;
            q    = (PhraseQuery)qp.Parse("\"1 stop 2\"");
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
        }
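
TestSetPosition relies on an AnonymousClassAnalyzer (not shown) that ignores its input and emits the tokens "1" through "5" with custom position increments, which is what the assertions above encode: "1" at position 0, "2" at 2 (a gap of one), "3" at 3, "4" also at 3 (increment 0), and "5" at 4. A sketch of a token stream with that shape, assuming the Lucene.Net 3.x attribute API (class name and details assumed):

        // Sketch only; assumes using Lucene.Net.Analysis and Lucene.Net.Analysis.Tokenattributes.
        sealed class PositionedTokenStream : TokenStream
        {
            private static readonly string[] Terms = { "1", "2", "3", "4", "5" };
            private static readonly int[] Increments = { 1, 2, 1, 0, 1 }; // positions 0, 2, 3, 3, 4
            private int i;
            private readonly ITermAttribute termAtt;
            private readonly IPositionIncrementAttribute posIncrAtt;

            public PositionedTokenStream()
            {
                termAtt = AddAttribute<ITermAttribute>();
                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
            }

            public override bool IncrementToken()
            {
                if (i == Terms.Length)
                {
                    return false;
                }
                ClearAttributes();
                termAtt.SetTermBuffer(Terms[i]);
                posIncrAtt.PositionIncrement = Increments[i];
                i++;
                return true;
            }

            protected override void Dispose(bool disposing)
            {
                // nothing to release
            }
        }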