TermPositions() public abstract method

Returns an unpositioned Lucene.Net.Index.TermPositions enumerator.
public abstract TermPositions ( ) : TermPositions
return TermPositions
Example #1
0
        /// <summary>
        /// Indexes 5000 documents that all contain the same term into one optimized
        /// segment, wraps the frequency stream of that segment's positions enumerator
        /// in a <c>CountingStream</c>, and then runs the same series of
        /// <c>CheckSkipTo</c> checks twice, resetting <c>counter</c> and re-seeking
        /// the term before each pass.
        /// </summary>
        public virtual void  TestSimpleSkip()
        {
            Term         term        = new Term("test", "a");
            RAMDirectory ramDir      = new RAMDirectory();
            IndexWriter  indexWriter = new IndexWriter(ramDir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int docIndex = 0; docIndex < 5000; docIndex++)
            {
                Document doc       = new Document();
                Field    termField = new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED);
                doc.Add(termField);
                indexWriter.AddDocument(doc);
            }

            indexWriter.Flush();
            indexWriter.Optimize();
            indexWriter.Close();

            IndexReader          segmentReader = SegmentReader.GetOnlySegmentReader(ramDir);
            SegmentTermPositions positions     = (SegmentTermPositions)segmentReader.TermPositions();

            // Replace the frequency stream with a counting wrapper around the original one.
            positions.freqStream_ForNUnit = new CountingStream(this, positions.freqStream_ForNUnit);

            for (int pass = 0; pass < 2; pass++)
            {
                counter = 0;
                positions.Seek(term);

                CheckSkipTo(positions, 14, 185);          // no skips
                CheckSkipTo(positions, 17, 190);          // one skip on level 0
                CheckSkipTo(positions, 287, 200);         // one skip on level 1, two on level 0

                // This check would fail if there were only one skip level,
                // because then more bytes would be read from the freqStream.
                CheckSkipTo(positions, 4800, 250);        // one skip on level 2
            }
        }
        /// <summary>
        /// Indexes ten documents whose field text is "a b", then seeks a single
        /// <c>TermPositions</c> enumerator first to "b" and then to "a". After each
        /// seek it checks that the documents come back in order 0..9 and that the
        /// first position is 1 for "b" and 0 for "a".
        /// </summary>
        public virtual void  TestSeek()
        {
            Directory   directory = new RAMDirectory();
            IndexWriter writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            for (int i = 0; i < 10; i++)
            {
                Document doc = new Document();
                doc.Add(new Field(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }

            writer.Close();
            IndexReader   reader = IndexReader.Open(directory);
            TermPositions tp     = reader.TermPositions();

            // Assertions use (expected, actual) order so that failure messages
            // report the expected and actual values the right way round.
            tp.Seek(new Term(this.field, "b"));
            for (int i = 0; i < 10; i++)
            {
                tp.Next();
                Assert.AreEqual(i, tp.Doc());
                Assert.AreEqual(1, tp.NextPosition());
            }
            tp.Seek(new Term(this.field, "a"));
            for (int i = 0; i < 10; i++)
            {
                tp.Next();
                Assert.AreEqual(i, tp.Doc());
                Assert.AreEqual(0, tp.NextPosition());
            }

            // Release the enumerator and the reader once the checks are done.
            tp.Close();
            reader.Close();
        }
 /// <summary>
 /// Returns the cached <c>postings</c> enumerator, creating it from
 /// <c>reader</c> on the first call.
 /// </summary>
 /// <returns>The value stored in <c>postings</c>.</returns>
 internal TermPositions GetPositions()
 {
     // Already created on an earlier call: hand back the cached instance.
     if (postings != null)
     {
         return postings;
     }

     postings = reader.TermPositions();
     return postings;
 }
Example #4
0
 /// <summary>
 /// Returns the cached <c>postings</c> enumerator, creating it from
 /// <c>reader</c> with the supplied state on the first call.
 /// </summary>
 /// <param name="state">Passed through to <c>reader.TermPositions</c> when the enumerator has to be created.</param>
 /// <returns>The value stored in <c>postings</c>.</returns>
 internal TermPositions GetPositions(IState state)
 {
     // Only hit the reader when nothing has been cached yet.
     return postings ?? (postings = reader.TermPositions(state));
 }
Example #5
0
        /// <summary>
        /// Creates a new <c>MultipleTermPositions</c> instance from one
        /// <c>TermPositions</c> enumerator per entry of <paramref name="terms"/>.
        /// </summary>
        /// <param name="indexReader">Reader each term's enumerator is obtained from.</param>
        /// <param name="terms">Terms to open enumerators for, in array order.</param>
        /// <exception cref="System.IO.IOException">
        /// </exception>
        public MultipleTermPositions(IndexReader indexReader, Term[] terms)
        {
            System.Collections.IList enumerators = new System.Collections.ArrayList();

            foreach (Term term in terms)
            {
                enumerators.Add(indexReader.TermPositions(term));
            }

            _termPositionsQueue = new TermPositionsQueue(enumerators);
            _posList = new IntQueue();
        }
        /// <summary>
        /// Creates a new <c>MultipleTermPositions</c> instance. One
        /// <c>TermPositions</c> enumerator is opened per term and the enumerators
        /// are handed to a <c>TermPositionsQueue</c> in term order.
        /// </summary>
        /// <param name="indexReader">Reader each term's enumerator is obtained from.</param>
        /// <param name="terms">Terms to open enumerators for.</param>
        /// <exception cref="System.IO.IOException">
        /// </exception>
        public MultipleTermPositions(IndexReader indexReader, Term[] terms)
        {
            var enumerators = new System.Collections.Generic.LinkedList<TermPositions>();

            foreach (Term term in terms)
            {
                TermPositions positions = indexReader.TermPositions(term);
                enumerators.AddLast(positions);
            }

            _termPositionsQueue = new TermPositionsQueue(enumerators);
            _posList            = new IntQueue();
        }
        /// <summary>
        /// Creates a new <c>MultipleTermPositions</c> instance backed by one
        /// <c>TermPositions</c> enumerator for every element of <paramref name="terms"/>.
        /// </summary>
        /// <param name="indexReader">Reader each term's enumerator is obtained from.</param>
        /// <param name="terms">Terms to open enumerators for, in array order.</param>
        /// <exception cref="IOException">
        /// </exception>
        public MultipleTermPositions(IndexReader indexReader, Term[] terms)
        {
            System.Collections.IList perTermPositions = new System.Collections.ArrayList();

            int index = 0;
            while (index < terms.Length)
            {
                perTermPositions.Add(indexReader.TermPositions(terms[index]));
                index++;
            }

            _termPositionsQueue = new TermPositionsQueue(perTermPositions);
            _posList            = new IntQueue();
        }
Example #8
0
        /// <summary>
        /// Creates a new <c>MultipleTermPositions</c> instance. The enumerators
        /// returned by <c>indexReader.TermPositions</c> for each term are collected
        /// in a list and passed to a new <c>TermPositionsQueue</c>.
        /// </summary>
        /// <param name="indexReader">Reader each term's enumerator is obtained from.</param>
        /// <param name="terms">Terms to open enumerators for, in array order.</param>
        /// <exception cref="IOException">
        /// </exception>
        public MultipleTermPositions(IndexReader indexReader, Term[] terms)
        {
            IList<TermPositions> collected = new List<TermPositions>();

            foreach (Term current in terms)
            {
                collected.Add(indexReader.TermPositions(current));
            }

            _termPositionsQueue = new TermPositionsQueue(collected);
            _posList            = new IntQueue();
        }
Example #9
0
        /// <summary>
        /// Starts five ingester threads that share one <c>IndexWriter</c> and one
        /// <c>ByteArrayPool</c>, waits for all of them, and then walks every term
        /// and every position in the resulting index. For each position it checks
        /// that the payload, decoded through the pool, equals the term's text.
        /// Finally it checks that the pool size equals the number of threads.
        /// </summary>
        public virtual void  TestThreadSafety()
        {
            rnd = NewRandom();
            int           numThreads = 5;
            int           numDocs    = 50;
            ByteArrayPool pool       = new ByteArrayPool(numThreads, 5);

            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);

            System.String field = "test";

            ThreadClass[] ingesters = new ThreadClass[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i] = new AnonymousClassThread(numDocs, field, pool, writer, this);
                ingesters[i].Start();
            }

            // Wait for every ingester before closing the writer.
            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i].Join();
            }
            writer.Close();
            IndexReader reader = IndexReader.Open(dir, true, null);
            TermEnum    terms  = reader.Terms(null);

            while (terms.Next(null))
            {
                TermPositions tp = reader.TermPositions(terms.Term, null);
                while (tp.Next(null))
                {
                    int freq = tp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        tp.NextPosition(null);
                        // (expected, actual): the term text is the expected value, the payload is what was read back.
                        Assert.AreEqual(terms.Term.Text, pool.BytesToString(tp.GetPayload(new byte[5], 0, null)));
                    }
                }
                tp.Close();
            }
            terms.Close();
            reader.Close();

            // (expected, actual): the pool is expected to hold numThreads entries.
            Assert.AreEqual(numThreads, pool.Size());
        }
Example #10
0
        /// <summary>
        /// Wraps a token stream in a <c>CachingTokenFilter</c>, consumes it twice
        /// before indexing, indexes a document built on it, verifies the indexed
        /// frequencies and positions of "term1", "term2" and "term3", and finally
        /// resets and consumes the stream once more.
        /// </summary>
        public virtual void  TestCaching()
        {
            Directory   directory   = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
            Document    document    = new Document();
            TokenStream cached      = new CachingTokenFilter(new AnonymousClassTokenStream(this));

            Field preanalyzed = new Field("preanalyzed", cached, TermVector.NO);
            document.Add(preanalyzed);

            // Step 1: consume all tokens twice before the document is added to the index.
            checkTokens(cached);
            cached.Reset();
            checkTokens(cached);

            // Step 2: add the document and verify that all tokens were indexed.
            // The stream is deliberately not reset here; the DocumentWriter should do that implicitly.
            indexWriter.AddDocument(document, null);
            indexWriter.Close();

            IndexReader   indexReader = IndexReader.Open(directory, true, null);
            TermPositions positions   = indexReader.TermPositions(new Term("preanalyzed", "term1"), null);

            // "term1": one occurrence at position 0.
            Assert.IsTrue(positions.Next(null));
            Assert.AreEqual(1, positions.Freq);
            Assert.AreEqual(0, positions.NextPosition(null));

            // "term2": two occurrences at positions 1 and 3.
            positions.Seek(new Term("preanalyzed", "term2"), null);
            Assert.IsTrue(positions.Next(null));
            Assert.AreEqual(2, positions.Freq);
            Assert.AreEqual(1, positions.NextPosition(null));
            Assert.AreEqual(3, positions.NextPosition(null));

            // "term3": one occurrence at position 2.
            positions.Seek(new Term("preanalyzed", "term3"), null);
            Assert.IsTrue(positions.Next(null));
            Assert.AreEqual(1, positions.Freq);
            Assert.AreEqual(2, positions.NextPosition(null));
            indexReader.Close();

            // Step 3: reset the stream and consume the tokens again.
            cached.Reset();
            checkTokens(cached);
        }
Example #11
0
            /// <summary>
            /// Looks up the reader registered for the term's field and points
            /// <c>termDocs</c> at that reader's positions for the term, or sets
            /// it to <c>null</c> when no reader is registered for the field.
            /// </summary>
            /// <param name="term">Term to seek to; its field selects the reader.</param>
            public override void  Seek(Term term)
            {
                IndexReader fieldReader = ((IndexReader)Enclosing_Instance.fieldToReader[term.Field()]);

                if (fieldReader == null)
                {
                    termDocs = null;
                }
                else
                {
                    termDocs = fieldReader.TermPositions(term);
                }
            }
Example #12
0
 /// <summary>
 /// Calls <c>EnsureOpen</c> and then delegates to the wrapped reader
 /// <c>in_Renamed</c>.
 /// </summary>
 /// <returns>The enumerator returned by <c>in_Renamed.TermPositions()</c>.</returns>
 public override TermPositions TermPositions()
 {
     EnsureOpen();

     TermPositions delegated = in_Renamed.TermPositions();
     return delegated;
 }
Example #13
0
        /// <summary>
        /// Builds an index with payloads in the given Directory and performs
        /// different tests to verify the payload encoding: a full read-back of
        /// all payload bytes, lazy skipping over unread payloads, payload lengths
        /// around skip points, the ban on calling GetPayload twice for one
        /// position, and a single long payload.
        /// </summary>
        /// <param name="dir">Directory the test index is written to and read from.</param>
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            // Total number of payload bytes consumed by the two indexing loops below:
            // the first loop advances the offset by numTerms for each of 2 * numDocs documents,
            // the second advances it by i * numTerms for i = 0 .. numDocs - 1.
            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d, null);
            }

            // make sure we create more than one segment to test merging
            writer.Commit(null);

            // now make sure the payload lengths differ at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d, null);
            }

            writer.Optimize(null);
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir, true, null);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i], null);
            }

            // Advance all enumerators in lock step, document by document, and copy
            // every payload into verifyPayloadData in the order it is read.
            while (tps[0].Next(null))
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next(null);
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition(null);
                        tps[j].GetPayload(verifyPayloadData, offset, null);
                        offset += tps[j].PayloadLength;
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0], null);

            tp.Next(null);
            tp.NextPosition(null);
            // now we don't read this payload
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            byte[] payload = tp.GetPayload(null, 0, null);
            // (expected, actual): the generated data is the expected value
            Assert.AreEqual(payloadData[numTerms], payload[0]);
            tp.NextPosition(null);

            // we don't read this payload and skip to a different document
            tp.SkipTo(5, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            payload = tp.GetPayload(null, 0, null);
            // (expected, actual): the generated data is the expected value
            Assert.AreEqual(payloadData[5 * numTerms], payload[0]);


            /*
             * Test different lengths at skip points
             */
            tp.Seek(terms[1], null);
            tp.Next(null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(3 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0, null);

            // it is forbidden to call getPayload() more than once
            // without calling nextPosition()
            Assert.Throws <IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

            reader.Close();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d, null);


            writer.Optimize(null);
            // flush
            writer.Close();

            reader = IndexReader.Open(dir, true, null);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm), null);
            tp.Next(null);
            tp.NextPosition(null);

            verifyPayloadData = new byte[tp.PayloadLength];
            tp.GetPayload(verifyPayloadData, 0, null);
            // The payload was set from payloadData starting at offset 100 with length 1500,
            // so compare against exactly that slice.
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }
 /// <summary>
 /// Creates a node whose <c>_tp</c> enumerator is obtained unpositioned from
 /// <paramref name="reader"/> and then seeked to <paramref name="term"/>;
 /// <c>_posLeft</c> starts at zero.
 /// </summary>
 /// <param name="term">Term the enumerator is seeked to.</param>
 /// <param name="reader">Reader the enumerator is obtained from.</param>
 public AbstractTerminalNode(Term term, IndexReader reader)
 {
     _posLeft = 0;

     _tp = reader.TermPositions();
     _tp.Seek(term);
 }
Example #15
0
		/// <summary>
		/// Returns the reader's unpositioned <c>TermPositions</c> enumerator,
		/// cast to <c>TermDocs</c>.
		/// </summary>
		/// <param name="reader">Reader the enumerator is obtained from.</param>
		protected internal override TermDocs TermDocs(IndexReader reader)
		{
			TermPositions positions = reader.TermPositions();
			return (TermDocs) positions;
		}
Example #16
0
 /// <summary>
 /// Obtains an unpositioned <c>TermPositions</c> enumerator from the reader
 /// and returns it as a <c>TermDocs</c>.
 /// </summary>
 /// <param name="reader">Reader the enumerator is obtained from.</param>
 protected internal override TermDocs TermDocs(IndexReader reader)
 {
     TermPositions unpositioned = reader.TermPositions();
     return (TermDocs)unpositioned;
 }
Example #17
0
            /// <summary>
            /// Looks up the reader registered for the term's field and points
            /// <c>termDocs</c> at that reader's positions for the term, or sets
            /// it to <c>null</c> when the lookup yields no reader.
            /// </summary>
            /// <param name="term">Term to seek to; its field selects the reader.</param>
            /// <param name="state">Passed through to <c>TermPositions</c>.</param>
            public override void  Seek(Term term, IState state)
            {
                IndexReader fieldReader = Enclosing_Instance.fieldToReader[term.Field];

                if (fieldReader != null)
                {
                    termDocs = fieldReader.TermPositions(term, state);
                    return;
                }

                termDocs = null;
            }
Example #18
0
		/// <summary>
		/// Asserts that two index readers expose the same content: document
		/// counts, deletion and optimization flags, field names, norms,
		/// per-document deletions, stored fields, the term dictionary, and the
		/// posting lists including positions.
		/// </summary>
		/// <param name="index1">First reader; drives the iteration over fields, documents and terms.</param>
		/// <param name="index2">Second reader, compared against <paramref name="index1"/>.</param>
		public static void  AssertIndexEquals(IndexReader index1, IndexReader index2)
		{
			Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
			Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
			Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
			Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");
			
			// check field names
			// The second collection must come from index2; reading index1 twice
			// would compare the first reader's field names with themselves.
			System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
			System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);

			System.Collections.Generic.ICollection<IFieldable> fields1 = null;
			System.Collections.Generic.ICollection<IFieldable> fields2 = null;

			Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
			// The names are compared pairwise in the enumeration order of the two collections.
			System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
			System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
			while (it1.MoveNext() && it2.MoveNext())
			{
				Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
			}
			
			// check norms
			it1 = fieldsNames1.GetEnumerator();
			while (it1.MoveNext())
			{
				System.String curField = (System.String) it1.Current;
				byte[] norms1 = index1.Norms(curField);
				byte[] norms2 = index2.Norms(curField);
				if (norms1 != null && norms2 != null)
				{
					Assert.AreEqual(norms1.Length, norms2.Length);
					for (int i = 0; i < norms1.Length; i++)
					{
						Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
					}
				}
				else
				{
					// At least one side is null here, so this only passes when both are null.
					Assert.AreSame(norms1, norms2);
				}
			}
			
			// check deletions
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
			}
			
			// check stored fields
			for (int i = 0; i < index1.MaxDoc; i++)
			{
				if (!index1.IsDeleted(i))
				{
					Document doc1 = index1.Document(i);
					Document doc2 = index2.Document(i);
					fields1 = doc1.GetFields();
					fields2 = doc2.GetFields();
					Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
					it1 = fields1.GetEnumerator();
					it2 = fields2.GetEnumerator();
					while (it1.MoveNext() && it2.MoveNext())
					{
						Field curField1 = (Field) it1.Current;
						Field curField2 = (Field) it2.Current;
						Assert.AreEqual(curField1.Name, curField2.Name, "Different fields names for doc " + i + ".");
						Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
					}
				}
			}
			
			// check dictionary and posting lists
			TermEnum enum1 = index1.Terms();
			TermEnum enum2 = index2.Terms();
			TermPositions tp1 = index1.TermPositions();
			TermPositions tp2 = index2.TermPositions();
			while (enum1.Next())
			{
				Assert.IsTrue(enum2.Next());
				Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
				// Both enumerators are seeked to the same (already asserted equal) term.
				tp1.Seek(enum1.Term);
				tp2.Seek(enum1.Term);
				while (tp1.Next())
				{
					Assert.IsTrue(tp2.Next());
					Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
					Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequence in postinglist of term " + enum1.Term + ".");
					for (int i = 0; i < tp1.Freq; i++)
					{
						Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
					}
				}
			}
		}
Example #19
0
 /// <summary>
 /// Delegates to the wrapped reader <c>in_Renamed</c>.
 /// </summary>
 /// <returns>The enumerator returned by <c>in_Renamed.TermPositions()</c>.</returns>
 public override TermPositions TermPositions()
 {
     TermPositions delegated = in_Renamed.TermPositions();
     return delegated;
 }
Example #20
0
 /// <summary>
 /// Calls <c>EnsureOpen</c> and then delegates to the wrapped reader
 /// <c>in_Renamed</c>, passing the state through.
 /// </summary>
 /// <param name="state">Forwarded unchanged to <c>in_Renamed.TermPositions</c>.</param>
 /// <returns>The enumerator returned by the wrapped reader.</returns>
 public override TermPositions TermPositions(IState state)
 {
     EnsureOpen();

     TermPositions delegated = in_Renamed.TermPositions(state);
     return delegated;
 }