/// <summary>
/// Moves the wrapped <c>tp</c> to <paramref name="target"/> via <c>tp.SkipTo</c>
/// and refreshes the cached state of this instance.
/// </summary>
/// <param name="target">Document number forwarded to <c>tp.SkipTo</c>.</param>
/// <returns>
/// <c>true</c> when <c>tp.SkipTo</c> succeeded; <c>false</c> when it failed, in which
/// case <c>tp</c> has been closed and <c>doc</c> holds <c>Int32.MaxValue</c>.
/// </returns>
internal bool SkipTo(int target)
 {
     if (tp.SkipTo(target))
     {
         // Positioned on a document: cache its number and restart position counting.
         doc      = tp.Doc;
         position = 0;
         return true;
     }

     // Skip failed: release the stream and park on the sentinel value.
     tp.Close();
     doc = int.MaxValue;
     return false;
 }
		/// <summary>
		/// Skips <paramref name="tp"/> to <paramref name="target"/> and asserts that the
		/// <c>counter</c> field stayed below <paramref name="maxCounter"/>, that the reported
		/// document equals the target, that the frequency is 1, and that the first payload
		/// byte equals the target truncated to a byte.
		/// </summary>
		/// <param name="tp">Term positions to skip and inspect.</param>
		/// <param name="target">Document number to skip to.</param>
		/// <param name="maxCounter">Value that <c>counter</c> must stay strictly below.</param>
		public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
		{
			tp.SkipTo(target);

			string tooManyBytes = "Too many bytes read: " + counter;
			Assert.Greater(maxCounter, counter, tooManyBytes);

			string wrongDocument = "Wrong document " + tp.Doc + " after skipTo target " + target;
			Assert.AreEqual(target, tp.Doc, wrongDocument);

			string wrongFrequency = "Frequency is not 1: " + tp.Freq;
			Assert.AreEqual(1, tp.Freq, wrongFrequency);

			// Advance to the first position before asking for its payload.
			tp.NextPosition();
			byte[] payload = new byte[1];
			tp.GetPayload(payload, 0);

			byte expectedPayload = (byte) target;
			string wrongPayload = "Wrong payload for the target " + target + ": " + payload[0];
			Assert.AreEqual(expectedPayload, payload[0], wrongPayload);
		}
        /// <summary>
        /// Verifies a single skip: after <c>tp.SkipTo(target)</c> the <c>counter</c> field must
        /// be below <paramref name="maxCounter"/>, the current document must be
        /// <paramref name="target"/>, its frequency must be 1, and the one-byte payload read
        /// at the first position must equal <c>(byte)target</c>.
        /// </summary>
        /// <param name="tp">Term positions under test.</param>
        /// <param name="target">Document number passed to <c>SkipTo</c>.</param>
        /// <param name="maxCounter">Strict upper bound asserted for <c>counter</c>.</param>
        public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
        {
            tp.SkipTo(target);
            Assert.Greater(
                maxCounter,
                counter,
                "Too many bytes read: " + counter);

            Assert.AreEqual(
                target,
                tp.Doc,
                "Wrong document " + tp.Doc + " after skipTo target " + target);
            Assert.AreEqual(
                1,
                tp.Freq,
                "Frequency is not 1: " + tp.Freq);

            // The payload belongs to a position, so step onto the first one.
            tp.NextPosition();
            byte[] firstPayload = new byte[1];
            tp.GetPayload(firstPayload, 0);
            Assert.AreEqual(
                (byte)target,
                firstPayload[0],
                "Wrong payload for the target " + target + ": " + firstPayload[0]);
        }
Example #4
0
 /// <summary>
 /// Skips every queued <c>TermPositions</c> whose current document is below
 /// <paramref name="target"/>, then delegates to <c>Next()</c>.
 /// </summary>
 /// <param name="target">Document number to skip to.</param>
 /// <returns>The result of <c>Next()</c> after the queue has been advanced.</returns>
 public bool SkipTo(int target)
 {
     // Keep pulling the head of the queue while it is still behind the target.
     while (_termPositionsQueue.Peek() != null && target > _termPositionsQueue.Peek().Doc())
     {
         TermPositions lagging = (TermPositions)_termPositionsQueue.Pop();

         if (!lagging.SkipTo(target))
         {
             // Nothing at or after the target for this enumerator: drop it.
             lagging.Close();
             continue;
         }

         // Still has documents: put it back in the queue.
         _termPositionsQueue.Put(lagging);
     }

     return Next();
 }
Example #5
0
 /// <summary>
 /// Skips every queued <c>TermPositions</c> whose current document is below
 /// <paramref name="target"/>, then delegates to <c>Next(state)</c>.
 /// </summary>
 /// <param name="target">Document number to skip to.</param>
 /// <param name="state">State object forwarded unchanged to <c>SkipTo</c> and <c>Next</c>.</param>
 /// <returns>The result of <c>Next(state)</c> after the queue has been advanced.</returns>
 public bool SkipTo(int target, IState state)
 {
     for (;;)
     {
         TermPositions head = _termPositionsQueue.Peek();
         if (head == null || target <= head.Doc)
         {
             // Queue is empty or its head has already reached the target.
             break;
         }

         TermPositions lagging = _termPositionsQueue.Pop();
         if (!lagging.SkipTo(target, state))
         {
             // Nothing at or after the target for this enumerator: drop it.
             lagging.Close();
             continue;
         }

         _termPositionsQueue.Add(lagging);
     }

     return Next(state);
 }
Example #6
0
        /// <summary>
        /// Skips <c>internalPositions</c> to <paramref name="target"/> and, on success, caches
        /// the document number, the frequency and the first position of that document.
        /// </summary>
        /// <param name="target">Document number forwarded to <c>internalPositions.SkipTo</c>.</param>
        /// <returns>
        /// <c>true</c> when the skip succeeded; otherwise <c>false</c>, with
        /// <c>internalDoc</c> set to <c>int.MaxValue</c>.
        /// </returns>
        public override bool SkipTo(int target)
        {
            bool found = internalPositions.SkipTo(target);

            if (found)
            {
                internalDoc = internalPositions.Doc;
                freq        = internalPositions.Freq;

                // Reset the consumed-position counter, then read the first position eagerly.
                count    = 0;
                position = internalPositions.NextPosition();
                count++;
            }
            else
            {
                internalDoc = int.MaxValue;
            }

            return found;
        }
Example #7
0
        /// <summary>
        /// Moves <c>_tp</c> to the first document at or after <paramref name="targetDoc"/>,
        /// never moving to a document at or before the current one.
        /// </summary>
        /// <param name="targetDoc">
        /// Requested document number; values not greater than <c>_curDoc</c> are replaced
        /// by <c>_curDoc + 1</c>.
        /// </param>
        /// <returns>
        /// The new current document, or <c>DocIdSetIterator.NO_MORE_DOCS</c> when
        /// <c>_tp.SkipTo</c> fails (in which case <c>_tp</c> is disposed).
        /// </returns>
        public override int FetchDoc(int targetDoc)
        {
            // Always move forward: a target at or behind the current doc becomes "the next doc".
            int seekTo = targetDoc > _curDoc ? targetDoc : _curDoc + 1;

            if (!_tp.SkipTo(seekTo))
            {
                _curDoc = DocIdSetIterator.NO_MORE_DOCS;
                _tp.Dispose();
                return _curDoc;
            }

            _curDoc  = _tp.Doc;
            _posLeft = _tp.Freq;   // positions still unread for this document
            _curSec  = -1;
            _curPos  = -1;
            return _curDoc;
        }
Example #8
0
        // Builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding.
        //
        // Phases, in order:
        //   1. index 2 * numDocs documents, commit, index numDocs more, optimize;
        //   2. read every payload back and compare against the generated data;
        //   3. exercise skipping over unread payloads ("lazy skipping");
        //   4. check payload lengths around skip points;
        //   5. check that a second GetPayload() at the same position throws;
        //   6. re-create the index with a single 1500-byte payload and verify it.
        //
        // dir: the Directory the index is written to and read back from. The index is
        //      created twice with create == true, so earlier content is replaced.
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            // one more document than the skip interval
            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            // Total number of payload bytes consumed by the two indexing loops below:
            //   first loop : 2 * numDocs documents, offset advanced by numTerms each
            //   second loop: numDocs documents, offset advanced by i * numTerms (i = 0 .. numDocs - 1)
            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                // presumably the last argument is the per-token payload length (1 here) -- confirm against PayloadAnalyzer
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d, null);
            }

            // make sure we create more than one segment to test merging
            writer.Commit(null);

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                // the length argument grows with i, starting at 0 for the first document of this batch
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d, null);
            }

            writer.Optimize(null);
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir, true, null);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            // one TermPositions per term, advanced in lock-step below
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i], null);
            }

            // tps[0] drives the iteration; the remaining enumerators are advanced alongside it
            while (tps[0].Next(null))
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next(null);
                }
                // the frequency of the first term is used for all terms of this document
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        // append each payload to verifyPayloadData in term order
                        tps[j].NextPosition(null);
                        tps[j].GetPayload(verifyPayloadData, offset, null);
                        offset += tps[j].PayloadLength;
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0], null);

            tp.Next(null);
            tp.NextPosition(null);
            // now we don't read this payload
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            // a null buffer is passed; the array returned by GetPayload is what gets inspected
            byte[] payload = tp.GetPayload(null, 0, null);
            // NOTE(review): the value read from the index is passed first and the reference value second,
            // the reverse of the (expected, actual) order used by the other asserts in this method.
            Assert.AreEqual(payload[0], payloadData[numTerms]);
            tp.NextPosition(null);

            // we don't read this payload and skip to a different document
            tp.SkipTo(5, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            payload = tp.GetPayload(null, 0, null);
            // NOTE(review): same argument order as the payload assert above.
            Assert.AreEqual(payload[0], payloadData[5 * numTerms]);


            /*
             * Test different lengths at skip points
             */
            // the same TermPositions instance is re-used for the second term
            tp.Seek(terms[1], null);
            tp.Next(null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(3 * skipInterval - 1, null);
            tp.NextPosition(null);
            // expected length = target document number minus the 2 * numDocs documents of the first batch
            // (assumes document numbers are assigned sequentially from 0 -- confirm)
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0, null);

            // it is forbidden to call getPayload() more than once
            // without calling nextPosition()
            Assert.Throws <IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

            reader.Close();

            // test long payload
            analyzer = new PayloadAnalyzer();
            // create == true again: start a fresh index in the same directory
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            // presumably (offset 100, length 1500) within the 2000 generated bytes; matches the Array.Copy below
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d, null);


            writer.Optimize(null);
            // flush
            writer.Close();

            reader = IndexReader.Open(dir, true, null);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm), null);
            tp.Next(null);
            tp.NextPosition(null);

            // size the buffer from the length the index reports, then compare with the expected slice
            verifyPayloadData = new byte[tp.PayloadLength];
            tp.GetPayload(verifyPayloadData, 0, null);
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }
Example #9
0
        /// <summary>
        /// Rebuilds the content of document <paramref name="docNum"/> into <paramref name="doc"/>.
        /// Stored fields listed in <c>_indexFields</c> are copied from <paramref name="document"/>;
        /// every other field is reconstructed by walking all terms of the index and recording,
        /// per position, the terms that occur in the document.
        /// </summary>
        /// <param name="docNum">Number of the document to reconstruct.</param>
        /// <param name="document">The stored document the stored field values are read from.</param>
        /// <param name="doc">
        /// Output table keyed by field name. Stored fields map to a <c>Field</c>;
        /// reconstructed fields map to a <c>GrowableStringArray</c> indexed by position.
        /// </param>
        /// <remarks>
        /// Any exception raised while walking the index is reported through
        /// <c>_luke.ShowStatus</c> rather than propagated. The term enumerators are always
        /// closed, including when an exception interrupts the walk.
        /// </remarks>
        void BeginAsyncReconstruction(int docNum, Document document, Hashtable doc)
        {
            // get stored fields
            ArrayList sf = new ArrayList();

            for (int i = 0; i < _indexFields.Length; i++)
            {
                Field[] f = document.GetFields(_indexFields[i]);
                if (f == null || f.Length == 0 || !f[0].IsStored())
                {
                    continue;
                }

                // Multi-valued fields are merged into one value, one line per original value.
                StringBuilder sb = new StringBuilder();
                for (int k = 0; k < f.Length; k++)
                {
                    if (k > 0)
                    {
                        sb.Append('\n');
                    }
                    sb.Append(f[k].StringValue());
                }

                // The flags and the boost of the first value are used for the merged field.
                Field field = Legacy.CreateField(_indexFields[i], sb.ToString(), f[0].IsStored(), f[0].IsIndexed(), f[0].IsTokenized(), f[0].IsTermVectorStored());
                field.SetBoost(f[0].GetBoost());
                doc[_indexFields[i]] = field;
                sf.Add(_indexFields[i]);
            }

            try
            {
                int i = 0;

                // Progress is reported once every `delta` terms. Clamp to 1 so a zero or
                // negative _numTerms cannot produce a modulo/division by zero below.
                int delta = Math.Max(1, (int)Math.Ceiling((double)_numTerms / 100));

                TermEnum te = _luke.IndexReader.Terms();
                try
                {
                    TermPositions tp = _luke.IndexReader.TermPositions();
                    try
                    {
                        while (te.Next())
                        {
                            if ((i++ % delta) == 0)
                            {
                                // update UI - async
                                UpdateProgress(i / delta);
                            }

                            // skip stored fields
                            if (sf.Contains(te.Term().Field()))
                            {
                                continue;
                            }

                            tp.Seek(te.Term());
                            if (!tp.SkipTo(docNum) || tp.Doc() != docNum)
                            {
                                // this term is not found in the doc
                                continue;
                            }

                            String term = te.Term().Text();
                            GrowableStringArray terms = (GrowableStringArray)doc[te.Term().Field()];
                            if (terms == null)
                            {
                                terms = new GrowableStringArray();
                                doc[te.Term().Field()] = terms;
                            }

                            // Place the term text at every position it occupies in the document.
                            int freq = tp.Freq();
                            for (int k = 0; k < freq; k++)
                            {
                                int pos = tp.NextPosition();
                                terms.Set(pos, term);
                            }
                        }
                    }
                    finally
                    {
                        // Release the positions enumerator even if the walk failed.
                        tp.Close();
                    }
                }
                finally
                {
                    // Release the term enumerator even if the walk failed.
                    te.Close();
                }
            }
            catch (Exception exc)
            {
                // Update UI - async
                _luke.ShowStatus(exc.Message);
            }
        }