public override void Load()
            {
                // Scratch space for one payload: four bytes for an int.
                byte[] sizeBytes = new byte[4];

                // The using statement disposes the enumerator on every exit path and
                // simply does nothing when the reader hands back null.
                using (TermPositions positions = _reader.TermPositions(_sizeTerm))
                {
                    if (positions == null)
                    {
                        return;
                    }

                    while (positions.Next())
                    {
                        if (positions.Freq <= 0)
                        {
                            continue;
                        }

                        // Only the payload at the first position of each document is read.
                        positions.NextPosition();
                        positions.GetPayload(sizeBytes, 0);
                        int decodedSize = BytesToInt(sizeBytes);

                        // Never allocate more than _maxItems for a document.
                        Allocate(positions.Doc, Math.Min(decodedSize, _maxItems), true);
                    }
                }
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks the postings of <paramref name="term"/>, hands the payload found at the
        /// first position of each document to <c>Add</c>, and then finalizes the page
        /// that is still being filled.
        /// </summary>
        /// <param name="term">Term whose postings carry the payloads to load.</param>
        protected virtual void LoadPayload(Term term)
        {
            byte[] payloadBuf = null;

            // Release the enumerator when the scan ends, including when reading a
            // payload throws part-way through.
            using (TermPositions tp = _reader.TermPositions())
            {
                tp.Seek(term);
                while (tp.Next())
                {
                    if (tp.Freq > 0)
                    {
                        tp.NextPosition();
                        if (tp.IsPayloadAvailable)
                        {
                            // Read the length before fetching; the buffer returned by
                            // GetPayload is kept and offered again on the next call.
                            int len = tp.PayloadLength;
                            payloadBuf = tp.GetPayload(payloadBuf, 0);
                            Add(tp.Doc, payloadBuf, len);
                        }
                    }
                }
            }

            // save the last page

            // Mark every slot of the current page that was never written as MISSING.
            while (_curSlot < MAX_SLOTS)
            {
                _curPage[_curSlot++] = MISSING;
            }
            _list[_curPageNo] = CopyPage(new int[_curData]); // optimize the page to make getNumItems work
            _curPage          = null;
        }
Ejemplo n.º 3
0
        // TODO: Remove warning after API has been finalized

        /// <summary>
        /// Returns the payload at the current position of <c>internalPositions</c>,
        /// wrapped in a collection that holds exactly that one byte array.
        /// </summary>
        /// <returns>A single-element collection containing the payload bytes.</returns>
        public override ICollection <byte[]> GetPayload()
        {
            // Offer a buffer sized to the current payload; the array that GetPayload
            // returns is the one that gets wrapped.
            int payloadLength = internalPositions.PayloadLength;
            byte[] payload = internalPositions.GetPayload(new byte[payloadLength], 0);

            return new System.Collections.Generic.List <byte[]> { payload };
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Skips <paramref name="tp"/> to <paramref name="target"/> and asserts that the
        /// enumerator landed on that document with frequency 1 and a one-byte payload
        /// equal to <c>(byte)target</c>, while <c>counter</c> stays below
        /// <paramref name="maxCounter"/>.
        /// </summary>
        /// <param name="tp">Positions enumerator to advance.</param>
        /// <param name="target">Document number to skip to.</param>
        /// <param name="maxCounter">Value that <c>counter</c> must remain strictly below.</param>
        public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
        {
            tp.SkipTo(target);
            Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);

            int landedDoc = tp.Doc;
            Assert.AreEqual(target, landedDoc, "Wrong document " + landedDoc + " after skipTo target " + target);

            int termFreq = tp.Freq;
            Assert.AreEqual(1, termFreq, "Frequency is not 1: " + termFreq);

            // Read the payload of the first position into a one-byte buffer.
            tp.NextPosition();
            byte[] payload = new byte[1];
            tp.GetPayload(payload, 0);

            byte expectedByte = (byte)target;
            Assert.AreEqual(expectedByte, payload[0], "Wrong payload for the target " + target + ": " + payload[0]);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Folds the payload at the current position, if one is available, into
 /// <c>payloadScore</c> and counts it in <c>payloadsSeen</c>.
 /// </summary>
 /// <param name="similarity">Similarity used to score the raw payload bytes.</param>
 protected internal virtual void  ProcessPayload(Similarity similarity)
 {
     if (!positions.IsPayloadAvailable)
     {
         // zero out the payload?
         return;
     }

     payload = positions.GetPayload(payload, 0);

     // Both lookups go through the outer-outer instance; resolve them once.
     var owner = Enclosing_Instance.Enclosing_Instance;
     var field = owner.internalTerm.Field;

     payloadScore = owner.function.CurrentScore(
         doc,
         field,
         spans.Start(),
         spans.End(),
         payloadsSeen,
         payloadScore,
         similarity.ScorePayload(doc, field, spans.Start(), spans.End(), payload, 0, positions.PayloadLength));
     payloadsSeen++;
 }
Ejemplo n.º 6
0
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Each document is passed to the docs consumer obtained from
        /// <paramref name="termsConsumer"/>; unless term frequencies and positions are
        /// omitted, each position and its payload is passed to the positions consumer.
        ///
        /// </summary>
        /// <param name="termsConsumer">consumer the merged term is added to
        /// </param>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
        {
            FormatPostingsDocsConsumer docsOut = termsConsumer.AddTerm(smis[0].term.Text);
            int docCount = 0;

            for (int seg = 0; seg < n; seg++)
            {
                SegmentMergeInfo info = smis[seg];
                TermPositions termPositions = info.GetPositions();
                System.Diagnostics.Debug.Assert(termPositions != null);
                int docBase = info.base_Renamed;
                int[] deletionMap = info.GetDocMap();
                termPositions.Seek(info.termEnum);

                while (termPositions.Next())
                {
                    docCount++;

                    // Map around deletions when a doc map exists, then shift the
                    // number into the merged doc space.
                    int segmentDoc = termPositions.Doc;
                    int mergedDoc = (docMapOrSelf(deletionMap, segmentDoc)) + docBase;

                    int occurrences = termPositions.Freq;
                    FormatPostingsPositionsConsumer positionsOut = docsOut.AddDoc(mergedDoc, occurrences);

                    if (omitTermFreqAndPositions)
                    {
                        continue;
                    }

                    for (int occ = 0; occ < occurrences; occ++)
                    {
                        int pos = termPositions.NextPosition();
                        int payloadBytes = termPositions.PayloadLength;
                        if (payloadBytes > 0)
                        {
                            // Grow the shared buffer only when this payload does not fit.
                            if (payloadBuffer == null || payloadBuffer.Length < payloadBytes)
                            {
                                payloadBuffer = new byte[payloadBytes];
                            }
                            termPositions.GetPayload(payloadBuffer, 0);
                        }
                        positionsOut.AddPosition(pos, payloadBuffer, 0, payloadBytes);
                    }
                    positionsOut.Finish();
                }
            }
            docsOut.Finish();

            return docCount;
        }

        /// <summary>Returns <paramref name="doc"/> mapped through
        /// <paramref name="docMap"/>, or <paramref name="doc"/> itself when there is no map.
        /// </summary>
        private static int docMapOrSelf(int[] docMap, int doc)
        {
            return docMap == null ? doc : docMap[doc];
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Indexes documents from several threads that share one <c>ByteArrayPool</c>,
        /// then checks that every stored payload decodes to the text of its term and
        /// that the pool size equals the number of threads afterwards.
        /// </summary>
        public virtual void  TestThreadSafety()
        {
            const int threadCount   = 5;
            const int docsPerThread = 50;

            rnd = NewRandom();
            ByteArrayPool bufferPool = new ByteArrayPool(threadCount, 5);

            Directory   directory   = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);

            System.String fieldName = "test";

            // Start every ingester before waiting on any of them.
            ThreadClass[] workers = new ThreadClass[threadCount];
            for (int t = 0; t < workers.Length; t++)
            {
                workers[t] = new AnonymousClassThread(docsPerThread, fieldName, bufferPool, indexWriter, this);
                workers[t].Start();
            }

            foreach (ThreadClass worker in workers)
            {
                worker.Join();
            }
            indexWriter.Close();

            IndexReader indexReader = IndexReader.Open(directory, true, null);
            TermEnum    termEnum    = indexReader.Terms(null);

            while (termEnum.Next(null))
            {
                TermPositions positions = indexReader.TermPositions(termEnum.Term, null);
                while (positions.Next(null))
                {
                    int remaining = positions.Freq;
                    while (remaining-- > 0)
                    {
                        positions.NextPosition(null);
                        byte[] rawPayload = positions.GetPayload(new byte[5], 0, null);
                        Assert.AreEqual(bufferPool.BytesToString(rawPayload), termEnum.Term.Text);
                    }
                }
                positions.Close();
            }
            termEnum.Close();
            indexReader.Close();

            Assert.AreEqual(bufferPool.Size(), threadCount);
        }
Ejemplo n.º 8
0
        // Builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding: a byte-for-byte round trip
        // of all payloads, lazy skipping of unread payloads, payload lengths at skip
        // points, repeated GetPayload() calls, and one long payload.
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            // one more document than a skip interval holds
            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            // Total amount by which the two indexing loops below advance offset:
            // the first loop adds numTerms for each of its 2 * numDocs documents, the
            // second adds i * numTerms for i = 0 .. numDocs - 1.
            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d, null);
            }

            // make sure we create more than one segment to test merging
            writer.Commit(null);

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d, null);
            }

            writer.Optimize(null);
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir, true, null);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            // one positions enumerator per term, advanced in lockstep below
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i], null);
            }

            while (tps[0].Next(null))
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next(null);
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        // append this payload directly after the previous one
                        tps[j].NextPosition(null);
                        tps[j].GetPayload(verifyPayloadData, offset, null);
                        offset += tps[j].PayloadLength;
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            // the bytes read back must equal the bytes that were fed to the analyzer
            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0], null);

            tp.Next(null);
            tp.NextPosition(null);
            // now we don't read this payload
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            byte[] payload = tp.GetPayload(null, 0, null);
            Assert.AreEqual(payload[0], payloadData[numTerms]);
            tp.NextPosition(null);

            // we don't read this payload and skip to a different document
            tp.SkipTo(5, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            payload = tp.GetPayload(null, 0, null);
            Assert.AreEqual(payload[0], payloadData[5 * numTerms]);


            /*
             * Test different lengths at skip points
             */
            tp.Seek(terms[1], null);
            tp.Next(null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            // NOTE(review): presumably document 3 * skipInterval - 1 is one of the documents
            // added by the second indexing loop, which was given a payload length of
            // i = doc - 2 * numDocs; confirm against PayloadAnalyzer.SetPayloadData
            tp.SkipTo(3 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0, null);

            // it is forbidden to call getPayload() more than once
            // without calling nextPosition()
            Assert.Throws <IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

            reader.Close();

            // test long payload
            // (the writer is opened with create == true, so the index is rebuilt from scratch)
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d, null);


            writer.Optimize(null);
            // flush
            writer.Close();

            reader = IndexReader.Open(dir, true, null);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm), null);
            tp.Next(null);
            tp.NextPosition(null);

            verifyPayloadData = new byte[tp.PayloadLength];
            tp.GetPayload(verifyPayloadData, 0, null);
            // expected payload: the 1500 bytes of payloadData starting at index 100
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }
Ejemplo n.º 9
0
		/// <summary>
		/// Skips <paramref name="tp"/> to <paramref name="target"/> and asserts that the
		/// enumerator landed on that document with frequency 1 and a one-byte payload
		/// equal to <c>(byte) target</c>. Fails when <c>counter</c> exceeds
		/// <paramref name="maxCounter"/>.
		/// </summary>
		/// <param name="tp">Positions enumerator to advance.</param>
		/// <param name="target">Document number to skip to.</param>
		/// <param name="maxCounter">Largest value <c>counter</c> may have after the skip.</param>
		public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
		{
			tp.SkipTo(target);
			if (counter > maxCounter)
			{
				Assert.Fail("Too many bytes read: " + counter);
			}

			int landedDoc = tp.Doc();
			Assert.AreEqual(target, landedDoc, "Wrong document " + landedDoc + " after skipTo target " + target);

			int termFreq = tp.Freq();
			Assert.AreEqual(1, termFreq, "Frequency is not 1: " + termFreq);

			// Read the payload of the first position into a one-byte buffer.
			tp.NextPosition();
			byte[] payload = new byte[1];
			tp.GetPayload(payload, 0);

			byte expectedByte = (byte) target;
			Assert.AreEqual(expectedByte, payload[0], "Wrong payload for the target " + target + ": " + payload[0]);
		}
Ejemplo n.º 10
0
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams. Document numbers are written to freqOutput as deltas
        /// with the frequency, positions to proxOutput as deltas with optional payloads,
        /// and skip data is buffered every skipInterval documents.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt (a merged document number
        /// is negative or not greater than the previous one) </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0;        // number of docs w/ term

            skipListWriter.ResetSkip();
            // payload handling is decided once, from the field of the first segment's term
            bool storePayloads     = fieldInfos.FieldInfo(smis[0].term.field).storePayloads;
            int  lastPayloadLength = -1;             // ensures that we write the first length

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc];                   // map around deletions
                    }
                    doc += base_Renamed;                     // convert to merged space

                    // merged doc numbers must be non-negative and strictly increasing
                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                    {
                        throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )");
                    }

                    df++;

                    // buffer a skip entry every skipInterval documents; lastDoc still holds
                    // the previous document here because it is updated further down
                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, storePayloads, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    int docCode = (doc - lastDoc) << 1;                     // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1);                         // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode);                      // write doc
                        freqOutput.WriteVInt(freq);                         // write frequency in doc
                    }

                    // See DocumentWriter's writePostings(Posting[], String) for
                    // documentation about the encoding of positions and payloads.
                    int lastPosition = 0;                     // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        int delta    = position - lastPosition;
                        if (storePayloads)
                        {
                            // the position delta is doubled; the low bit is set when the
                            // payload length changed, in which case the new length follows
                            int payloadLength = postings.GetPayloadLength();
                            if (payloadLength == lastPayloadLength)
                            {
                                proxOutput.WriteVInt(delta * 2);
                            }
                            else
                            {
                                proxOutput.WriteVInt(delta * 2 + 1);
                                proxOutput.WriteVInt(payloadLength);
                                lastPayloadLength = payloadLength;
                            }
                            if (payloadLength > 0)
                            {
                                // grow the reusable buffer only when the payload does not fit
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                                proxOutput.WriteBytes(payloadBuffer, 0, payloadLength);
                            }
                        }
                        else
                        {
                            proxOutput.WriteVInt(delta);
                        }
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }