Ejemplo n.º 1
0
            public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
            {
                int termCount = Enclosing_Instance.terms.Count;

                // A phrase with no terms can never match anything.
                if (termCount == 0)
                {
                    return null;
                }

                // Obtain one positions enumerator per phrase term; if any term
                // is entirely absent from this reader the phrase cannot match.
                TermPositions[] positionsPerTerm = new TermPositions[termCount];
                for (int i = 0; i < termCount; i++)
                {
                    TermPositions positions = reader.TermPositions(Enclosing_Instance.terms[i]);
                    if (positions == null)
                    {
                        return null;
                    }
                    positionsPerTerm[i] = positions;
                }

                byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);

                // Zero slop permits the cheaper exact-match scorer; otherwise
                // use the sloppy scorer that tolerates positional slack.
                if (Enclosing_Instance.slop == 0)
                {
                    return new ExactPhraseScorer(this, positionsPerTerm, Enclosing_Instance.GetPositions(), similarity, fieldNorms);
                }
                return new SloppyPhraseScorer(this, positionsPerTerm, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, fieldNorms);
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Indexes one document whose "preanalyzed" field is fed from a
        /// pre-built token stream (no text analysis), then verifies each of
        /// the stream's terms was stored with the expected frequency and
        /// positions.
        /// </summary>
        public virtual void  TestPreAnalyzedField()
        {
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();

            // Field content comes from the anonymous token stream, not from
            // analyzing stored text.
            doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));

            writer.AddDocument(doc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(info);

            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            // term1: one occurrence at position 0.
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            // term2: two occurrences, at positions 1 and 3.
            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            // term3: one occurrence at position 2.
            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
        }
        public virtual void  TestTerms()
        {
            // Every term in the index must be a substring of the helper's
            // known value for its field.
            TermEnum terms = _reader.Terms();
            Assert.IsNotNull(terms);
            while (terms.Next())
            {
                Term current = terms.Term;
                Assert.IsNotNull(current);
                System.String expected = (System.String)DocHelper.NameValues[current.Field];
                Assert.IsTrue(expected.IndexOf(current.Text) != -1);
            }

            // Seeking a TermDocs enumerator to known terms must yield at
            // least one matching document each.
            TermDocs termDocs = _reader.TermDocs();
            Assert.IsNotNull(termDocs);
            termDocs.Seek(new Term(DocHelper.TextField1Key, "field"));
            Assert.IsTrue(termDocs.Next());

            termDocs.Seek(new Term(DocHelper.NoNormsKey, DocHelper.NoNormsText));
            Assert.IsTrue(termDocs.Next());

            // Positional lookup: first hit is document 0 with a valid position.
            TermPositions positions = _reader.TermPositions();
            positions.Seek(new Term(DocHelper.TextField1Key, "field"));
            Assert.IsNotNull(positions);
            Assert.AreEqual(0, positions.Doc);
            Assert.IsTrue(positions.NextPosition() >= 0);
        }
            public override void Load()
            {
                TermPositions tp = null;
                byte[] intBuf = new byte[4]; // payload carries a single 32-bit int

                try
                {
                    tp = _reader.TermPositions(_sizeTerm);
                    if (tp == null)
                    {
                        return;
                    }

                    // For each document carrying the size term, decode the
                    // payload on its first position into an item count and
                    // allocate storage (capped at _maxItems).
                    while (tp.Next())
                    {
                        if (tp.Freq <= 0)
                        {
                            continue;
                        }
                        tp.NextPosition();
                        tp.GetPayload(intBuf, 0);
                        int itemCount = BytesToInt(intBuf);
                        Allocate(tp.Doc, Math.Min(itemCount, _maxItems), true);
                    }
                }
                finally
                {
                    // Release the enumerator even if decoding throws.
                    if (tp != null)
                    {
                        tp.Dispose();
                    }
                }
            }
Ejemplo n.º 5
0
        /// <summary>Returns an enumeration of all the documents which contain
        /// <code>term</code>.  For each document, in addition to the document number
        /// and frequency of the term in that document, a list of all of the ordinal
        /// positions of the term in the document is available.  Thus, this method
        /// implements the mapping:
        ///
        /// <p><ul>
        /// Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
        /// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
        /// pos<sub>freq-1</sub>&gt;
        /// &gt;<sup>*</sup>
        /// </ul>
        /// <p> This positional information facilitates phrase and proximity searching.
        /// <p>The enumeration is ordered by document number.  Each document number is
        /// greater than all that precede it in the enumeration.
        /// </summary>
        public virtual TermPositions TermPositions(Term term)
        {
            // Create an unpositioned enumerator, then position it on the term.
            TermPositions termPositions = TermPositions();

            termPositions.Seek(term);
            return(termPositions);
        }
Ejemplo n.º 6
0
        internal int[] docMap = null; // maps around deleted docs

        internal SegmentMergeInfo(int b, TermEnum te, Monodoc.Lucene.Net.Index.IndexReader r)
        {
            base_Renamed = b;
            reader = r;
            termEnum = te;
            term = te.Term();
            postings = reader.TermPositions();

            // When the segment contains deletions, precompute a map from each
            // original document number to its compacted (deletion-free)
            // number; deleted documents map to -1.
            if (reader.HasDeletions())
            {
                int maxDoc = reader.MaxDoc();
                docMap = new int[maxDoc];
                int newDocNum = 0;
                for (int oldDocNum = 0; oldDocNum < maxDoc; oldDocNum++)
                {
                    docMap[oldDocNum] = reader.IsDeleted(oldDocNum) ? -1 : newDocNum++;
                }
            }
        }
        public virtual void  TestSeek()
        {
            // Index ten identical two-token documents ("a b").
            Directory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            for (int docNum = 0; docNum < 10; docNum++)
            {
                Document doc = new Document();
                doc.Add(new Field(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(directory);
            TermPositions tp = reader.TermPositions();

            // "b" is the second token of every document: position 1.
            tp.Seek(new Term(this.field, "b"));
            for (int expectedDoc = 0; expectedDoc < 10; expectedDoc++)
            {
                tp.Next();
                Assert.AreEqual(expectedDoc, tp.Doc());
                Assert.AreEqual(1, tp.NextPosition());
            }

            // Re-seeking the same enumerator to "a" must also work: first
            // token, position 0, in every document.
            tp.Seek(new Term(this.field, "a"));
            for (int expectedDoc = 0; expectedDoc < 10; expectedDoc++)
            {
                tp.Next();
                Assert.AreEqual(expectedDoc, tp.Doc());
                Assert.AreEqual(0, tp.NextPosition());
            }
        }
Ejemplo n.º 8
0
        public virtual void  TestTerms()
        {
            // Every indexed term must be a substring of the helper's value
            // for its field.
            TermEnum terms = reader.Terms();
            Assert.IsNotNull(terms);
            while (terms.Next())
            {
                Term current = terms.Term();
                Assert.IsNotNull(current);
                System.String expected = (System.String)DocHelper.nameValues[current.Field()];
                Assert.IsTrue(expected.IndexOf(current.Text()) != -1);
            }

            // Known terms must be reachable through a TermDocs enumerator.
            TermDocs termDocs = reader.TermDocs();
            Assert.IsNotNull(termDocs);
            termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
            Assert.IsTrue(termDocs.Next());

            termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT));
            Assert.IsTrue(termDocs.Next());

            // Positional lookup: first hit is document 0 with a valid position.
            TermPositions positions = reader.TermPositions();
            positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
            Assert.IsNotNull(positions);
            Assert.AreEqual(0, positions.Doc());
            Assert.IsTrue(positions.NextPosition() >= 0);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Reads the payload stored at the first position of
        /// <paramref name="term"/> in every matching document and adds it to
        /// this collector, then flushes the final partially-filled page.
        /// </summary>
        /// <param name="term">term whose per-document payloads are loaded</param>
        protected virtual void LoadPayload(Term term)
        {
            byte[]        payloadBuf = null;
            TermPositions tp         = _reader.TermPositions();

            try
            {
                tp.Seek(term);
                while (tp.Next())
                {
                    if (tp.Freq > 0)
                    {
                        tp.NextPosition();
                        if (tp.IsPayloadAvailable)
                        {
                            int len = tp.PayloadLength;
                            payloadBuf = tp.GetPayload(payloadBuf, 0);
                            Add(tp.Doc, payloadBuf, len);
                        }
                    }
                }
            }
            finally
            {
                // BUGFIX: the enumerator was previously leaked; dispose it the
                // same way the sibling Load() method does.
                tp.Dispose();
            }

            // save the last page: pad unused slots, then store a copy trimmed
            // to the actual data size.
            while (_curSlot < MAX_SLOTS)
            {
                _curPage[_curSlot++] = MISSING;
            }
            _list[_curPageNo] = CopyPage(new int[_curData]); // optimize the page to make getNumItems work
            _curPage          = null;
        }
Ejemplo n.º 10
0
		public TermSpans(TermPositions positions, Term term)
		{
			// Wrap the raw positions enumerator for this term; no document is
			// current until the first advance, hence the -1 sentinel.
			this.term = term;
			this.internalPositions = positions;
			internalDoc = -1;
		}
Ejemplo n.º 11
0
        /// <summary>
        /// Checks that every indexed term matches one of the helper's field
        /// values, and that TermDocs/TermPositions can seek to a known term.
        /// </summary>
        public virtual void  TestTerms()
        {
            try
            {
                TermEnum terms = reader.Terms();
                Assert.IsTrue(terms != null);
                while (terms.Next() == true)
                {
                    Term term = terms.Term();
                    Assert.IsTrue(term != null);
                    System.String fieldValue = (System.String)DocHelper.nameValues[term.Field()];
                    Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
                }

                TermDocs termDocs = reader.TermDocs();
                Assert.IsTrue(termDocs != null);
                termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                Assert.IsTrue(termDocs.Next() == true);

                TermPositions positions = reader.TermPositions();
                positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
                Assert.IsTrue(positions != null);
                Assert.IsTrue(positions.Doc() == 0);
                Assert.IsTrue(positions.NextPosition() >= 0);
            }
            catch (System.IO.IOException e)
            {
                // BUGFIX: previously printed the stack trace to stderr and
                // failed with an uninformative Assert.IsTrue(false); fail with
                // the exception details instead.
                Assert.Fail("unexpected IOException: " + e);
            }
        }
Ejemplo n.º 12
0
		private float freq; //phrase frequency in current doc as computed by phraseFreq().
		
		/// <summary>
		/// Builds a scorer over the given term-position enumerators, chaining
		/// them into a singly linked list of PhrasePositions (first..last) in
		/// the order given, and creating an empty priority queue for matching.
		/// </summary>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.Value;
			
			// convert tps to a list of phrase positions.
			// note: phrase-position differs from term-position in that its position
			// reflects the phrase offset: pp.pos = tp.pos - offset.
			// this allows to easily identify a matching (exact) phrase 
			// when all PhrasePositions have exactly the same position.
			for (int i = 0; i < tps.Length; i++)
			{
				PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
				if (last != null)
				{
					// add next to end of list
					last.next = pp;
				}
				else
				{
					first = pp;
				}
				last = pp;
			}
			
			pq = new PhraseQueue(tps.Length); // construct empty pq
			// sentinel: no document is current until the scorer first advances
			first.doc = - 1;
		}
Ejemplo n.º 13
0
        public virtual void  TestPositionIncrementGap()
        {
            // The anonymous analyzer supplies a position-increment gap between
            // the two values of the multi-valued "repeated" field.
            Analyzer analyzer = new AnonymousClassAnalyzer(this);
            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();
            doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Flush();

            SegmentInfo info = writer.NewestSegment();
            writer.Close();

            SegmentReader reader = SegmentReader.Get(info);
            TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq();
            Assert.AreEqual(2, freq);

            // First occurrence at 0; the second lands at 502, reflecting the
            // gap the anonymous analyzer inserts between field values.
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(502, termPositions.NextPosition());
        }
Ejemplo n.º 14
0
        public virtual void  TestTokenReuse()
        {
            // The anonymous analyzer reuses token instances; "a 5 a a" yields
            // three occurrences of "a" at analyzer-assigned positions.
            Analyzer analyzer = new AnonymousClassAnalyzer1(this);
            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();
            doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Flush();

            SegmentInfo info = writer.NewestSegment();
            writer.Close();

            SegmentReader reader = SegmentReader.Get(info);
            TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq();
            Assert.AreEqual(3, freq);

            // Positions 0, 6, 7 — only the first occurrence carries a payload.
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(true, termPositions.IsPayloadAvailable());
            Assert.AreEqual(6, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable());
            Assert.AreEqual(7, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable());
        }
Ejemplo n.º 15
0
	    public IHttpActionResult Post([FromBody]AddNewPositionModel model)
		{
            var term = termManager.GetById(model.termId);
            ClientTermViewModel result = null;

            var product = productManager.GetById(model.productId);
            var user = userManager.GetByLogin(model.Login);

            if (user != null && user.Token == model.Token &&
                product != null && term != null)
            {
                var newPosition = new Positions()
                {
                    Amount = 1, //TODO
                    Description = product.Name,
                    ProductId = product.Id,
                    Price = product.Price,
                    OrderId = term.OrderId,
                };

                positionsManager.AddEntity(newPosition);
                

                var newTermPosition = new TermPositions()
                {
                    TermId = term.Id,
                    Amount = 1, //TODO
                    Positions = newPosition,
                    TermPositionMaterialRsps = new List<TermPositionMaterialRsp>()
                };

                term.TermPositions.Add(newTermPosition);


                //add linked material to position
                foreach (var material in product.ProductMaterialRsps.Where(o => !o.DeleteDate.HasValue))
                {
                    newTermPosition.TermPositionMaterialRsps.Add(new TermPositionMaterialRsp()
                    {
                        Amount = material.Amount,
                        MaterialId = material.MaterialId,
                        TermPositions = newTermPosition 
                    });
                }

                positionsManager.SaveChanges();



                if (term != null)
                {
                    result = TermViewModelHelper.ToModel(term, true, false);
                }

                return Ok(result);
            }

            return BadRequest();
		}
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        private int AppendPostings(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0; // number of docs w/ term

            ResetSkip();
            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.postings;
                int   base_Renamed        = smi.base_Renamed;
                int[] docMap = smi.docMap;
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    // Merged doc numbers must be strictly increasing across
                    // segments; a violation means the input order is broken.
                    if (doc < lastDoc)
                    {
                        throw new System.SystemException("docs out of order");
                    }

                    df++;

                    // Buffer a skip-list entry every skipInterval documents.
                    if ((df % skipInterval) == 0)
                    {
                        BufferSkip(lastDoc);
                    }

                    int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
                    lastDoc = doc;

                    int freq = postings.Freq();
                    if (freq == 1)
                    {
                        freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
                    }
                    else
                    {
                        freqOutput.WriteVInt(docCode); // write doc
                        freqOutput.WriteVInt(freq);    // write frequency in doc
                    }

                    // Positions are delta-encoded within each document.
                    int lastPosition = 0; // write position deltas
                    for (int j = 0; j < freq; j++)
                    {
                        int position = postings.NextPosition();
                        proxOutput.WriteVInt(position - lastPosition);
                        lastPosition = position;
                    }
                }
            }
            return(df);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Adds a product as a new position on an existing term (order):
        /// validates the user's token, creates the position, links it to the
        /// term and copies the product's active material requirements.
        /// </summary>
        /// <param name="model">term id, product id and user credentials</param>
        /// <returns>200 with the updated term view model, or 400 on validation failure</returns>
        public IHttpActionResult Post([FromBody] AddNewPositionModel model)
        {
            var term    = termManager.GetById(model.termId);
            var product = productManager.GetById(model.productId);
            var user    = userManager.GetByLogin(model.Login);

            // Reject unknown users, stale tokens and missing entities up front.
            if (user == null || user.Token != model.Token ||
                product == null || term == null)
            {
                return(BadRequest());
            }

            var newPosition = new Positions()
            {
                Amount      = 1, //TODO
                Description = product.Name,
                ProductId   = product.Id,
                Price       = product.Price,
                OrderId     = term.OrderId,
            };
            positionsManager.AddEntity(newPosition);

            var newTermPosition = new TermPositions()
            {
                TermId    = term.Id,
                Amount    = 1, //TODO
                Positions = newPosition,
                TermPositionMaterialRsps = new List <TermPositionMaterialRsp>()
            };
            term.TermPositions.Add(newTermPosition);

            // Copy the product's non-deleted material requirements onto the
            // new position.
            foreach (var material in product.ProductMaterialRsps.Where(o => !o.DeleteDate.HasValue))
            {
                newTermPosition.TermPositionMaterialRsps.Add(new TermPositionMaterialRsp()
                {
                    Amount        = material.Amount,
                    MaterialId    = material.MaterialId,
                    TermPositions = newTermPosition
                });
            }

            positionsManager.SaveChanges();

            // BUGFIX: removed the redundant `if (term != null)` re-check —
            // term is guaranteed non-null by the guard above.
            ClientTermViewModel result = TermViewModelHelper.ToModel(term, true, false);
            return(Ok(result));
        }
Ejemplo n.º 18
0
 internal TermPositions GetPositions()
 {
     // Lazily create and cache the positions enumerator for this reader.
     if (postings != null)
     {
         return postings;
     }
     postings = reader.TermPositions();
     return postings;
 }
Ejemplo n.º 19
0
 internal TermPositions GetPositions(IState state)
 {
     // Lazily create and cache the positions enumerator for this reader.
     if (postings != null)
     {
         return postings;
     }
     postings = reader.TermPositions(state);
     return postings;
 }
		internal TermPositions GetPositions()
		{
			// Lazily create and cache the positions enumerator for this reader.
			return postings ?? (postings = reader.TermPositions());
		}
Ejemplo n.º 21
0
        /// <summary>Returns an enumeration of all the documents which contain
        /// <code>term</code>.  For each document, besides the document number and
        /// the term's frequency in it, every ordinal position of the term in that
        /// document is available, i.e. the mapping:
        ///
        /// <p><ul>
        /// Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
        /// &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
        /// pos<sub>freq-1</sub>&gt;
        /// &gt;<sup>*</sup>
        /// </ul>
        /// <p> This positional information facilitates phrase and proximity searching.
        /// <p>The enumeration is ordered by document number; each document number
        /// is greater than all that precede it.
        /// </summary>
        /// <throws>  IOException if there is a low-level IO error </throws>
        public virtual TermPositions TermPositions(Term term)
        {
            EnsureOpen();

            // Create an unpositioned enumerator, then position it on the term.
            TermPositions enumerator = TermPositions();
            enumerator.Seek(term);
            return enumerator;
        }
        /// <summary>
        /// Indexes three small documents, wraps the reader in the filtering
        /// TestReader, and verifies the filter's view: only terms containing
        /// 'e' survive, "one" matches only odd doc numbers, and a null-term
        /// TermDocs still enumerates every document once.
        /// </summary>
        public virtual void  TestFilterIndexReader_Renamed()
        {
            RAMDirectory directory = new MockRAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document d1 = new Document();

            d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d1);

            Document d2 = new Document();

            d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d2);

            Document d3 = new Document();

            d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(d3);

            writer.Close();

            // Wrap the real reader in the filtering reader under test.
            IndexReader reader = new TestReader(IndexReader.Open(directory));

            Assert.IsTrue(reader.IsOptimized());

            // The filter is expected to expose only terms containing 'e'.
            TermEnum terms = reader.Terms();

            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
            }
            terms.Close();

            // Through the filter, "one" should match only odd doc numbers.
            TermPositions positions = reader.TermPositions(new Term("default", "one"));

            while (positions.Next())
            {
                Assert.IsTrue((positions.Doc() % 2) == 1);
            }

            int NUM_DOCS = 3;

            // A null term enumerates all documents, each with frequency 1.
            TermDocs td = reader.TermDocs(null);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(td.Next());
                Assert.AreEqual(i, td.Doc());
                Assert.AreEqual(1, td.Freq());
            }
            td.Close();
            reader.Close();
            directory.Close();
        }
Ejemplo n.º 23
0
		/// <summary>
		/// Skips the enumerator to the target doc and verifies that the byte
		/// counter stayed under the limit and that doc, frequency and the
		/// single-byte payload all match the target.
		/// </summary>
		public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
		{
			tp.SkipTo(target);
			Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);
			Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
			Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
			tp.NextPosition();
			byte[] payload = new byte[1];
			tp.GetPayload(payload, 0);
			Assert.AreEqual((byte) target, payload[0], "Wrong payload for the target " + target + ": " + payload[0]);
		}
Ejemplo n.º 24
0
        /// <summary>
        /// Skips the enumerator to the target doc and verifies that the byte
        /// counter stayed under the limit and that doc, frequency and the
        /// single-byte payload all match the target.
        /// </summary>
        public virtual void  CheckSkipTo(TermPositions tp, int target, int maxCounter)
        {
            tp.SkipTo(target);
            Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);
            Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
            Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
            tp.NextPosition();
            byte[] payload = new byte[1];
            tp.GetPayload(payload, 0);
            Assert.AreEqual((byte)target, payload[0], "Wrong payload for the target " + target + ": " + payload[0]);
        }
Ejemplo n.º 25
0
        /// <summary>Process postings from multiple segments all positioned on the
        /// same term. Writes out merged entries into freqOutput and
        /// the proxOutput streams.
        ///
        /// </summary>
        /// <param name="smis">array of segments
        /// </param>
        /// <param name="n">number of cells in the array actually occupied
        /// </param>
        /// <returns> number of documents across all segments where this term was found
        /// </returns>
        /// <throws>  CorruptIndexException if the index is corrupt </throws>
        /// <throws>  IOException if there is a low-level IO error </throws>
        private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
        {
            FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text);
            int df = 0;

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);

                while (postings.Next())
                {
                    df++;
                    int doc = postings.Doc;
                    if (docMap != null)
                    {
                        doc = docMap[doc]; // map around deletions
                    }
                    doc += base_Renamed;   // convert to merged space

                    int freq = postings.Freq;
                    FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq);

                    // Positions (and their payloads) are only emitted when
                    // term frequencies/positions are being kept.
                    if (!omitTermFreqAndPositions)
                    {
                        for (int j = 0; j < freq; j++)
                        {
                            int position      = postings.NextPosition();
                            int payloadLength = postings.PayloadLength;
                            if (payloadLength > 0)
                            {
                                // Grow the shared payload buffer on demand.
                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
                                {
                                    payloadBuffer = new byte[payloadLength];
                                }
                                postings.GetPayload(payloadBuffer, 0);
                            }
                            posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
                        }
                        posConsumer.Finish();
                    }
                }
            }
            docConsumer.Finish();

            return(df);
        }
Ejemplo n.º 26
0
            internal TermPositionsQueue(System.Collections.IList termPositions)
            {
                Initialize(termPositions.Count);

                // Prime the queue with every enumerator that has at least one
                // document; already-exhausted ones are excluded entirely.
                foreach (TermPositions tp in termPositions)
                {
                    if (tp.Next())
                    {
                        Put(tp);
                    }
                }
            }
Ejemplo n.º 27
0
 public bool SkipTo(int target, IState state)
 {
     // Pop every enumerator positioned before the target; those that can
     // reach it are re-queued, exhausted ones are closed.
     for (TermPositions head = _termPositionsQueue.Peek();
          head != null && target > head.Doc;
          head = _termPositionsQueue.Peek())
     {
         TermPositions tp = _termPositionsQueue.Pop();
         if (tp.SkipTo(target, state))
         {
             _termPositionsQueue.Add(tp);
         }
         else
         {
             tp.Close();
         }
     }
     return Next(state);
 }
Ejemplo n.º 28
0
 public bool SkipTo(int target)
 {
     // Pop every enumerator positioned before the target; those that can
     // reach it are re-queued, exhausted ones are closed.
     for (TermPositions head = (TermPositions)_termPositionsQueue.Peek();
          head != null && target > head.Doc();
          head = (TermPositions)_termPositionsQueue.Peek())
     {
         TermPositions tp = (TermPositions)_termPositionsQueue.Pop();
         if (tp.SkipTo(target))
         {
             _termPositionsQueue.Put(tp);
         }
         else
         {
             tp.Close();
         }
     }
     return Next();
 }
Ejemplo n.º 29
0
        /// <summary>
        /// Process postings from multiple segments without tf, all positioned on the same term.
        /// Writes out merged entries only into freqOutput, proxOut is not written.
        /// </summary>
        /// <param name="smis">smis array of segments</param>
        /// <param name="n">number of cells in the array actually occupied</param>
        /// <returns>number of documents across all segments where this term was found</returns>
        private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n)
        {
            int lastDoc = 0;
            int df      = 0;      // number of docs w/ term

            skipListWriter.ResetSkip();
            int lastPayloadLength = -1;   // ensures that we write the first length

            for (int i = 0; i < n; i++)
            {
                SegmentMergeInfo smi      = smis[i];
                TermPositions    postings = smi.GetPositions();
                System.Diagnostics.Debug.Assert(postings != null);
                int   base_Renamed = smi.base_Renamed;
                int[] docMap       = smi.GetDocMap();
                postings.Seek(smi.termEnum);
                while (postings.Next())
                {
                    int doc = postings.Doc();
                    if (docMap != null)
                    {
                        doc = docMap[doc];                      // map around deletions
                    }
                    doc += base_Renamed;                        // convert to merged space

                    // Merged doc numbers must be strictly increasing; a
                    // violation indicates index corruption.
                    if (doc < 0 || (df > 0 && doc <= lastDoc))
                    {
                        throw new CorruptIndexException("docs out of order (" + doc +
                                                        " <= " + lastDoc + " )");
                    }

                    df++;

                    // Buffer a skip-list entry every skipInterval documents.
                    if ((df % skipInterval) == 0)
                    {
                        skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength);
                        skipListWriter.BufferSkip(df);
                    }

                    // Without tf, only the delta-encoded doc number is stored.
                    int docCode = (doc - lastDoc);
                    lastDoc = doc;
                    freqOutput.WriteVInt(docCode);    // write doc & freq=1
                }
            }
            return(df);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Concurrently ingests documents with payloads from several threads,
        /// then verifies that every stored payload round-trips to the term text
        /// it was generated from, and that all pooled buffers were returned.
        /// </summary>
        public virtual void  TestThreadSafety()
        {
            rnd = NewRandom();
            int           numThreads = 5;
            int           numDocs    = 50;
            ByteArrayPool pool       = new ByteArrayPool(numThreads, 5);

            Directory   dir    = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);

            System.String field = "test";

            // Launch the ingesting threads; each adds numDocs documents.
            ThreadClass[] ingesters = new ThreadClass[numThreads];
            for (int t = 0; t < numThreads; t++)
            {
                ThreadClass ingester = new AnonymousClassThread(numDocs, field, pool, writer, this);
                ingesters[t] = ingester;
                ingester.Start();
            }

            // Wait until every ingester has finished before closing the writer.
            foreach (ThreadClass ingester in ingesters)
            {
                ingester.Join();
            }
            writer.Close();

            // Walk every term and position; each payload must decode back to
            // the text of the term it belongs to.
            IndexReader reader = IndexReader.Open(dir, true, null);
            TermEnum    terms  = reader.Terms(null);

            while (terms.Next(null))
            {
                TermPositions tp = reader.TermPositions(terms.Term, null);
                while (tp.Next(null))
                {
                    int freq = tp.Freq;
                    for (int occurrence = 0; occurrence < freq; occurrence++)
                    {
                        tp.NextPosition(null);
                        Assert.AreEqual(pool.BytesToString(tp.GetPayload(new byte[5], 0, null)), terms.Term.Text);
                    }
                }
                tp.Close();
            }
            terms.Close();
            reader.Close();

            // Every thread must have returned its buffer to the pool.
            Assert.AreEqual(pool.Size(), numThreads);
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Writes every stored document of <paramref name="segment"/>, followed by a
        /// full postings dump (term, DF, and per-doc TF/positions), to the writer.
        /// All index resources are now released even if dumping fails part-way;
        /// the original leaked the TermEnum, reader and directory on exception.
        /// </summary>
        /// <param name="out_Renamed">destination for the human-readable dump</param>
        /// <param name="segment">name of the segment to print</param>
        private void  PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
        {
            Directory directory = FSDirectory.GetDirectory(indexDir, false);
            try
            {
                SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
                try
                {
                    for (int i = 0; i < reader.NumDocs(); i++)
                    {
                        out_Renamed.WriteLine(reader.Document(i));
                    }

                    TermEnum tis = reader.Terms();
                    try
                    {
                        while (tis.Next())
                        {
                            out_Renamed.Write(tis.Term());
                            out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                            TermPositions positions = reader.TermPositions(tis.Term());
                            try
                            {
                                while (positions.Next())
                                {
                                    out_Renamed.Write(" doc=" + positions.Doc());
                                    out_Renamed.Write(" TF=" + positions.Freq());
                                    out_Renamed.Write(" pos=");
                                    out_Renamed.Write(positions.NextPosition());
                                    for (int j = 1; j < positions.Freq(); j++)
                                    {
                                        out_Renamed.Write("," + positions.NextPosition());
                                    }
                                    out_Renamed.WriteLine("");
                                }
                            }
                            finally
                            {
                                positions.Close();
                            }
                        }
                    }
                    finally
                    {
                        tis.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Ejemplo n.º 32
0
        /// <summary>
        /// Writes every stored document of <paramref name="segment"/> in the "test"
        /// directory, followed by a full postings dump (term, DF, per-doc TF and
        /// positions), to the console. All index resources are now released even if
        /// dumping fails part-way; the original leaked the TermEnum, reader and
        /// directory on exception.
        /// </summary>
        /// <param name="segment">name of the segment to print</param>
        internal static void  PrintSegment(System.String segment)
        {
            Directory directory = FSDirectory.GetDirectory("test", false);
            try
            {
                SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));
                try
                {
                    for (int i = 0; i < reader.NumDocs(); i++)
                    {
                        System.Console.Out.WriteLine(reader.Document(i));
                    }

                    TermEnum tis = reader.Terms();
                    try
                    {
                        while (tis.Next())
                        {
                            System.Console.Out.Write(tis.Term());
                            System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

                            TermPositions positions = reader.TermPositions(tis.Term());
                            try
                            {
                                while (positions.Next())
                                {
                                    System.Console.Out.Write(" doc=" + positions.Doc());
                                    System.Console.Out.Write(" TF=" + positions.Freq());
                                    System.Console.Out.Write(" pos=");
                                    System.Console.Out.Write(positions.NextPosition());
                                    for (int j = 1; j < positions.Freq(); j++)
                                    {
                                        System.Console.Out.Write("," + positions.NextPosition());
                                    }
                                    System.Console.Out.WriteLine("");
                                }
                            }
                            finally
                            {
                                positions.Close();
                            }
                        }
                    }
                    finally
                    {
                        tis.Close();
                    }
                }
                finally
                {
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
Ejemplo n.º 33
0
        /// <summary>
        /// Verifies that the TestReader filter only surfaces terms containing the
        /// letter 'e', and only odd document ids for the term "one".
        /// Fix: the TermPositions enumerator is now closed (it was leaked).
        /// </summary>
        public virtual void  TestFilterIndexReader_()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

            Document d1 = new Document();

            d1.Add(Field.Text("default", "one two"));
            writer.AddDocument(d1);

            Document d2 = new Document();

            d2.Add(Field.Text("default", "one three"));
            writer.AddDocument(d2);

            Document d3 = new Document();

            d3.Add(Field.Text("default", "two four"));
            writer.AddDocument(d3);

            writer.Close();

            IndexReader reader = new TestReader(IndexReader.Open(directory));

            // The filter must drop every term that does not contain an 'e'.
            TermEnum terms = reader.Terms();

            while (terms.Next())
            {
                Assert.IsTrue(terms.Term().Text().IndexOf((System.Char) 'e') != -1);
            }
            terms.Close();

            // "one" occurs in docs 0 and 1; the filter keeps only odd doc ids.
            TermPositions positions = reader.TermPositions(new Term("default", "one"));
            try
            {
                while (positions.Next())
                {
                    Assert.IsTrue((positions.Doc() % 2) == 1);
                }
            }
            finally
            {
                positions.Close();
            }

            reader.Close();
        }
Ejemplo n.º 34
0
            /// <summary>
            /// Builds a (multi-)phrase scorer for this query against the given
            /// reader, or null when the query has no term arrays or any position
            /// has no postings available.
            /// </summary>
            public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
            {
                int positionCount = Enclosing_Instance.termArrays.Count;
                if (positionCount == 0)
                {
                    // optimize zero-term case
                    return null;
                }

                TermPositions[] tps = new TermPositions[positionCount];
                for (int slot = 0; slot < tps.Length; slot++)
                {
                    Term[] slotTerms = Enclosing_Instance.termArrays[slot];

                    // Several alternative terms at one position are unioned via
                    // MultipleTermPositions; a single term is read directly.
                    TermPositions p = (slotTerms.Length > 1)
                        ? (TermPositions) new MultipleTermPositions(reader, slotTerms)
                        : reader.TermPositions(slotTerms[0]);

                    if (p == null)
                    {
                        return null;
                    }

                    tps[slot] = p;
                }

                int[] phrasePositions = Enclosing_Instance.GetPositions();
                byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);

                if (Enclosing_Instance.slop == 0)
                {
                    return new ExactPhraseScorer(this, tps, phrasePositions, similarity, fieldNorms);
                }

                return new SloppyPhraseScorer(this, tps, phrasePositions, similarity, Enclosing_Instance.slop, fieldNorms);
            }
Ejemplo n.º 35
0
        /// <summary>
        /// Writes every stored document of the segment described by <paramref name="si"/>,
        /// followed by a full postings dump (term, DF, per-doc TF and positions),
        /// to the writer. The reader and TermEnum are now released even if dumping
        /// fails part-way; the original leaked both on exception.
        /// </summary>
        /// <param name="out_Renamed">destination for the human-readable dump</param>
        /// <param name="si">segment to print</param>
        private void  PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
        {
            SegmentReader reader = SegmentReader.Get(si);
            try
            {
                for (int i = 0; i < reader.NumDocs(); i++)
                {
                    out_Renamed.WriteLine(reader.Document(i));
                }

                TermEnum tis = reader.Terms();
                try
                {
                    while (tis.Next())
                    {
                        out_Renamed.Write(tis.Term());
                        out_Renamed.WriteLine(" DF=" + tis.DocFreq());

                        TermPositions positions = reader.TermPositions(tis.Term());
                        try
                        {
                            while (positions.Next())
                            {
                                out_Renamed.Write(" doc=" + positions.Doc());
                                out_Renamed.Write(" TF=" + positions.Freq());
                                out_Renamed.Write(" pos=");
                                out_Renamed.Write(positions.NextPosition());
                                for (int j = 1; j < positions.Freq(); j++)
                                {
                                    out_Renamed.Write("," + positions.NextPosition());
                                }
                                out_Renamed.WriteLine("");
                            }
                        }
                        finally
                        {
                            positions.Close();
                        }
                    }
                }
                finally
                {
                    tis.Close();
                }
            }
            finally
            {
                reader.Close();
            }
        }
Ejemplo n.º 36
0
		/// <summary>
		/// Initializes the scorer's state: caches the weight's value, converts the
		/// parallel tps/positions arrays into a singly linked list of
		/// PhrasePositions (tracked via the first/last fields), and allocates an
		/// empty priority queue sized to the number of terms.
		/// </summary>
		/// <param name="weight">weight this scorer belongs to; its value is cached</param>
		/// <param name="tps">one TermPositions enumerator per phrase term</param>
		/// <param name="positions">per-term offset, parallel to <paramref name="tps"/></param>
		/// <param name="similarity">similarity passed to the base scorer</param>
		/// <param name="norms">norm values stored for later use in scoring</param>
		internal PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) : base(similarity)
		{
			this.norms = norms;
			this.weight = weight;
			this.value_Renamed = weight.GetValue();
			
			// convert tps to a list
			for (int i = 0; i < tps.Length; i++)
			{
				PhrasePositions pp = new PhrasePositions(tps[i], positions[i]);
				if (last != null)
				{
					// add next to end of list
					last.next = pp;
				}
				else
					first = pp;
				last = pp;
			}
			
			pq = new PhraseQueue(tps.Length); // construct empty pq
		}
Ejemplo n.º 37
0
		internal int[] docMap = null; // maps around deleted docs; stays null when the reader has no deletions
		
		/// <summary>
		/// Captures the merge state for one segment: its doc-id base, reader, term
		/// enumerator and term positions, and precomputes a doc-id remapping table
		/// that renumbers live documents consecutively around deletions.
		/// </summary>
		/// <param name="b">doc-id base of this segment within the merged index</param>
		/// <param name="te">term enumerator for the segment</param>
		/// <param name="r">reader for the segment</param>
		internal SegmentMergeInfo(int b, TermEnum te, Monodoc.Lucene.Net.Index.IndexReader r)
		{
			base_Renamed = b;
			reader = r;
			termEnum = te;
			term = te.Term();
			postings = reader.TermPositions();
			
			// build array which maps document numbers around deletions 
			if (reader.HasDeletions())
			{
				int maxDoc = reader.MaxDoc();
				docMap = new int[maxDoc];
				int j = 0;
				for (int i = 0; i < maxDoc; i++)
				{
					if (reader.IsDeleted(i))
						docMap[i] = - 1;  // deleted docs map to -1
					else
						docMap[i] = j++;  // live docs get consecutive new ids
				}
			}
		}
Ejemplo n.º 38
0
			/// <summary>
			/// Builds a phrase scorer over this query's terms against the given
			/// reader, or null when the query is empty or any term has no postings.
			/// </summary>
			public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
			{
				int termCount = Enclosing_Instance.terms.Count;
				if (termCount == 0)
				{
					// optimize zero-term case
					return null;
				}
				
				TermPositions[] tps = new TermPositions[termCount];
				for (int i = 0; i < termCount; i++)
				{
					TermPositions p = reader.TermPositions(Enclosing_Instance.terms[i]);
					if (p == null)
					{
						return null;
					}
					tps[i] = p;
				}
				
				if (Enclosing_Instance.slop == 0)
				{
					// optimize exact case
					return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
				}
				
				return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
			}
Ejemplo n.º 39
0
		/// <summary>
		/// Creates a phrase scorer with the given slop; all other state is set up
		/// by the base PhraseScorer constructor.
		/// </summary>
		internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms):base(weight, tps, offsets, similarity, norms)
		{
			this.slop = slop;
		}
 /// <summary>
 /// Exact phrase scorer; all construction work is done by the base
 /// PhraseScorer constructor.
 /// </summary>
 internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms)
     : base(weight, tps, positions, similarity, norms)
 {
 }
Ejemplo n.º 41
0
				/// <summary>Wraps an existing TermPositions; behavior comes entirely from the base class.</summary>
				public TestTermPositions(TermPositions in_Renamed):base(in_Renamed)
				{
				}
 /// <summary>
 /// Acquires a TermPositions enumerator from the reader and seeks it to
 /// <paramref name="term"/>; _posLeft starts at 0 (no positions buffered yet).
 /// </summary>
 public AbstractTerminalNode(Term term, IndexReader reader)
 {
     _tp = reader.TermPositions();
     _tp.Seek(term);
     _posLeft = 0;
 }
Ejemplo n.º 43
0
			/// <summary>
			/// Builds a (multi-)phrase scorer for this query, or null when the
			/// query has no term arrays or any position has no postings.
			/// </summary>
			public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
			{
				if (Enclosing_Instance.termArrays.Count == 0)
				{
					// optimize zero-term case
					return null;
				}
				
				TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count];
				for (int i = 0; i < tps.Length; i++)
				{
					Term[] terms = Enclosing_Instance.termArrays[i];
					
					// Several alternative terms at one position are unioned via
					// MultipleTermPositions; a single term is read directly.
					TermPositions p = (terms.Length > 1)
						? (TermPositions) new MultipleTermPositions(reader, terms)
						: reader.TermPositions(terms[0]);
					
					if (p == null)
					{
						return null;
					}
					
					tps[i] = p;
				}
				
				if (Enclosing_Instance.slop == 0)
				{
					return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
				}
				
				return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
			}
			/// <summary>Wraps another TermPositions; all behavior is inherited from the base filter class.</summary>
			public FilterTermPositions(TermPositions in_Renamed):base(in_Renamed)
			{
			}
Ejemplo n.º 45
0
		/// <summary>
		/// Builds an index with payloads in the given Directory and performs
		/// different tests to verify the payload encoding: full payload
		/// round-trip, lazy skipping, varying payload lengths at skip points,
		/// repeated getPayload() calls, and a payload larger than the output
		/// buffer. Fixes: the unused catch variable is removed and both
		/// TermPositions enumerators are now closed before their readers.
		/// </summary>
		/// <param name="dir">directory the test index is created in</param>
		private void  PerformTest(Directory dir)
		{
			PayloadAnalyzer analyzer = new PayloadAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			// should be in sync with value in TermInfosWriter
			int skipInterval = 16;
			
			int numTerms = 5;
			System.String fieldName = "f1";
			
			int numDocs = skipInterval + 1;
			// create content for the test documents with just a few terms
			Term[] terms = GenerateTerms(fieldName, numTerms);
			System.Text.StringBuilder sb = new System.Text.StringBuilder();
			for (int i = 0; i < terms.Length; i++)
			{
				sb.Append(terms[i].text_ForNUnit);
				sb.Append(" ");
			}
			System.String content = sb.ToString();
			
			
			int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
			byte[] payloadData = GenerateRandomData(payloadDataLength);
			
			Document d = new Document();
			d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
			// add the same document multiple times to have the same payload lengths for all
			// occurrences within two consecutive skip intervals
			int offset = 0;
			for (int i = 0; i < 2 * numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
				offset += numTerms;
				writer.AddDocument(d);
			}
			
			// make sure we create more than one segment to test merging
			writer.Flush();
			
			// now we make sure to have different payload lengths next at the next skip point        
			for (int i = 0; i < numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, i);
				offset += i * numTerms;
				writer.AddDocument(d);
			}
			
			writer.Optimize();
			// flush
			writer.Close();
			
			
			/*
			* Verify the index
			* first we test if all payloads are stored correctly
			*/
			IndexReader reader = IndexReader.Open(dir);
			
			byte[] verifyPayloadData = new byte[payloadDataLength];
			offset = 0;
			TermPositions[] tps = new TermPositions[numTerms];
			for (int i = 0; i < numTerms; i++)
			{
				tps[i] = reader.TermPositions(terms[i]);
			}
			
			// advance all enumerators in lock-step and collect every payload byte
			while (tps[0].Next())
			{
				for (int i = 1; i < numTerms; i++)
				{
					tps[i].Next();
				}
				int freq = tps[0].Freq();
				
				for (int i = 0; i < freq; i++)
				{
					for (int j = 0; j < numTerms; j++)
					{
						tps[j].NextPosition();
						tps[j].GetPayload(verifyPayloadData, offset);
						offset += tps[j].GetPayloadLength();
					}
				}
			}
			
			for (int i = 0; i < numTerms; i++)
			{
				tps[i].Close();
			}
			
			AssertByteArrayEquals(payloadData, verifyPayloadData);
			
			/*
			*  test lazy skipping
			*/
			TermPositions tp = reader.TermPositions(terms[0]);
			tp.Next();
			tp.NextPosition();
			// now we don't read this payload
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			byte[] payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[numTerms]);
			tp.NextPosition();
			
			// we don't read this payload and skip to a different document
			tp.SkipTo(5);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[5 * numTerms]);
			
			
			/*
			* Test different lengths at skip points
			*/
			tp.Seek(terms[1]);
			tp.Next();
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(2 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(3 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");
			
			/*
			* Test multiple call of getPayload()
			*/
			tp.GetPayload(null, 0);
			try
			{
				// it is forbidden to call getPayload() more than once
				// without calling nextPosition()
				tp.GetPayload(null, 0);
				Assert.Fail("Expected exception not thrown");
			}
			catch (System.Exception)
			{
				// expected exception
			}
			
			tp.Close(); // was leaked in the original code
			reader.Close();
			
			// test long payload
			analyzer = new PayloadAnalyzer();
			writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			System.String singleTerm = "lucene";
			
			d = new Document();
			d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
			// add a payload whose length is greater than the buffer size of BufferedIndexOutput
			payloadData = GenerateRandomData(2000);
			analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
			writer.AddDocument(d);
			
			
			writer.Optimize();
			// flush
			writer.Close();
			
			reader = IndexReader.Open(dir);
			tp = reader.TermPositions(new Term(fieldName, singleTerm));
			tp.Next();
			tp.NextPosition();
			
			verifyPayloadData = new byte[tp.GetPayloadLength()];
			tp.GetPayload(verifyPayloadData, 0);
			byte[] portion = new byte[1500];
			Array.Copy(payloadData, 100, portion, 0, 1500);
			
			AssertByteArrayEquals(portion, verifyPayloadData);
			tp.Close(); // was leaked in the original code
			reader.Close();
		}
Ejemplo n.º 46
0
		internal PhrasePositions next; // used to make lists
		
		/// <summary>
		/// Binds one term's position stream to its offset value.
		/// </summary>
		/// <param name="t">position stream for one phrase term</param>
		/// <param name="o">offset stored for this entry</param>
		internal PhrasePositions(TermPositions t, int o)
		{
			tp = t;
			offset = o;
		}
Ejemplo n.º 47
0
				/// <summary>
				/// Creates the span scorer, wires up the enclosing weight via
				/// InitBlock, and captures the spans' Positions object for later use.
				/// </summary>
				public PayloadTermSpanScorer(PayloadTermWeight enclosingInstance, TermSpans spans, Weight weight, Similarity similarity, byte[] norms):base(spans, weight, similarity, norms)
				{
					InitBlock(enclosingInstance);
					positions = spans.Positions;
				}