public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    if (Enclosing_Instance.terms.Count == 0) // optimize zero-term case
        return null;

    TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count];
    for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
    {
        TermPositions p = reader.TermPositions(Enclosing_Instance.terms[i]);
        if (p == null)
            return null;
        tps[i] = p;
    }

    if (Enclosing_Instance.slop == 0) // optimize exact case
        return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
    else
        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
}
public virtual void TestPreAnalyzedField()
{
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));

    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(0, termPositions.NextPosition());

    termPositions.Seek(new Term("preanalyzed", "term2"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(2, termPositions.Freq());
    Assert.AreEqual(1, termPositions.NextPosition());
    Assert.AreEqual(3, termPositions.NextPosition());

    termPositions.Seek(new Term("preanalyzed", "term3"));
    Assert.IsTrue(termPositions.Next());
    Assert.AreEqual(1, termPositions.Freq());
    Assert.AreEqual(2, termPositions.NextPosition());
}
public virtual void TestTerms()
{
    TermEnum terms = _reader.Terms();
    Assert.IsTrue(terms != null);
    while (terms.Next())
    {
        Term term = terms.Term;
        Assert.IsTrue(term != null);
        //System.out.println("Term: " + term);
        System.String fieldValue = (System.String) DocHelper.NameValues[term.Field];
        Assert.IsTrue(fieldValue.IndexOf(term.Text) != -1);
    }

    TermDocs termDocs = _reader.TermDocs();
    Assert.IsTrue(termDocs != null);
    termDocs.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(termDocs.Next());
    termDocs.Seek(new Term(DocHelper.NoNormsKey, DocHelper.NoNormsText));
    Assert.IsTrue(termDocs.Next());

    TermPositions positions = _reader.TermPositions();
    positions.Seek(new Term(DocHelper.TextField1Key, "field"));
    Assert.IsTrue(positions != null);
    Assert.IsTrue(positions.Doc == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
public override void Load()
{
    TermPositions tp = null;
    byte[] payloadBuffer = new byte[4]; // four bytes for an int
    try
    {
        tp = _reader.TermPositions(_sizeTerm);
        if (tp == null)
            return;

        while (tp.Next())
        {
            if (tp.Freq > 0)
            {
                tp.NextPosition();
                tp.GetPayload(payloadBuffer, 0);
                int len = BytesToInt(payloadBuffer);
                Allocate(tp.Doc, Math.Min(len, _maxItems), true);
            }
        }
    }
    finally
    {
        if (tp != null)
            tp.Dispose();
    }
}
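The BytesToInt helper used above is not part of the snippet. A minimal sketch of what it might look like, assuming the payload stores the int in little-endian byte order (the actual encoding depends on how the payload was written at index time):

private static int BytesToInt(byte[] bytes)
{
    // Assumption: little-endian layout; reverse the indices for big-endian payloads.
    return (bytes[0] & 0xFF)
         | ((bytes[1] & 0xFF) << 8)
         | ((bytes[2] & 0xFF) << 16)
         | ((bytes[3] & 0xFF) << 24);
}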
/// <summary>Returns an enumeration of all the documents which contain /// <code>term</code>. For each document, in addition to the document number /// and frequency of the term in that document, a list of all of the ordinal /// positions of the term in the document is available. Thus, this method /// implements the mapping: /// /// <p><ul> /// Term => <docNum, freq, /// <pos<sub>1</sub>, pos<sub>2</sub>, ... /// pos<sub>freq-1</sub>> /// ><sup>*</sup> /// </ul> /// <p> This positional information faciliates phrase and proximity searching. /// <p>The enumeration is ordered by document number. Each document number is /// greater than all that precede it in the enumeration. /// </summary> public virtual TermPositions TermPositions(Term term) { TermPositions termPositions = TermPositions(); termPositions.Seek(term); return(termPositions); }
internal int[] docMap = null; // maps around deleted docs

internal SegmentMergeInfo(int b, TermEnum te, Monodoc.Lucene.Net.Index.IndexReader r)
{
    base_Renamed = b;
    reader = r;
    termEnum = te;
    term = te.Term();
    postings = reader.TermPositions();

    // build array which maps document numbers around deletions
    if (reader.HasDeletions())
    {
        int maxDoc = reader.MaxDoc();
        docMap = new int[maxDoc];
        int j = 0;
        for (int i = 0; i < maxDoc; i++)
        {
            if (reader.IsDeleted(i))
            {
                docMap[i] = -1;
            }
            else
            {
                docMap[i] = j++;
            }
        }
    }
}
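A worked illustration (an assumption chosen for clarity, not from the source) of the docMap built above: with maxDoc = 5 and document 2 deleted, surviving documents are renumbered contiguously and deleted slots map to -1 so the merge can skip them:

bool[] deleted = { false, false, true, false, false };
int[] docMap = new int[5];
int next = 0;
for (int i = 0; i < 5; i++)
{
    docMap[i] = deleted[i] ? -1 : next++;
}
// docMap is now { 0, 1, -1, 2, 3 }: old doc 3 becomes doc 2
// in the deletion-free numbering.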
public virtual void TestSeek()
{
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }

    writer.Close();
    IndexReader reader = IndexReader.Open(directory);
    TermPositions tp = reader.TermPositions();
    tp.Seek(new Term(this.field, "b"));
    for (int i = 0; i < 10; i++)
    {
        tp.Next();
        Assert.AreEqual(tp.Doc(), i);
        Assert.AreEqual(tp.NextPosition(), 1);
    }

    tp.Seek(new Term(this.field, "a"));
    for (int i = 0; i < 10; i++)
    {
        tp.Next();
        Assert.AreEqual(tp.Doc(), i);
        Assert.AreEqual(tp.NextPosition(), 0);
    }
}
public virtual void TestTerms()
{
    TermEnum terms = reader.Terms();
    Assert.IsTrue(terms != null);
    while (terms.Next())
    {
        Term term = terms.Term();
        Assert.IsTrue(term != null);
        //System.out.println("Term: " + term);
        System.String fieldValue = (System.String) DocHelper.nameValues[term.Field()];
        Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
    }

    TermDocs termDocs = reader.TermDocs();
    Assert.IsTrue(termDocs != null);
    termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
    Assert.IsTrue(termDocs.Next());
    termDocs.Seek(new Term(DocHelper.NO_NORMS_KEY, DocHelper.NO_NORMS_TEXT));
    Assert.IsTrue(termDocs.Next());

    TermPositions positions = reader.TermPositions();
    positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
    Assert.IsTrue(positions != null);
    Assert.IsTrue(positions.Doc() == 0);
    Assert.IsTrue(positions.NextPosition() >= 0);
}
protected virtual void LoadPayload(Term term)
{
    byte[] payloadBuf = null;
    TermPositions tp = _reader.TermPositions();
    tp.Seek(term);
    while (tp.Next())
    {
        if (tp.Freq > 0)
        {
            tp.NextPosition();
            if (tp.IsPayloadAvailable)
            {
                int len = tp.PayloadLength;
                payloadBuf = tp.GetPayload(payloadBuf, 0);
                Add(tp.Doc, payloadBuf, len);
            }
        }
    }

    // save the last page
    while (_curSlot < MAX_SLOTS)
    {
        _curPage[_curSlot++] = MISSING;
    }
    _list[_curPageNo] = CopyPage(new int[_curData]); // optimize the page to make getNumItems work
    _curPage = null;
}
public TermSpans(TermPositions positions, Term term)
{
    this.internalPositions = positions;
    this.term = term;
    internalDoc = -1;
}
public virtual void TestTerms()
{
    try
    {
        TermEnum terms = reader.Terms();
        Assert.IsTrue(terms != null);
        while (terms.Next())
        {
            Term term = terms.Term();
            Assert.IsTrue(term != null);
            //System.out.println("Term: " + term);
            System.String fieldValue = (System.String) DocHelper.nameValues[term.Field()];
            Assert.IsTrue(fieldValue.IndexOf(term.Text()) != -1);
        }

        TermDocs termDocs = reader.TermDocs();
        Assert.IsTrue(termDocs != null);
        termDocs.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(termDocs.Next());

        TermPositions positions = reader.TermPositions();
        positions.Seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "Field"));
        Assert.IsTrue(positions != null);
        Assert.IsTrue(positions.Doc() == 0);
        Assert.IsTrue(positions.NextPosition() >= 0);
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine(e.StackTrace);
        Assert.IsTrue(false);
    }
}
private float freq; // phrase frequency in current doc as computed by phraseFreq()

internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms) : base(similarity)
{
    this.norms = norms;
    this.weight = weight;
    this.value_Renamed = weight.Value;

    // Convert tps to a list of phrase positions.
    // Note: phrase-position differs from term-position in that its position
    // reflects the phrase offset: pp.pos = tp.pos - offset.
    // This makes it easy to identify a matching (exact) phrase
    // when all PhrasePositions have exactly the same position.
    for (int i = 0; i < tps.Length; i++)
    {
        PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
        if (last != null)
        {
            // add next to end of list
            last.next = pp;
        }
        else
        {
            first = pp;
        }
        last = pp;
    }

    pq = new PhraseQueue(tps.Length); // construct empty pq
    first.doc = -1;
}
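A small illustration (not from the source) of the offset normalization the comment above describes. For the phrase "quick brown fox", the terms have phrase offsets 0, 1, 2; if "quick" occurs at position 7, "brown" at 8, and "fox" at 9, each normalized position tp.pos - offset is 7, and equal normalized positions signal an exact match:

int[] termPos = { 7, 8, 9 };   // tp.pos for each term in one document
int[] offsets = { 0, 1, 2 };   // position of each term within the phrase
bool exactMatch = true;
for (int i = 1; i < termPos.Length; i++)
{
    if (termPos[i] - offsets[i] != termPos[0] - offsets[0])
        exactMatch = false;
}
// exactMatch == true: the phrase starts at position 7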
public virtual void TestPositionIncrementGap()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);

    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));

    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));
    Assert.IsTrue(termPositions.Next());
    int freq = termPositions.Freq();
    Assert.AreEqual(2, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.AreEqual(502, termPositions.NextPosition());
}
public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnonymousClassAnalyzer1(this);

    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));

    writer.AddDocument(doc);
    writer.Flush();
    SegmentInfo info = writer.NewestSegment();
    writer.Close();
    SegmentReader reader = SegmentReader.Get(info);

    TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));
    Assert.IsTrue(termPositions.Next());
    int freq = termPositions.Freq();
    Assert.AreEqual(3, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.AreEqual(true, termPositions.IsPayloadAvailable());
    Assert.AreEqual(6, termPositions.NextPosition());
    Assert.AreEqual(false, termPositions.IsPayloadAvailable());
    Assert.AreEqual(7, termPositions.NextPosition());
    Assert.AreEqual(false, termPositions.IsPayloadAvailable());
}
public IHttpActionResult Post([FromBody] AddNewPositionModel model)
{
    var term = termManager.GetById(model.termId);
    ClientTermViewModel result = null;
    var product = productManager.GetById(model.productId);
    var user = userManager.GetByLogin(model.Login);
    if (user != null && user.Token == model.Token && product != null && term != null)
    {
        var newPosition = new Positions()
        {
            Amount = 1, //TODO
            Description = product.Name,
            ProductId = product.Id,
            Price = product.Price,
            OrderId = term.OrderId,
        };
        positionsManager.AddEntity(newPosition);

        var newTermPosition = new TermPositions()
        {
            TermId = term.Id,
            Amount = 1, //TODO
            Positions = newPosition,
            TermPositionMaterialRsps = new List<TermPositionMaterialRsp>()
        };
        term.TermPositions.Add(newTermPosition);

        // add linked material to position
        foreach (var material in product.ProductMaterialRsps.Where(o => !o.DeleteDate.HasValue))
        {
            newTermPosition.TermPositionMaterialRsps.Add(new TermPositionMaterialRsp()
            {
                Amount = material.Amount,
                MaterialId = material.MaterialId,
                TermPositions = newTermPosition
            });
        }
        positionsManager.SaveChanges();

        if (term != null)
        {
            result = TermViewModelHelper.ToModel(term, true, false);
        }
        return Ok(result);
    }
    return BadRequest();
}
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> private int AppendPostings(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term ResetSkip(); for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.postings; int base_Renamed = smi.base_Renamed; int[] docMap = smi.docMap; postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space if (doc < lastDoc) { throw new System.SystemException("docs out of order"); } df++; if ((df % skipInterval) == 0) { BufferSkip(lastDoc); } int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 lastDoc = doc; int freq = postings.Freq(); if (freq == 1) { freqOutput.WriteVInt(docCode | 1); // write doc & freq=1 } else { freqOutput.WriteVInt(docCode); // write doc freqOutput.WriteVInt(freq); // write frequency in doc } int lastPosition = 0; // write position deltas for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); proxOutput.WriteVInt(position - lastPosition); lastPosition = position; } } } return(df); }
public IHttpActionResult Post([FromBody] AddNewPositionModel model)
{
    var term = termManager.GetById(model.termId);
    ClientTermViewModel result = null;
    var product = productManager.GetById(model.productId);
    var user = userManager.GetByLogin(model.Login);
    if (user != null && user.Token == model.Token && product != null && term != null)
    {
        var newPosition = new Positions()
        {
            Amount = 1, //TODO
            Description = product.Name,
            ProductId = product.Id,
            Price = product.Price,
            OrderId = term.OrderId,
        };
        positionsManager.AddEntity(newPosition);

        var newTermPosition = new TermPositions()
        {
            TermId = term.Id,
            Amount = 1, //TODO
            Positions = newPosition,
            TermPositionMaterialRsps = new List<TermPositionMaterialRsp>()
        };
        term.TermPositions.Add(newTermPosition);

        // add linked material to position
        foreach (var material in product.ProductMaterialRsps.Where(o => !o.DeleteDate.HasValue))
        {
            newTermPosition.TermPositionMaterialRsps.Add(new TermPositionMaterialRsp()
            {
                Amount = material.Amount,
                MaterialId = material.MaterialId,
                TermPositions = newTermPosition
            });
        }
        positionsManager.SaveChanges();

        if (term != null)
        {
            result = TermViewModelHelper.ToModel(term, true, false);
        }
        return Ok(result);
    }
    return BadRequest();
}
internal TermPositions GetPositions()
{
    if (postings == null)
    {
        postings = reader.TermPositions();
    }
    return postings;
}

internal TermPositions GetPositions(IState state)
{
    if (postings == null)
    {
        postings = reader.TermPositions(state);
    }
    return postings;
}

internal TermPositions GetPositions()
{
    if (postings == null)
    {
        postings = reader.TermPositions();
    }
    return postings;
}
/// <summary>Returns an enumeration of all the documents which contain /// <code>term</code>. For each document, in addition to the document number /// and frequency of the term in that document, a list of all of the ordinal /// positions of the term in the document is available. Thus, this method /// implements the mapping: /// /// <p><ul> /// Term => <docNum, freq, /// <pos<sub>1</sub>, pos<sub>2</sub>, ... /// pos<sub>freq-1</sub>> /// ><sup>*</sup> /// </ul> /// <p> This positional information facilitates phrase and proximity searching. /// <p>The enumeration is ordered by document number. Each document number is /// greater than all that precede it in the enumeration. /// </summary> /// <throws> IOException if there is a low-level IO error </throws> public virtual TermPositions TermPositions(Term term) { EnsureOpen(); TermPositions termPositions = TermPositions(); termPositions.Seek(term); return(termPositions); }
public virtual void TestFilterIndexReader_Renamed()
{
    RAMDirectory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    Document d1 = new Document();
    d1.Add(new Field("default", "one two", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d1);

    Document d2 = new Document();
    d2.Add(new Field("default", "one three", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d2);

    Document d3 = new Document();
    d3.Add(new Field("default", "two four", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d3);

    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));
    Assert.IsTrue(reader.IsOptimized());

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf('e') != -1);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    int NUM_DOCS = 3;
    TermDocs td = reader.TermDocs(null);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(td.Next());
        Assert.AreEqual(i, td.Doc());
        Assert.AreEqual(1, td.Freq());
    }
    td.Close();
    reader.Close();
    directory.Close();
}
public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);
    Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);

    Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
    Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
    tp.NextPosition();
    byte[] b = new byte[1];
    tp.GetPayload(b, 0);
    Assert.AreEqual((byte) target, b[0], "Wrong payload for the target " + target + ": " + b[0]);
}

public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);
    Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);

    Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
    Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
    tp.NextPosition();
    byte[] b = new byte[1];
    tp.GetPayload(b, 0);
    Assert.AreEqual((byte) target, b[0], "Wrong payload for the target " + target + ": " + b[0]);
}
/// <summary>Process postings from multiple segments all positioned on the /// same term. Writes out merged entries into freqOutput and /// the proxOutput streams. /// /// </summary> /// <param name="smis">array of segments /// </param> /// <param name="n">number of cells in the array actually occupied /// </param> /// <returns> number of documents across all segments where this term was found /// </returns> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) { FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text); int df = 0; for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { df++; int doc = postings.Doc; if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space int freq = postings.Freq; FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); if (!omitTermFreqAndPositions) { for (int j = 0; j < freq; j++) { int position = postings.NextPosition(); int payloadLength = postings.PayloadLength; if (payloadLength > 0) { if (payloadBuffer == null || payloadBuffer.Length < payloadLength) { payloadBuffer = new byte[payloadLength]; } postings.GetPayload(payloadBuffer, 0); } posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); } posConsumer.Finish(); } } } docConsumer.Finish(); return(df); }
internal TermPositionsQueue(System.Collections.IList termPositions)
{
    Initialize(termPositions.Count);

    System.Collections.IEnumerator i = termPositions.GetEnumerator();
    while (i.MoveNext())
    {
        TermPositions tp = (TermPositions) i.Current;
        if (tp.Next())
        {
            Put(tp);
        }
    }
}
public bool SkipTo(int target, IState state)
{
    while (_termPositionsQueue.Peek() != null && target > _termPositionsQueue.Peek().Doc)
    {
        TermPositions tp = _termPositionsQueue.Pop();
        if (tp.SkipTo(target, state))
        {
            _termPositionsQueue.Add(tp);
        }
        else
        {
            tp.Close();
        }
    }
    return Next(state);
}

public bool SkipTo(int target)
{
    while (_termPositionsQueue.Peek() != null && target > _termPositionsQueue.Peek().Doc())
    {
        TermPositions tp = (TermPositions) _termPositionsQueue.Pop();
        if (tp.SkipTo(target))
        {
            _termPositionsQueue.Put(tp);
        }
        else
        {
            tp.Close();
        }
    }
    return Next();
}
/// <summary> /// Process postings from multiple segments without tf, all positioned on the same term. /// Writes out merged entries only into freqOutput, proxOut is not written. /// </summary> /// <param name="smis">smis array of segments</param> /// <param name="n">number of cells in the array actually occupied</param> /// <returns></returns> private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n) { int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.ResetSkip(); int lastPayloadLength = -1; // ensures that we write the first length for (int i = 0; i < n; i++) { SegmentMergeInfo smi = smis[i]; TermPositions postings = smi.GetPositions(); System.Diagnostics.Debug.Assert(postings != null); int base_Renamed = smi.base_Renamed; int[] docMap = smi.GetDocMap(); postings.Seek(smi.termEnum); while (postings.Next()) { int doc = postings.Doc(); if (docMap != null) { doc = docMap[doc]; // map around deletions } doc += base_Renamed; // convert to merged space if (doc < 0 || (df > 0 && doc <= lastDoc)) { throw new CorruptIndexException("docs out of order (" + doc + " <= " + lastDoc + " )"); } df++; if ((df % skipInterval) == 0) { skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength); skipListWriter.BufferSkip(df); } int docCode = (doc - lastDoc); lastDoc = doc; freqOutput.WriteVInt(docCode); // write doc & freq=1 } } return(df); }
public virtual void TestThreadSafety()
{
    rnd = NewRandom();
    int numThreads = 5;
    int numDocs = 50;
    ByteArrayPool pool = new ByteArrayPool(numThreads, 5);

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    System.String field = "test";

    ThreadClass[] ingesters = new ThreadClass[numThreads];
    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i] = new AnonymousClassThread(numDocs, field, pool, writer, this);
        ingesters[i].Start();
    }

    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i].Join();
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);
    TermEnum terms = reader.Terms(null);
    while (terms.Next(null))
    {
        TermPositions tp = reader.TermPositions(terms.Term, null);
        while (tp.Next(null))
        {
            int freq = tp.Freq;
            for (int i = 0; i < freq; i++)
            {
                tp.NextPosition(null);
                Assert.AreEqual(pool.BytesToString(tp.GetPayload(new byte[5], 0, null)), terms.Term.Text);
            }
        }
        tp.Close();
    }
    terms.Close();
    reader.Close();

    Assert.AreEqual(pool.Size(), numThreads);
}
private void PrintSegment(System.IO.StringWriter out_Renamed, System.String segment)
{
    Directory directory = FSDirectory.GetDirectory(indexDir, false);
    SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));

    for (int i = 0; i < reader.NumDocs(); i++)
    {
        out_Renamed.WriteLine(reader.Document(i));
    }

    TermEnum tis = reader.Terms();
    while (tis.Next())
    {
        out_Renamed.Write(tis.Term());
        out_Renamed.WriteLine(" DF=" + tis.DocFreq());

        TermPositions positions = reader.TermPositions(tis.Term());
        try
        {
            while (positions.Next())
            {
                out_Renamed.Write(" doc=" + positions.Doc());
                out_Renamed.Write(" TF=" + positions.Freq());
                out_Renamed.Write(" pos=");
                out_Renamed.Write(positions.NextPosition());
                for (int j = 1; j < positions.Freq(); j++)
                {
                    out_Renamed.Write("," + positions.NextPosition());
                }
                out_Renamed.WriteLine("");
            }
        }
        finally
        {
            positions.Close();
        }
    }
    tis.Close();
    reader.Close();
    directory.Close();
}

internal static void PrintSegment(System.String segment)
{
    Directory directory = FSDirectory.GetDirectory("test", false);
    SegmentReader reader = new SegmentReader(new SegmentInfo(segment, 1, directory));

    for (int i = 0; i < reader.NumDocs(); i++)
    {
        System.Console.Out.WriteLine(reader.Document(i));
    }

    TermEnum tis = reader.Terms();
    while (tis.Next())
    {
        System.Console.Out.Write(tis.Term());
        System.Console.Out.WriteLine(" DF=" + tis.DocFreq());

        TermPositions positions = reader.TermPositions(tis.Term());
        try
        {
            while (positions.Next())
            {
                System.Console.Out.Write(" doc=" + positions.Doc());
                System.Console.Out.Write(" TF=" + positions.Freq());
                System.Console.Out.Write(" pos=");
                System.Console.Out.Write(positions.NextPosition());
                for (int j = 1; j < positions.Freq(); j++)
                {
                    System.Console.Out.Write("," + positions.NextPosition());
                }
                System.Console.Out.WriteLine("");
            }
        }
        finally
        {
            positions.Close();
        }
    }
    tis.Close();
    reader.Close();
    directory.Close();
}
public virtual void TestFilterIndexReader_()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

    Document d1 = new Document();
    d1.Add(Field.Text("default", "one two"));
    writer.AddDocument(d1);

    Document d2 = new Document();
    d2.Add(Field.Text("default", "one three"));
    writer.AddDocument(d2);

    Document d3 = new Document();
    d3.Add(Field.Text("default", "two four"));
    writer.AddDocument(d3);

    writer.Close();

    IndexReader reader = new TestReader(IndexReader.Open(directory));

    TermEnum terms = reader.Terms();
    while (terms.Next())
    {
        Assert.IsTrue(terms.Term().Text().IndexOf((System.Char) 'e') != -1);
    }
    terms.Close();

    TermPositions positions = reader.TermPositions(new Term("default", "one"));
    while (positions.Next())
    {
        Assert.IsTrue((positions.Doc() % 2) == 1);
    }

    reader.Close();
}
public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    if (Enclosing_Instance.termArrays.Count == 0) // optimize zero-term case
        return null;

    TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count];
    for (int i = 0; i < tps.Length; i++)
    {
        Term[] terms = Enclosing_Instance.termArrays[i];

        TermPositions p;
        if (terms.Length > 1)
            p = new MultipleTermPositions(reader, terms);
        else
            p = reader.TermPositions(terms[0]);

        if (p == null)
            return null;

        tps[i] = p;
    }

    if (Enclosing_Instance.slop == 0)
        return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
    else
        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
}
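An illustration (not from the source) of why the method above branches on terms.Length: in a MultiPhraseQuery-style phrase, one slot may hold several terms, such as synonyms, and that slot needs the union of their postings, which is what MultipleTermPositions provides. Field and term values here are placeholders:

Term[][] termArrays =
{
    new[] { new Term("body", "the") },
    new[] { new Term("body", "fast"), new Term("body", "quick") }, // one slot, two terms
    new[] { new Term("body", "fox") }
};
// Slot 1 would be read through MultipleTermPositions(reader, termArrays[1]);
// slots 0 and 2 use a plain reader.TermPositions(term).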
private void PrintSegment(System.IO.StreamWriter out_Renamed, SegmentInfo si)
{
    SegmentReader reader = SegmentReader.Get(si);

    for (int i = 0; i < reader.NumDocs(); i++)
    {
        out_Renamed.WriteLine(reader.Document(i));
    }

    TermEnum tis = reader.Terms();
    while (tis.Next())
    {
        out_Renamed.Write(tis.Term());
        out_Renamed.WriteLine(" DF=" + tis.DocFreq());

        TermPositions positions = reader.TermPositions(tis.Term());
        try
        {
            while (positions.Next())
            {
                out_Renamed.Write(" doc=" + positions.Doc());
                out_Renamed.Write(" TF=" + positions.Freq());
                out_Renamed.Write(" pos=");
                out_Renamed.Write(positions.NextPosition());
                for (int j = 1; j < positions.Freq(); j++)
                {
                    out_Renamed.Write("," + positions.NextPosition());
                }
                out_Renamed.WriteLine("");
            }
        }
        finally
        {
            positions.Close();
        }
    }
    tis.Close();
    reader.Close();
}
internal PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms) : base(similarity)
{
    this.norms = norms;
    this.weight = weight;
    this.value_Renamed = weight.GetValue();

    // convert tps to a list
    for (int i = 0; i < tps.Length; i++)
    {
        PhrasePositions pp = new PhrasePositions(tps[i], positions[i]);
        if (last != null)
        {
            // add next to end of list
            last.next = pp;
        }
        else
        {
            first = pp;
        }
        last = pp;
    }

    pq = new PhraseQueue(tps.Length); // construct empty pq
}
internal int[] docMap = null; // maps around deleted docs

internal SegmentMergeInfo(int b, TermEnum te, Monodoc.Lucene.Net.Index.IndexReader r)
{
    base_Renamed = b;
    reader = r;
    termEnum = te;
    term = te.Term();
    postings = reader.TermPositions();

    // build array which maps document numbers around deletions
    if (reader.HasDeletions())
    {
        int maxDoc = reader.MaxDoc();
        docMap = new int[maxDoc];
        int j = 0;
        for (int i = 0; i < maxDoc; i++)
        {
            if (reader.IsDeleted(i))
            {
                docMap[i] = -1;
            }
            else
            {
                docMap[i] = j++;
            }
        }
    }
}
public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    if (Enclosing_Instance.terms.Count == 0) // optimize zero-term case
        return null;

    TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count];
    for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
    {
        TermPositions p = reader.TermPositions(Enclosing_Instance.terms[i]);
        if (p == null)
            return null;
        tps[i] = p;
    }

    if (Enclosing_Instance.slop == 0) // optimize exact case
        return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
    else
        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
}
internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms)
    : base(weight, tps, offsets, similarity, norms)
{
    this.slop = slop;
}

internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity, byte[] norms)
    : base(weight, tps, positions, similarity, norms)
{
}

public TestTermPositions(TermPositions in_Renamed) : base(in_Renamed)
{
}
public AbstractTerminalNode(Term term, IndexReader reader)
{
    _tp = reader.TermPositions();
    _tp.Seek(term);
    _posLeft = 0;
}
public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer)
{
    if (Enclosing_Instance.termArrays.Count == 0) // optimize zero-term case
        return null;

    TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count];
    for (int i = 0; i < tps.Length; i++)
    {
        Term[] terms = Enclosing_Instance.termArrays[i];

        TermPositions p;
        if (terms.Length > 1)
            p = new MultipleTermPositions(reader, terms);
        else
            p = reader.TermPositions(terms[0]);

        if (p == null)
            return null;

        tps[i] = p;
    }

    if (Enclosing_Instance.slop == 0)
        return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
    else
        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
}
public FilterTermPositions(TermPositions in_Renamed) : base(in_Renamed)
{
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // should be in sync with value in TermInfosWriter
    int skipInterval = 16;

    int numTerms = 5;
    System.String fieldName = "f1";

    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].text_ForNUnit);
        sb.Append(" ");
    }
    System.String content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = GenerateRandomData(payloadDataLength);

    Document d = new Document();
    d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer.Flush();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d);
    }

    writer.Optimize();
    // flush
    writer.Close();

    /*
     * Verify the index:
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = IndexReader.Open(dir);

    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = reader.TermPositions(terms[i]);
    }

    while (tps[0].Next())
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].Next();
        }
        int freq = tps[0].Freq();

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition();
                tps[j].GetPayload(verifyPayloadData, offset);
                offset += tps[j].GetPayloadLength();
            }
        }
    }

    for (int i = 0; i < numTerms; i++)
    {
        tps[i].Close();
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    TermPositions tp = reader.TermPositions(terms[0]);
    tp.Next();
    tp.NextPosition();

    // now we don't read this payload
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    byte[] payload = tp.GetPayload(null, 0);
    Assert.AreEqual(payload[0], payloadData[numTerms]);
    tp.NextPosition();

    // we don't read this payload and skip to a different document
    tp.SkipTo(5);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    payload = tp.GetPayload(null, 0);
    Assert.AreEqual(payload[0], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp.Seek(terms[1]);
    tp.Next();
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(2 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(3 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");

    /*
     * Test multiple call of getPayload()
     */
    tp.GetPayload(null, 0);
    try
    {
        // it is forbidden to call getPayload() more than once
        // without calling nextPosition()
        tp.GetPayload(null, 0);
        Assert.Fail("Expected exception not thrown");
    }
    catch (System.Exception)
    {
        // expected exception
    }

    reader.Close();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    System.String singleTerm = "lucene";

    d = new Document();
    d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d);

    writer.Optimize();
    // flush
    writer.Close();

    reader = IndexReader.Open(dir);
    tp = reader.TermPositions(new Term(fieldName, singleTerm));
    tp.Next();
    tp.NextPosition();

    verifyPayloadData = new byte[tp.GetPayloadLength()];
    tp.GetPayload(verifyPayloadData, 0);
    byte[] portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, verifyPayloadData);
    reader.Close();
}
internal PhrasePositions next; // used to make lists

internal PhrasePositions(TermPositions t, int o)
{
    tp = t;
    offset = o;
}

public PayloadTermSpanScorer(PayloadTermWeight enclosingInstance, TermSpans spans, Weight weight, Similarity similarity, byte[] norms)
    : base(spans, weight, similarity, norms)
{
    InitBlock(enclosingInstance);
    positions = spans.Positions;
}