/// <summary>
/// Advances the underlying term positions to the first document >= <paramref name="target"/>.
/// On success, caches the current doc and resets the position counter; on failure, closes the
/// underlying stream and parks <c>doc</c> at a max-int sentinel so further reads see exhaustion.
/// </summary>
internal bool SkipTo(int target)
{
    if (tp.SkipTo(target))
    {
        doc = tp.Doc;
        position = 0;
        return true;
    }

    // Enumeration exhausted: release the underlying stream and mark this
    // enumerator as finished with a sentinel doc id.
    tp.Close();
    doc = System.Int32.MaxValue;
    return false;
}
/// <summary>
/// Skips <paramref name="tp"/> to <paramref name="target"/> and verifies that the skip read no
/// more than <paramref name="maxCounter"/> bytes, landed on the expected document with a single
/// occurrence, and that the one-byte payload equals the target doc id.
/// </summary>
public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);

    // The whole point of skipping is to avoid reading the intervening data.
    Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);

    Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
    Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);

    // The test index stores the doc id as a one-byte payload on the sole position.
    tp.NextPosition();
    byte[] payload = new byte[1];
    tp.GetPayload(payload, 0);
    Assert.AreEqual((byte)target, payload[0], "Wrong payload for the target " + target + ": " + payload[0]);
}
/// <summary>
/// Skips <paramref name="tp"/> to <paramref name="target"/> and asserts the skip stayed within
/// the <paramref name="maxCounter"/> byte budget, reached the right document with frequency 1,
/// and carries a single-byte payload whose value matches the target doc id.
/// </summary>
public virtual void CheckSkipTo(TermPositions tp, int target, int maxCounter)
{
    tp.SkipTo(target);

    // Skipping must not have streamed through all the skipped data.
    Assert.Greater(maxCounter, counter, "Too many bytes read: " + counter);

    Assert.AreEqual(target, tp.Doc, "Wrong document " + tp.Doc + " after skipTo target " + target);
    Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);

    // Each position in the test index carries the doc id as its one-byte payload.
    tp.NextPosition();
    byte[] payloadBuf = new byte[1];
    tp.GetPayload(payloadBuf, 0);
    Assert.AreEqual((byte)target, payloadBuf[0], "Wrong payload for the target " + target + ": " + payloadBuf[0]);
}
/// <summary>
/// Advances every queued term-positions enumerator whose current document precedes
/// <paramref name="target"/>; enumerators that run out are closed and dropped from the queue.
/// Delegates to <c>Next()</c> to position on the resulting head document.
/// </summary>
public bool SkipTo(int target)
{
    while (true)
    {
        var head = _termPositionsQueue.Peek();
        if (head == null || target <= head.Doc())
        {
            break; // queue empty, or head already at/past the target
        }

        TermPositions tp = (TermPositions)_termPositionsQueue.Pop();
        if (tp.SkipTo(target))
        {
            _termPositionsQueue.Put(tp); // re-queue at its new position
        }
        else
        {
            tp.Close(); // exhausted — release its resources
        }
    }
    return Next();
}
/// <summary>
/// Advances every queued term-positions enumerator still positioned before
/// <paramref name="target"/>, closing and discarding any that become exhausted,
/// then delegates to <c>Next(state)</c> to land on the resulting head document.
/// </summary>
public bool SkipTo(int target, IState state)
{
    while (true)
    {
        var head = _termPositionsQueue.Peek();
        if (head == null || target <= head.Doc)
        {
            break; // nothing left to advance, or head already at/past the target
        }

        TermPositions tp = _termPositionsQueue.Pop();
        if (tp.SkipTo(target, state))
        {
            _termPositionsQueue.Add(tp); // re-insert at its new position
        }
        else
        {
            tp.Close(); // exhausted — release its resources
        }
    }
    return Next(state);
}
/// <summary>
/// Skips the wrapped positions enumerator to the first document >= <paramref name="target"/>.
/// On success, caches doc and frequency and pre-reads the first position; on failure, parks
/// <c>internalDoc</c> at int.MaxValue to signal exhaustion.
/// </summary>
public override bool SkipTo(int target)
{
    bool advanced = internalPositions.SkipTo(target);
    if (advanced)
    {
        internalDoc = internalPositions.Doc;
        freq = internalPositions.Freq;
        // Reset the consumed-position counter, then eagerly pull the first
        // position so callers can read it immediately.
        count = 0;
        position = internalPositions.NextPosition();
        count++;
        return true;
    }

    internalDoc = int.MaxValue; // sentinel: enumeration exhausted
    return false;
}
/// <summary>
/// Advances to the first document >= <paramref name="targetDoc"/> (never moving backwards —
/// a target at or before the current doc is bumped to the next doc). Caches the new doc's
/// frequency and resets section/position cursors; returns NO_MORE_DOCS and disposes the
/// underlying enumerator when exhausted.
/// </summary>
public override int FetchDoc(int targetDoc)
{
    // Forward-only iteration: never re-fetch the current doc or one before it.
    int skipTarget = targetDoc <= _curDoc ? _curDoc + 1 : targetDoc;

    if (!_tp.SkipTo(skipTarget))
    {
        // Exhausted — mark terminal state, then release the enumerator.
        _curDoc = DocIdSetIterator.NO_MORE_DOCS;
        _tp.Dispose();
        return _curDoc;
    }

    _curDoc = _tp.Doc;
    _posLeft = _tp.Freq;
    // Invalidate the per-doc cursors so the next position read starts fresh.
    _curSec = -1;
    _curPos = -1;
    return _curDoc;
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

    // should be in sync with value in TermInfosWriter
    int skipInterval = 16;

    int numTerms = 5;
    System.String fieldName = "f1";

    // one more doc than the skip interval, so skip points are exercised
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text);
        sb.Append(" ");
    }
    System.String content = sb.ToString();

    // total payload bytes: 2*numDocs docs with 1-byte payloads per term, plus
    // numDocs docs with i-byte payloads per term (sum 0..numDocs-1 = numDocs*(numDocs-1)/2)
    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = GenerateRandomData(payloadDataLength);
    Document d = new Document();
    d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, null);
    }

    // make sure we create more than one segment to test merging
    writer.Commit(null);

    // now we make sure to have different payload lengths next at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, null);
    }

    writer.Optimize(null);
    // flush
    writer.Close();

    /*
    * Verify the index
    * first we test if all payloads are stored correctly
    */
    IndexReader reader = IndexReader.Open(dir, true, null);
    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = reader.TermPositions(terms[i], null);
    }

    // walk all terms in lock-step (every doc contains every term) and
    // re-collect every payload byte in index order
    while (tps[0].Next(null))
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].Next(null);
        }
        int freq = tps[0].Freq;

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition(null);
                tps[j].GetPayload(verifyPayloadData, offset, null);
                offset += tps[j].PayloadLength;
            }
        }
    }

    for (int i = 0; i < numTerms; i++)
    {
        tps[i].Close();
    }

    // the concatenated payloads must round-trip byte-for-byte
    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
    * test lazy skipping
    */
    TermPositions tp = reader.TermPositions(terms[0], null);
    tp.Next(null);
    tp.NextPosition(null);
    // now we don't read this payload
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    byte[] payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[numTerms]);
    tp.NextPosition(null);

    // we don't read this payload and skip to a different document
    tp.SkipTo(5, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[5 * numTerms]);

    /*
    * Test different lengths at skip points
    */
    tp.Seek(terms[1], null);
    tp.Next(null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(2 * skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(3 * skipInterval - 1, null);
    tp.NextPosition(null);
    // first doc past the 2*numDocs uniform-length docs: payload length varies per doc
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

    /*
    * Test multiple call of getPayload()
    */
    tp.GetPayload(null, 0, null);

    // it is forbidden to call getPayload() more than once
    // without calling nextPosition()
    Assert.Throws <IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

    reader.Close();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    System.String singleTerm = "lucene";

    d = new Document();
    d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d, null);

    writer.Optimize(null);
    // flush
    writer.Close();

    reader = IndexReader.Open(dir, true, null);
    tp = reader.TermPositions(new Term(fieldName, singleTerm), null);
    tp.Next(null);
    tp.NextPosition(null);

    verifyPayloadData = new byte[tp.PayloadLength];
    tp.GetPayload(verifyPayloadData, 0, null);
    byte[] portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    // only bytes [100, 1600) of the generated data were stored as the payload
    AssertByteArrayEquals(portion, verifyPayloadData);
    reader.Close();
}
// Reconstructs the (possibly unstored) content of document docNum into the `doc` table:
// stored fields are copied directly; for every other field, the inverted index is walked
// term-by-term and each term found in the document is written back at its positions.
// NOTE(review): appears intended to run off the UI thread (progress/status callbacks are
// commented "async") — confirm the caller's threading before changing UI interaction here.
void BeginAsyncReconstruction(int docNum, Document document, Hashtable doc)
{
    // get stored fields
    ArrayList sf = new ArrayList();
    for (int i = 0; i < _indexFields.Length; i++)
    {
        Field[] f = document.GetFields(_indexFields[i]);
        if (f == null || f.Length == 0 || !f[0].IsStored())
        {
            continue; // not stored — must be reconstructed from the inverted index below
        }
        // Join multi-valued stored fields with newlines into one display string.
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < f.Length; k++)
        {
            if (k > 0)
            {
                sb.Append('\n');
            }
            sb.Append(f[k].StringValue());
        }
        // Rebuild the field preserving the original's store/index/tokenize/term-vector flags.
        Field field = Legacy.CreateField(_indexFields[i], sb.ToString(), f[0].IsStored(), f[0].IsIndexed(), f[0].IsTokenized(), f[0].IsTermVectorStored());
        field.SetBoost(f[0].GetBoost());
        doc[_indexFields[i]] = field;
        sf.Add(_indexFields[i]); // remember: already handled, skip during the index walk
    }
    String term = null;
    GrowableStringArray terms = null;
    try
    {
        int i = 0;
        // Report progress roughly once per 1% of the total term count.
        int delta = (int)Math.Ceiling(((double)_numTerms / 100));
        TermEnum te = _luke.IndexReader.Terms();
        TermPositions tp = _luke.IndexReader.TermPositions();
        while (te.Next())
        {
            if ((i++ % delta) == 0)
            {
                // update UI - async
                UpdateProgress(i / delta);
            }

            // skip stored fields
            if (sf.Contains(te.Term().Field()))
            {
                continue;
            }

            tp.Seek(te.Term());
            if (!tp.SkipTo(docNum) || tp.Doc() != docNum)
            {
                // this term is not found in the doc
                continue;
            }

            term = te.Term().Text();
            // One GrowableStringArray per field, keyed by field name; positions index into it.
            terms = (GrowableStringArray)doc[te.Term().Field()];
            if (terms == null)
            {
                terms = new GrowableStringArray();
                doc[te.Term().Field()] = terms;
            }

            // Place the term text at each position it occupies in this document.
            for (int k = 0; k < tp.Freq(); k++)
            {
                int pos = tp.NextPosition();
                terms.Set(pos, term);
            }
        }
    }
    catch (Exception exc)
    {
        // Update UI - async
        _luke.ShowStatus(exc.Message);
    }
}