// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
public virtual void testEndOffsetPositionWithTeeSinkTokenFilter()
{
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    // Trailing spaces make the first field instance 7 chars long; with the
    // default offset gap of 1, the second instance starts at offset 8.
    TokenStream tokenStream = analyzer.tokenStream("field", "abcd   ");
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
    TokenStream sink = tee.newSinkTokenStream();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.StoreTermVectors = true;
    ft.StoreTermVectorOffsets = true;
    ft.StoreTermVectorPositions = true;
    Field f1 = new Field("field", tee, ft);
    Field f2 = new Field("field", sink, ft);
    doc.add(f1);
    doc.add(f2);
    w.addDocument(doc);
    w.close();

    IndexReader r = DirectoryReader.open(dir);
    Terms vector = r.getTermVectors(0).terms("field");
    assertEquals(1, vector.size());
    TermsEnum termsEnum = vector.iterator(null);
    termsEnum.next();
    assertEquals(2, termsEnum.totalTermFreq());
    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
    assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, positions.freq());
    positions.nextPosition();
    assertEquals(0, positions.startOffset());
    assertEquals(4, positions.endOffset());
    positions.nextPosition();
    assertEquals(8, positions.startOffset());
    assertEquals(12, positions.endOffset());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
    r.close();
    dir.close();
}
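// A minimal sketch (not part of the original source) of the BaseTokenStreamTestCase
// route the TODO above suggests, assuming the assertTokenStreamContents(stream,
// terms, startOffsets, endOffsets) overload from the Lucene test framework is
// available here. Consuming the tee is what populates the sink, so it is
// verified first; the sink should then replay the same token, position, and
// offset data.
public virtual void testTeeSinkContentsSketch()
{
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", "abcd   "));
    TokenStream sink = tee.newSinkTokenStream();
    assertTokenStreamContents(tee, new string[] { "abcd" }, new int[] { 0 }, new int[] { 4 });
    assertTokenStreamContents(sink, new string[] { "abcd" }, new int[] { 0 }, new int[] { 4 });
}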
internal virtual void addPositions(DocsAndPositionsEnum @in, IndexOutput @out)
{
    int freq = @in.freq();
    @out.writeVInt(freq);
    int previousPosition = 0;
    int previousEndOffset = 0;
    for (int i = 0; i < freq; i++)
    {
        int pos = @in.nextPosition();
        BytesRef payload = @in.Payload;
        // The low-order bit of token is set only if there is a payload; the
        // remaining bits are the delta-encoded position.
        int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
        @out.writeVInt(token);
        previousPosition = pos;
        if (storeOffsets) // don't encode offsets if they are not stored
        {
            int startOffset = @in.startOffset();
            int endOffset = @in.endOffset();
            @out.writeVInt(startOffset - previousEndOffset);
            @out.writeVInt(endOffset - startOffset);
            previousEndOffset = endOffset;
        }
        if (payload != null)
        {
            @out.writeVInt(payload.length);
            @out.writeBytes(payload.bytes, payload.offset, payload.length);
        }
    }
}
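// Hypothetical decoding counterpart to addPositions above (not part of the
// original source), shown only to illustrate the layout it writes: a VInt freq,
// then per position one VInt token whose low bit flags a payload and whose
// remaining bits hold the position delta, optionally two offset VInts, and
// finally the payload bytes when present. The readPositions name and the
// storeOffsets parameter are assumptions for this sketch.
internal virtual void readPositions(IndexInput @in, bool storeOffsets)
{
    int freq = @in.readVInt();
    int position = 0;
    int endOffset = 0;
    for (int i = 0; i < freq; i++)
    {
        int token = @in.readVInt();
        position += token >> 1;             // high bits: delta-encoded position
        bool hasPayload = (token & 1) != 0; // low bit: payload flag
        if (storeOffsets)                   // offsets only exist if they were stored
        {
            int startOffset = endOffset + @in.readVInt();
            endOffset = startOffset + @in.readVInt();
        }
        if (hasPayload)
        {
            int payloadLength = @in.readVInt();
            byte[] payloadBytes = new byte[payloadLength];
            @in.readBytes(payloadBytes, 0, payloadLength);
        }
    }
}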
/// <summary>
/// Make sure we skip wicked long terms.
/// </summary>
public virtual void testWickedLongTerm()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

    char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    string bigTerm = new string(chars);

    // This produces a too-long term:
    string contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new TextField("content", contents, Field.Store.NO));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when the massive term is
    // skipped: "abc"=0, "xyz"=1, the skipped term still consumes position 2,
    // so "another" lands at position 3.
    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
    assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new TextField("content", bigTerm, Field.Store.NO));
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    sa.MaxTokenLength = 100000;
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();
    reader = IndexReader.open(dir);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
}