public void TestPayloads()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
    FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
    myFieldType.StoreTermVectors = true;
    myFieldType.StoreTermVectorOffsets = true;
    myFieldType.StoreTermVectorPositions = true;
    myFieldType.StoreTermVectorPayloads = true;

    curOffset = 0;

    Token[] tokens = new Token[] {
        getToken("foxes"),
        getToken("can"),
        getToken("jump"),
        getToken("high")
    };

    Document doc = new Document();
    doc.Add(new Field("field", new CannedTokenStream(tokens), myFieldType));
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();
    assertEquals(1, reader.NumDocs);

    for (int i = 0; i < 2; i++)
    {
        // Do this twice, once passing true and then passing
        // false: they are entirely different code paths
        // under-the-hood:
        TokenStream ts = TokenSources.GetTokenStream(reader.GetTermVectors(0).GetTerms("field"), i == 0);

        ICharTermAttribute termAtt = ts.GetAttribute<ICharTermAttribute>();
        IPositionIncrementAttribute posIncAtt = ts.GetAttribute<IPositionIncrementAttribute>();
        IOffsetAttribute offsetAtt = ts.GetAttribute<IOffsetAttribute>();
        IPayloadAttribute payloadAtt = ts.GetAttribute<IPayloadAttribute>();

        foreach (Token token in tokens)
        {
            assertTrue(ts.IncrementToken());
            assertEquals(token.ToString(), termAtt.ToString());
            assertEquals(token.PositionIncrement, posIncAtt.PositionIncrement);
            assertEquals(token.Payload, payloadAtt.Payload);
            assertEquals(token.StartOffset, offsetAtt.StartOffset);
            assertEquals(token.EndOffset, offsetAtt.EndOffset);
        }

        assertFalse(ts.IncrementToken());
    }

    reader.Dispose();
    dir.Dispose();
}
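
// TestPayloads above relies on a curOffset field and a getToken helper that are not part of
// this excerpt. A minimal sketch is given below, assuming the helper builds a Token at the
// current running offset and stores the term text itself as the payload; the exact offset
// bookkeeping in the real test may differ.
private int curOffset;

private Token getToken(string text)
{
    // Hypothetical helper: the token's payload mirrors its text, so the test can verify that
    // payloads round-trip through the term vector.
    Token t = new Token(text, curOffset, curOffset + text.Length);
    t.Payload = new BytesRef(text);
    curOffset += text.Length + 1; // assumed: advance past the token plus a separating space
    return t;
}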

public void TestConcurrentSpan()
{
    String TEXT = "the fox jumped";
    Directory directory = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false)));
    try
    {
        Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectorOffsets = true;
        customType.StoreTermVectorPositions = true;
        customType.StoreTermVectors = true;
        document.Add(new Field(FIELD, new TokenStreamConcurrent(), customType));
        indexWriter.AddDocument(document);
    }
    finally
    {
        indexWriter.Dispose();
    }
    IndexReader indexReader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, indexReader.NumDocs);
        IndexSearcher indexSearcher = NewSearcher(indexReader);
        Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
            new SpanTermQuery(new Term(FIELD, "fox")),
            new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
        FixedBitSet bitset = new FixedBitSet(indexReader.MaxDoc);
        indexSearcher.Search(phraseQuery, new ConcurrentSpanCollectorAnonymousHelper(this, bitset));
        assertEquals(1, bitset.Cardinality());
        int maxDoc = indexReader.MaxDoc;
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(),
            new SimpleHTMLEncoder(),
            new QueryScorer(phraseQuery));
        for (int position = bitset.NextSetBit(0); position >= 0 && position < maxDoc - 1;
            position = bitset.NextSetBit(position + 1))
        {
            assertEquals(0, position);
            TokenStream tokenStream = TokenSources.GetTokenStream(
                indexReader.GetTermVector(position, FIELD), false);
            assertEquals(highlighter.GetBestFragment(new TokenStreamConcurrent(), TEXT),
                highlighter.GetBestFragment(tokenStream, TEXT));
        }
    }
    finally
    {
        indexReader.Dispose();
        directory.Dispose();
    }
}
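
// The FIELD constant, the TokenStreamConcurrent stream, and the
// ConcurrentSpanCollectorAnonymousHelper collector used above are defined elsewhere in the
// test class and are not shown in this excerpt. The sketch below is one plausible shape for
// FIELD and TokenStreamConcurrent, assuming the stream hand-emits the tokens of
// "the fox jumped" with an extra "jumped" token stacked on "jump" at the same position; the
// exact tokens and offsets in the real source may differ. The collector is assumed to simply
// set a bit in the FixedBitSet for every collected doc.
private const string FIELD = "text"; // assumed field name

private sealed class TokenStreamConcurrent : TokenStream
{
    private Token[] tokens;
    private int i = -1;

    private readonly ICharTermAttribute termAttribute;
    private readonly IOffsetAttribute offsetAttribute;
    private readonly IPositionIncrementAttribute positionIncrementAttribute;

    public TokenStreamConcurrent()
    {
        termAttribute = AddAttribute<ICharTermAttribute>();
        offsetAttribute = AddAttribute<IOffsetAttribute>();
        positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        this.i++;
        if (this.i >= this.tokens.Length)
        {
            return false;
        }
        ClearAttributes();
        termAttribute.SetEmpty().Append(tokens[i].ToString());
        offsetAttribute.SetOffset(tokens[i].StartOffset, tokens[i].EndOffset);
        positionIncrementAttribute.PositionIncrement = tokens[i].PositionIncrement;
        return true;
    }

    public override void Reset()
    {
        this.i = -1;
        // Assumed token layout for "the fox jumped": "jump" and "jumped" share the same
        // position, so a SpanNearQuery on "fox" followed by "jumped" matches the term vector.
        this.tokens = new Token[]
        {
            new Token("the", 0, 3),
            new Token("fox", 4, 7),
            new Token("jump", 8, 14),
            new Token("jumped", 8, 14)
        };
        this.tokens[3].PositionIncrement = 0;
    }
}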

public void TestOverlapWithOffset()
{
    String TEXT = "the fox did not jump";
    Directory directory = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    try
    {
        Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectors = true;
        customType.StoreTermVectorOffsets = true;
        document.Add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.AddDocument(document);
    }
    finally
    {
        indexWriter.Dispose();
    }
    IndexReader indexReader = DirectoryReader.Open(directory);
    assertEquals(1, indexReader.NumDocs);
    IndexSearcher indexSearcher = NewSearcher(indexReader);
    try
    {
        DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        query.Add(new SpanTermQuery(new Term(FIELD, "{fox}")));
        query.Add(new SpanTermQuery(new Term(FIELD, "fox")));
        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        //     new SpanTermQuery(new Term(FIELD, "{fox}")),
        //     new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.Search(query, 1);
        assertEquals(1, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(),
            new SimpleHTMLEncoder(),
            new QueryScorer(query));
        TokenStream tokenStream = TokenSources.GetTokenStream(
            indexReader.GetTermVector(0, FIELD), false);
        assertEquals("<B>the fox</B> did not jump",
            highlighter.GetBestFragment(tokenStream, TEXT));
    }
    finally
    {
        indexReader.Dispose();
        directory.Dispose();
    }
}
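
// OverlappingTokenStream (also used by TestTermVectorWithoutOffsetsThrowsException below) is
// defined elsewhere in the test class. A minimal sketch follows, assuming it emits the tokens
// of "the fox did not jump" plus an extra "{fox}" token whose offsets (0-7) cover "the fox",
// which is what lets the highlighter produce "<B>the fox</B> did not jump"; the exact offsets
// and position increments in the real source may differ.
private sealed class OverlappingTokenStream : TokenStream
{
    private Token[] tokens;
    private int i = -1;

    private readonly ICharTermAttribute termAttribute;
    private readonly IOffsetAttribute offsetAttribute;
    private readonly IPositionIncrementAttribute positionIncrementAttribute;

    public OverlappingTokenStream()
    {
        termAttribute = AddAttribute<ICharTermAttribute>();
        offsetAttribute = AddAttribute<IOffsetAttribute>();
        positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        this.i++;
        if (this.i >= this.tokens.Length)
        {
            return false;
        }
        ClearAttributes();
        termAttribute.SetEmpty().Append(tokens[i].ToString());
        offsetAttribute.SetOffset(tokens[i].StartOffset, tokens[i].EndOffset);
        positionIncrementAttribute.PositionIncrement = tokens[i].PositionIncrement;
        return true;
    }

    public override void Reset()
    {
        this.i = -1;
        this.tokens = new Token[]
        {
            new Token("the", 0, 3),
            new Token("{fox}", 0, 7),   // overlapping token covering "the fox"
            new Token("fox", 4, 7),
            new Token("did", 8, 11),
            new Token("not", 12, 15),
            new Token("jump", 16, 20)
        };
        this.tokens[2].PositionIncrement = 0; // "fox" stacks on the same position as "{fox}"
    }
}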

public void TestSparseSpan()
{
    String TEXT = "the fox did not jump";
    Directory directory = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false)));
    try
    {
        Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectorOffsets = true;
        customType.StoreTermVectorPositions = true;
        customType.StoreTermVectors = true;
        document.Add(new Field(FIELD, new TokenStreamSparse(), customType));
        indexWriter.AddDocument(document);
    }
    finally
    {
        indexWriter.Dispose();
    }
    IndexReader indexReader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, indexReader.NumDocs);
        IndexSearcher indexSearcher = NewSearcher(indexReader);
        Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
            new SpanTermQuery(new Term(FIELD, "did")),
            new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
        TopDocs hits = indexSearcher.Search(phraseQuery, 1);
        assertEquals(0, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(),
            new SimpleHTMLEncoder(),
            new QueryScorer(phraseQuery));
        TokenStream tokenStream = TokenSources.GetTokenStream(
            indexReader.GetTermVector(0, FIELD), false);
        assertEquals(
            highlighter.GetBestFragment(new TokenStreamSparse(), TEXT),
            highlighter.GetBestFragment(tokenStream, TEXT));
    }
    finally
    {
        indexReader.Dispose();
        directory.Dispose();
    }
}
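
// TokenStreamSparse is likewise defined elsewhere in the test class. The sketch below assumes
// it indexes only "the", "fox", and "jump" from "the fox did not jump"; because "did" is never
// indexed, the SpanNearQuery for "did" near "jump" above finds no hits, and the position
// increment of 3 on "jump" preserves the gap left by the skipped words. The exact tokens and
// increments in the real source may differ.
private sealed class TokenStreamSparse : TokenStream
{
    private Token[] tokens;
    private int i = -1;

    private readonly ICharTermAttribute termAttribute;
    private readonly IOffsetAttribute offsetAttribute;
    private readonly IPositionIncrementAttribute positionIncrementAttribute;

    public TokenStreamSparse()
    {
        termAttribute = AddAttribute<ICharTermAttribute>();
        offsetAttribute = AddAttribute<IOffsetAttribute>();
        positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        this.i++;
        if (this.i >= this.tokens.Length)
        {
            return false;
        }
        ClearAttributes();
        termAttribute.SetEmpty().Append(tokens[i].ToString());
        offsetAttribute.SetOffset(tokens[i].StartOffset, tokens[i].EndOffset);
        positionIncrementAttribute.PositionIncrement = tokens[i].PositionIncrement;
        return true;
    }

    public override void Reset()
    {
        this.i = -1;
        this.tokens = new Token[]
        {
            new Token("the", 0, 3),
            new Token("fox", 4, 7),
            new Token("jump", 16, 20)
        };
        // Skip over the unindexed words "did" and "not".
        this.tokens[2].PositionIncrement = 3;
    }
}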

public void TestTermVectorWithoutOffsetsThrowsException()
{
    Directory directory = NewDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
    try
    {
        Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectors = true;
        customType.StoreTermVectorOffsets = false;
        customType.StoreTermVectorPositions = true;
        document.Add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.AddDocument(document);
    }
    finally
    {
        indexWriter.Dispose();
    }
    IndexReader indexReader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, indexReader.NumDocs);
        TokenSources.GetTokenStream(
            indexReader.GetTermVector(0, FIELD), false);
        fail("TokenSources.GetTokenStream should throw ArgumentException if term vector has no offsets");
    }
#pragma warning disable 168
    catch (ArgumentException e)
#pragma warning restore 168
    {
        // expected
    }
    finally
    {
        indexReader.Dispose();
        directory.Dispose();
    }
}