Hides the implementation details of obtaining a TokenStream for use with the highlighter: the stream can be obtained either from TermFreqVectors with offsets and (optionally) positions, or from an Analyzer re-parsing the stored content.
示例#1
0
        /// <summary>
        /// Indexes one document built from canned tokens, with term vectors that store
        /// offsets, positions and payloads, and then checks that the token stream returned by
        /// <c>TokenSources.GetTokenStream</c> replays every token with the same term text,
        /// position increment, payload and offsets. The check is run once with the second
        /// argument <c>true</c> and once with <c>false</c>.
        /// </summary>
        public void TestPayloads()
        {
            Directory dir = NewDirectory();

            try
            {
                RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
                Token[]           tokens;
                IndexReader       reader;

                try
                {
                    FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);

                    myFieldType.StoreTermVectors         = (true);
                    myFieldType.StoreTermVectorOffsets   = (true);
                    myFieldType.StoreTermVectorPositions = (true);
                    myFieldType.StoreTermVectorPayloads  = (true);

                    // Reset the shared offset before building the tokens
                    // (presumably getToken advances it -- confirm against the helper).
                    curOffset = 0;

                    tokens = new Token[] {
                        getToken("foxes"),
                        getToken("can"),
                        getToken("jump"),
                        getToken("high")
                    };

                    Document doc = new Document();

                    doc.Add(new Field("field", new CannedTokenStream(tokens), myFieldType));
                    writer.AddDocument(doc);

                    reader = writer.GetReader();
                }
                finally
                {
                    // Release the writer even when indexing fails.
                    writer.Dispose();
                }

                try
                {
                    assertEquals(1, reader.NumDocs);

                    for (int i = 0; i < 2; i++)
                    {
                        // Do this twice, once passing true and then passing
                        // false: they are entirely different code paths
                        // under-the-hood:
                        TokenStream ts = TokenSources.GetTokenStream(reader.GetTermVectors(0).GetTerms("field"), i == 0);

                        ICharTermAttribute          termAtt    = ts.GetAttribute <ICharTermAttribute>();
                        IPositionIncrementAttribute posIncAtt  = ts.GetAttribute <IPositionIncrementAttribute>();
                        IOffsetAttribute            offsetAtt  = ts.GetAttribute <IOffsetAttribute>();
                        IPayloadAttribute           payloadAtt = ts.GetAttribute <IPayloadAttribute>();

                        foreach (Token token in tokens)
                        {
                            assertTrue(ts.IncrementToken());
                            assertEquals(token.toString(), termAtt.toString());
                            assertEquals(token.PositionIncrement, posIncAtt.PositionIncrement);
                            assertEquals(token.Payload, payloadAtt.Payload);
                            assertEquals(token.StartOffset, offsetAtt.StartOffset);
                            assertEquals(token.EndOffset, offsetAtt.EndOffset);
                        }

                        // The stream must be exhausted after the last indexed token.
                        assertFalse(ts.IncrementToken());
                    }
                }
                finally
                {
                    // Release the reader even when an assertion above fails.
                    reader.Dispose();
                }
            }
            finally
            {
                dir.Dispose();
            }
        }
示例#2
0
        /// <summary>
        /// Indexes one document from <c>TokenStreamConcurrent</c> with term vectors that store
        /// offsets and positions, collects the documents matching an in-order span query for
        /// "fox" followed by "jumped", and checks for every matching document that highlighting
        /// the term-vector token stream gives the same fragment as highlighting a fresh
        /// <c>TokenStreamConcurrent</c>.
        /// </summary>
        public void TestConcurrentSpan()
        {
            String      TEXT        = "the fox jumped";
            Directory   directory   = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory,
                                                      NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            try
            {
                Document document = new Document();

                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectorOffsets   = (true);
                customType.StoreTermVectorPositions = (true);
                customType.StoreTermVectors         = (true);
                document.Add(new Field(FIELD, new TokenStreamConcurrent(), customType));
                indexWriter.AddDocument(document);
            }
            finally
            {
                indexWriter.Dispose();
            }
            IndexReader indexReader = DirectoryReader.Open(directory);

            try
            {
                assertEquals(1, indexReader.NumDocs);
                IndexSearcher indexSearcher = NewSearcher(indexReader);
                Query         phraseQuery   = new SpanNearQuery(new SpanQuery[] {
                    new SpanTermQuery(new Term(FIELD, "fox")),
                    new SpanTermQuery(new Term(FIELD, "jumped"))
                }, 0, true);
                FixedBitSet bitset = new FixedBitSet(indexReader.MaxDoc);
                indexSearcher.Search(phraseQuery, new ConcurrentSpanCollectorAnonymousHelper(this, bitset));

                assertEquals(1, bitset.Cardinality());
                int         maxDoc      = indexReader.MaxDoc;
                Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                    new QueryScorer(phraseQuery));

                // Visit every set bit, including the last document. The previous guard
                // (position < maxDoc - 1) was false for the only document (position 0,
                // maxDoc 1), so the highlighter comparison below never ran.
                // The bit set was sized to MaxDoc, so NextSetBit is only asked for an
                // index that is still inside the set; otherwise iteration stops with -1.
                for (int position = bitset.NextSetBit(0);
                     position >= 0;
                     position = (position + 1 < maxDoc) ? bitset.NextSetBit(position + 1) : -1)
                {
                    assertEquals(0, position);
                    TokenStream tokenStream = TokenSources.GetTokenStream(
                        indexReader.GetTermVector(position,
                                                  FIELD), false);
                    assertEquals(highlighter.GetBestFragment(new TokenStreamConcurrent(),
                                                             TEXT), highlighter.GetBestFragment(tokenStream, TEXT));
                }
            }
            finally
            {
                indexReader.Dispose();
                directory.Dispose();
            }
        }
示例#3
0
        /// <summary>
        /// Indexes one document from <c>OverlappingTokenStream</c> with term vectors that store
        /// offsets only (no positions), searches it with a disjunction over the terms "{fox}"
        /// and "fox", and checks that highlighting the term-vector token stream produces
        /// <c>&lt;B&gt;the fox&lt;/B&gt; did not jump</c>.
        /// </summary>
        public void TestOverlapWithOffset()
        {
            String      TEXT        = "the fox did not jump";
            Directory   directory   = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory,
                                                      NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            try
            {
                Document  document   = new Document();
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors       = (true);
                customType.StoreTermVectorOffsets = (true);
                document.Add(new Field(FIELD, new OverlappingTokenStream(), customType));
                indexWriter.AddDocument(document);
            }
            finally
            {
                indexWriter.Dispose();
            }
            IndexReader indexReader = DirectoryReader.Open(directory);

            try
            {
                // Everything that touches the open reader stays inside the try block so
                // that a failing assertion cannot leak the reader or the directory.
                assertEquals(1, indexReader.NumDocs);
                IndexSearcher indexSearcher = NewSearcher(indexReader);

                DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
                query.Add(new SpanTermQuery(new Term(FIELD, "{fox}")));
                query.Add(new SpanTermQuery(new Term(FIELD, "fox")));

                TopDocs hits = indexSearcher.Search(query, 1);
                assertEquals(1, hits.TotalHits);
                Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                    new QueryScorer(query));
                TokenStream tokenStream = TokenSources
                                          .GetTokenStream(
                    indexReader.GetTermVector(0, FIELD),
                    false);
                assertEquals("<B>the fox</B> did not jump",
                             highlighter.GetBestFragment(tokenStream, TEXT));
            }
            finally
            {
                indexReader.Dispose();
                directory.Dispose();
            }
        }
示例#4
0
        /// <summary>
        /// Indexes one document from <c>TokenStreamSparse</c> with term vectors that store
        /// offsets and positions, verifies that the span query "did" followed by "jump"
        /// (built with arguments <c>0, true</c>) finds no hits, and checks that highlighting
        /// the term-vector token stream gives the same fragment as highlighting a fresh
        /// <c>TokenStreamSparse</c>.
        /// </summary>
        public void TestSparseSpan()
        {
            String      text   = "the fox did not jump";
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false)));

            try
            {
                // Term vectors with both offsets and positions.
                FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
                vectorType.StoreTermVectorOffsets   = true;
                vectorType.StoreTermVectorPositions = true;
                vectorType.StoreTermVectors         = true;

                Document doc = new Document();
                doc.Add(new Field(FIELD, new TokenStreamSparse(), vectorType));
                writer.AddDocument(doc);
            }
            finally
            {
                writer.Dispose();
            }

            IndexReader reader = DirectoryReader.Open(dir);

            try
            {
                assertEquals(1, reader.NumDocs);
                IndexSearcher searcher = NewSearcher(reader);

                SpanQuery[] clauses = new SpanQuery[] {
                    new SpanTermQuery(new Term(FIELD, "did")),
                    new SpanTermQuery(new Term(FIELD, "jump"))
                };
                Query spanQuery = new SpanNearQuery(clauses, 0, true);

                // The query is expected to match nothing in the indexed stream.
                TopDocs topDocs = searcher.Search(spanQuery, 1);
                assertEquals(0, topDocs.TotalHits);

                Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter(),
                    new SimpleHTMLEncoder(),
                    new QueryScorer(spanQuery));

                TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(0, FIELD), false);

                // Highlight the original stream first, then the one rebuilt from the term vector.
                String expected = highlighter.GetBestFragment(new TokenStreamSparse(), text);
                String actual   = highlighter.GetBestFragment(fromTermVector, text);
                assertEquals(expected, actual);
            }
            finally
            {
                reader.Dispose();
                dir.Dispose();
            }
        }
示例#5
0
        /// <summary>
        /// Indexes one document whose term vectors store positions but no offsets and checks
        /// that <c>TokenSources.GetTokenStream</c> rejects such a term vector by throwing an
        /// <see cref="ArgumentException"/>.
        /// </summary>
        public void TestTermVectorWithoutOffsetsThrowsException()
        {
            Directory   directory   = NewDirectory();
            IndexWriter indexWriter = new IndexWriter(directory,
                                                      NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            try
            {
                Document  document   = new Document();
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.StoreTermVectors         = (true);
                customType.StoreTermVectorOffsets   = (false);
                customType.StoreTermVectorPositions = (true);
                document.Add(new Field(FIELD, new OverlappingTokenStream(), customType));
                indexWriter.AddDocument(document);
            }
            finally
            {
                indexWriter.Dispose();
            }
            IndexReader indexReader = DirectoryReader.Open(directory);

            try
            {
                assertEquals(1, indexReader.NumDocs);
                TokenSources.GetTokenStream(
                    indexReader.GetTermVector(0, FIELD),
                    false);
                // Reaching this line means no exception was thrown.
                fail("TokenSources.GetTokenStream should throw ArgumentException if term vector has no offsets");
            }
            catch (ArgumentException)
            {
                // expected
            }
            finally
            {
                indexReader.Dispose();
                directory.Dispose();
            }
        }