Esempio n. 1
0
        // LUCENE-1448
        // TODO: instead of testing it this way, we can test
        // with BaseTokenStreamTestCase now...
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception
        public virtual void testEndOffsetPositionWithTeeSinkTokenFilter()
        {
            Directory          dir         = newDirectory();
            Analyzer           analyzer    = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
            IndexWriter        w           = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document           doc         = new Document();
            TokenStream        tokenStream = analyzer.tokenStream("field", "abcd   ");
            TeeSinkTokenFilter tee         = new TeeSinkTokenFilter(tokenStream);
            TokenStream        sink        = tee.newSinkTokenStream();
            FieldType          ft          = new FieldType(TextField.TYPE_NOT_STORED);

            ft.StoreTermVectors         = true;
            ft.StoreTermVectorOffsets   = true;
            ft.StoreTermVectorPositions = true;
            Field f1 = new Field("field", tee, ft);
            Field f2 = new Field("field", sink, ft);

            doc.add(f1);
            doc.add(f2);
            w.addDocument(doc);
            w.close();

            IndexReader r      = DirectoryReader.open(dir);
            Terms       vector = r.getTermVectors(0).terms("field");

            assertEquals(1, vector.size());
            TermsEnum termsEnum = vector.iterator(null);

            termsEnum.next();
            assertEquals(2, termsEnum.totalTermFreq());
            DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);

            assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(2, positions.freq());
            positions.nextPosition();
            assertEquals(0, positions.startOffset());
            assertEquals(4, positions.endOffset());
            positions.nextPosition();
            assertEquals(8, positions.startOffset());
            assertEquals(12, positions.endOffset());
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
            r.close();
            dir.close();
        }
Esempio n. 2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void addPositions(final org.apache.lucene.index.DocsAndPositionsEnum in, final org.apache.lucene.store.IndexOutput out) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
            internal virtual void addPositions(DocsAndPositionsEnum @in, IndexOutput @out)
            {
                int freq = @in.freq();

                @out.writeVInt(freq);
                int previousPosition  = 0;
                int previousEndOffset = 0;

                for (int i = 0; i < freq; i++)
                {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int pos = in.nextPosition();
                    int pos = @in.nextPosition();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.util.BytesRef payload = in.getPayload();
                    BytesRef payload = @in.Payload;
                    // The low-order bit of token is set only if there is a payload, the
                    // previous bits are the delta-encoded position.
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
                    int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
                    @out.writeVInt(token);
                    previousPosition = pos;
                    if (storeOffsets)     // don't encode offsets if they are not stored
                    {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int startOffset = in.startOffset();
                        int startOffset = @in.startOffset();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int endOffset = in.endOffset();
                        int endOffset = @in.endOffset();
                        @out.writeVInt(startOffset - previousEndOffset);
                        @out.writeVInt(endOffset - startOffset);
                        previousEndOffset = endOffset;
                    }
                    if (payload != null)
                    {
                        @out.writeVInt(payload.length);
                        @out.writeBytes(payload.bytes, payload.offset, payload.length);
                    }
                }
            }
Esempio n. 3
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: private void addPositions(final org.apache.lucene.index.DocsAndPositionsEnum in, final org.apache.lucene.store.IndexOutput out) throws java.io.IOException
 //JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
 internal virtual void addPositions(DocsAndPositionsEnum @in, IndexOutput @out)
 {
     int freq = @in.freq();
       @out.writeVInt(freq);
       int previousPosition = 0;
       int previousEndOffset = 0;
       for (int i = 0; i < freq; i++)
       {
     //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
     //ORIGINAL LINE: final int pos = in.nextPosition();
     int pos = @in.nextPosition();
     //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
     //ORIGINAL LINE: final org.apache.lucene.util.BytesRef payload = in.getPayload();
     BytesRef payload = @in.Payload;
     // The low-order bit of token is set only if there is a payload, the
     // previous bits are the delta-encoded position.
     //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
     //ORIGINAL LINE: final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
     int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
     @out.writeVInt(token);
     previousPosition = pos;
     if (storeOffsets) // don't encode offsets if they are not stored
     {
     //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
     //ORIGINAL LINE: final int startOffset = in.startOffset();
       int startOffset = @in.startOffset();
     //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
     //ORIGINAL LINE: final int endOffset = in.endOffset();
       int endOffset = @in.endOffset();
       @out.writeVInt(startOffset - previousEndOffset);
       @out.writeVInt(endOffset - startOffset);
       previousEndOffset = endOffset;
     }
     if (payload != null)
     {
       @out.writeVInt(payload.length);
       @out.writeBytes(payload.bytes, payload.offset, payload.length);
     }
       }
 }
Esempio n. 4
0
        /// <summary>
        /// Make sure we skip wicked long terms.
        /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testWickedLongTerm() throws java.io.IOException
        public virtual void testWickedLongTerm()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

            char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
            Arrays.fill(chars, 'x');
            Document doc = new Document();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String bigTerm = new String(chars);
            string bigTerm = new string(chars);

            // This produces a too-long term:
            string contents = "abc xyz x" + bigTerm + " another term";

            doc.add(new TextField("content", contents, Field.Store.NO));
            writer.addDocument(doc);

            // Make sure we can add another normal document
            doc = new Document();
            doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
            writer.addDocument(doc);
            writer.close();

            IndexReader reader = IndexReader.open(dir);

            // Make sure all terms < max size were indexed
            assertEquals(2, reader.docFreq(new Term("content", "abc")));
            assertEquals(1, reader.docFreq(new Term("content", "bbb")));
            assertEquals(1, reader.docFreq(new Term("content", "term")));
            assertEquals(1, reader.docFreq(new Term("content", "another")));

            // Make sure position is still incremented when
            // massive term is skipped:
            DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));

            assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, tps.freq());
            assertEquals(3, tps.nextPosition());

            // Make sure the doc that has the massive term is in
            // the index:
            assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

            reader.close();

            // Make sure we can add a document with exactly the
            // maximum length term, and search on that term:
            doc = new Document();
            doc.add(new TextField("content", bigTerm, Field.Store.NO));
            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            sa.MaxTokenLength = 100000;
            writer            = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
            writer.addDocument(doc);
            writer.close();
            reader = IndexReader.open(dir);
            assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
            reader.close();

            dir.close();
        }