Exemple #1
0
        // LUCENE-1448
        // Indexes the value "abcd   " twice under the same field name -- once
        // through a TeeSinkTokenFilter and once through the sink obtained from
        // it -- and checks the term-vector offsets recorded for both occurrences.
        // TODO: this could be tested with BaseTokenStreamTestCase instead.
        // NOTE: the Java original declared "throws Exception"; .NET has no
        // equivalent of a 'throws' clause.
        public virtual void testEndOffsetPositionWithTeeSinkTokenFilter()
        {
            Directory   directory    = newDirectory();
            Analyzer    mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
            IndexWriter writer       = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer));

            // The tee wraps the analyzer's stream; the sink is created from the tee.
            TeeSinkTokenFilter teeFilter  = new TeeSinkTokenFilter(mockAnalyzer.tokenStream("field", "abcd   "));
            TokenStream        sinkStream = teeFilter.newSinkTokenStream();

            // Not stored, but with term vectors including offsets and positions.
            FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED)
            {
                StoreTermVectors         = true,
                StoreTermVectorOffsets   = true,
                StoreTermVectorPositions = true
            };

            // Both field instances share the name "field": tee first, sink second.
            Document document = new Document();
            document.add(new Field("field", teeFilter, vectorType));
            document.add(new Field("field", sinkStream, vectorType));
            writer.addDocument(document);
            writer.close();

            IndexReader indexReader = DirectoryReader.open(directory);
            Terms       fieldVector = indexReader.getTermVectors(0).terms("field");

            // A single distinct term is expected, occurring twice.
            assertEquals(1, fieldVector.size());
            TermsEnum terms = fieldVector.iterator(null);
            terms.next();
            assertEquals(2, terms.totalTermFreq());

            DocsAndPositionsEnum postings = terms.docsAndPositions(null, null);
            assertTrue(postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(2, postings.freq());

            // First occurrence: offsets 0..4.
            postings.nextPosition();
            assertEquals(0, postings.startOffset());
            assertEquals(4, postings.endOffset());

            // Second occurrence: offsets 8..12.
            postings.nextPosition();
            assertEquals(8, postings.startOffset());
            assertEquals(12, postings.endOffset());

            // Only one document was indexed.
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
            indexReader.close();
            directory.close();
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, final org.apache.lucene.index.DocsAndPositionsEnum in, Sorter.DocMap docMap, boolean storeOffsets) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
            internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets) : base(@in)
            {
                this.maxDoc       = maxDoc;
                this.storeOffsets = storeOffsets;
                if (reuse != null)
                {
                    docs    = reuse.docs;
                    offsets = reuse.offsets;
                    payload = reuse.payload;
                    file    = reuse.file;
                    if (reuse.maxDoc == maxDoc)
                    {
                        sorter = reuse.sorter;
                    }
                    else
                    {
                        sorter = new DocOffsetSorter(maxDoc);
                    }
                }
                else
                {
                    docs    = new int[32];
                    offsets = new long[32];
                    payload = new BytesRef(32);
                    file    = new RAMFile();
                    sorter  = new DocOffsetSorter(maxDoc);
                }
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.store.IndexOutput out = new org.apache.lucene.store.RAMOutputStream(file);
                IndexOutput @out = new RAMOutputStream(file);
                int         doc;
                int         i = 0;

                while ((doc = @in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (i == docs.Length)
                    {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int newLength = org.apache.lucene.util.ArrayUtil.oversize(i + 1, 4);
                        int newLength = ArrayUtil.oversize(i + 1, 4);
                        docs    = Arrays.copyOf(docs, newLength);
                        offsets = Arrays.copyOf(offsets, newLength);
                    }
                    docs[i]    = docMap.oldToNew(doc);
                    offsets[i] = @out.FilePointer;
                    addPositions(@in, @out);
                    i++;
                }
                upto = i;
                sorter.reset(docs, offsets);
                sorter.sort(0, upto);
                @out.close();
                this.postingInput = new RAMInputStream("", file);
            }
 // Builds the enum by reading all documents from the wrapped enum up front.
 // For each document the remapped id (docMap.oldToNew) and the current output
 // file pointer are stored side by side, and addPositions writes that
 // document's data to the output. Afterwards the sorter is reset with the two
 // arrays, the first 'upto' entries are sorted, and an input is opened over
 // the same file.
 //
 // A non-null 'reuse' donates its docs/offsets/payload/file; its sorter is
 // reused only when it was created for an identical maxDoc.
 //
 // NOTE: the Java original declared "throws java.io.IOException" and a 'final'
 // parameter 'in'; .NET supports neither.
 internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets)
     : base(@in)
 {
     this.maxDoc = maxDoc;
     this.storeOffsets = storeOffsets;

     bool recycling = reuse != null;
     if (recycling)
     {
         docs = reuse.docs;
         offsets = reuse.offsets;
         payload = reuse.payload;
         file = reuse.file;
     }
     else
     {
         docs = new int[32];
         offsets = new long[32];
         payload = new BytesRef(32);
         file = new RAMFile();
     }

     // A recycled sorter is only kept when it was built for the same maxDoc.
     if (recycling && reuse.maxDoc == maxDoc)
     {
         sorter = reuse.sorter;
     }
     else
     {
         sorter = new DocOffsetSorter(maxDoc);
     }

     IndexOutput sink = new RAMOutputStream(file);
     int filled = 0;
     while (true)
     {
         int current = @in.nextDoc();
         if (current == DocIdSetIterator.NO_MORE_DOCS)
         {
             break;
         }

         if (filled == docs.Length)
         {
             // Grow docs and offsets in lockstep.
             int capacity = ArrayUtil.oversize(filled + 1, 4);
             docs = Arrays.copyOf(docs, capacity);
             offsets = Arrays.copyOf(offsets, capacity);
         }

         docs[filled] = docMap.oldToNew(current);
         offsets[filled] = sink.FilePointer;
         addPositions(@in, sink);
         filled++;
     }

     upto = filled;
     sorter.reset(docs, offsets);
     sorter.sort(0, upto);
     sink.close();
     this.postingInput = new RAMInputStream("", file);
 }
Exemple #4
0
        /// <summary>
        /// Verifies that a term longer than <c>IndexWriter.MAX_TERM_LENGTH</c> is
        /// skipped while the rest of its document is still indexed, and that a term
        /// of exactly the maximum length can be indexed and looked up.
        /// </summary>
        /// <remarks>
        /// The Java original declared <c>throws java.io.IOException</c>; .NET has
        /// no equivalent of a 'throws' clause.
        /// </remarks>
        public virtual void testWickedLongTerm()
        {
            RAMDirectory directory = new RAMDirectory();

            // A term of exactly the maximum permitted length.
            char[] filler = new char[IndexWriter.MAX_TERM_LENGTH];
            Arrays.fill(filler, 'x');
            string bigTerm = new string(filler);

            IndexWriter firstWriter = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

            // The extra leading "x" pushes the middle token past the maximum length.
            Document oversizedDoc = new Document();
            oversizedDoc.add(new TextField("content", "abc xyz x" + bigTerm + " another term", Field.Store.NO));
            firstWriter.addDocument(oversizedDoc);

            // A normal document must still be accepted afterwards.
            Document normalDoc = new Document();
            normalDoc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
            firstWriter.addDocument(normalDoc);
            firstWriter.close();

            IndexReader firstReader = IndexReader.open(directory);

            // Every term below the size limit was indexed.
            assertEquals(2, firstReader.docFreq(new Term("content", "abc")));
            assertEquals(1, firstReader.docFreq(new Term("content", "bbb")));
            assertEquals(1, firstReader.docFreq(new Term("content", "term")));
            assertEquals(1, firstReader.docFreq(new Term("content", "another")));

            // The skipped term must still advance the position counter:
            // "another" is expected at position 3.
            DocsAndPositionsEnum anotherPostings = MultiFields.getTermPositionsEnum(firstReader, MultiFields.getLiveDocs(firstReader), "content", new BytesRef("another"));
            assertTrue(anotherPostings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, anotherPostings.freq());
            assertEquals(3, anotherPostings.nextPosition());

            // The document carrying the oversized term is itself in the index.
            assertEquals("document with wicked long term should is not in the index!", 2, firstReader.numDocs());

            firstReader.close();

            // Second pass: index a document whose only term has exactly the
            // maximum length, using an analyzer whose token limit is raised.
            ClassicAnalyzer permissiveAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
            permissiveAnalyzer.MaxTokenLength = 100000;

            Document maxLengthDoc = new Document();
            maxLengthDoc.add(new TextField("content", bigTerm, Field.Store.NO));

            IndexWriter secondWriter = new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, permissiveAnalyzer));
            secondWriter.addDocument(maxLengthDoc);
            secondWriter.close();

            IndexReader secondReader = IndexReader.open(directory);
            assertEquals(1, secondReader.docFreq(new Term("content", bigTerm)));
            secondReader.close();

            directory.close();
        }