예제 #1
0
        /// <summary>
        /// Make sure we skip wicked long terms.
        /// </summary>
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testWickedLongTerm() throws java.io.IOException
        public virtual void testWickedLongTerm()
        {
            // Phase 1: index a document containing an over-long token next to ordinary
            // tokens, plus a second ordinary document, using a default ClassicAnalyzer.
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

            // A run of 'x' exactly IndexWriter.MAX_TERM_LENGTH characters long, built
            // directly with the string(char, int) constructor instead of a filled char[].
            string bigTerm = new string('x', IndexWriter.MAX_TERM_LENGTH);

            // Prefixing one extra 'x' makes the third token one character longer than
            // the maximum, i.e. this produces a too-long term:
            string contents = "abc xyz x" + bigTerm + " another term";
            Document doc = new Document();
            doc.add(new TextField("content", contents, Field.Store.NO));
            writer.addDocument(doc);

            // Make sure we can add another normal document
            doc = new Document();
            doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
            writer.addDocument(doc);
            writer.close();

            IndexReader reader = IndexReader.open(dir);

            // Make sure all terms < max size were indexed:
            // "abc" occurs in both documents, the other terms in exactly one.
            assertEquals(2, reader.docFreq(new Term("content", "abc")));
            assertEquals(1, reader.docFreq(new Term("content", "bbb")));
            assertEquals(1, reader.docFreq(new Term("content", "term")));
            assertEquals(1, reader.docFreq(new Term("content", "another")));

            // Make sure position is still incremented when the massive term is skipped:
            // "abc" = 0, "xyz" = 1, skipped term = 2, so "another" is expected at 3.
            DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
            assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, tps.freq());
            assertEquals(3, tps.nextPosition());

            // Make sure the doc that has the massive term is in the index:
            assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

            reader.close();

            // Phase 2: make sure we can add a document with exactly the maximum length
            // term, and search on that term. The analyzer's MaxTokenLength is raised to
            // 100000 first (presumably so the analyzer itself does not drop the token;
            // confirm against ClassicAnalyzer's default limit).
            doc = new Document();
            doc.add(new TextField("content", bigTerm, Field.Store.NO));
            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
            sa.MaxTokenLength = 100000;
            writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
            writer.addDocument(doc);
            writer.close();
            reader = IndexReader.open(dir);
            assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
            reader.close();

            dir.close();
        }
예제 #2
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testMaxTermLength2() throws Exception
        /// <summary>
        /// Analyzes the same text before and after lowering <c>MaxTokenLength</c> to 5
        /// and expects the seven-character token "toolong" to disappear from the output
        /// once the limit is in place.
        /// </summary>
        public virtual void testMaxTermLength2()
        {
            const string text = "ab cd toolong xy z";
            ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            // With the analyzer as constructed, every token is expected to come through.
            string[] everyToken = {"ab", "cd", "toolong", "xy", "z"};
            assertAnalyzesTo(analyzer, text, everyToken);

            // After capping the token length at 5, "toolong" is expected to be dropped.
            analyzer.MaxTokenLength = 5;
            string[] shortTokens = {"ab", "cd", "xy", "z"};
            // NOTE(review): presumably the expected position increments, the 2 marking
            // the gap left by the dropped token; confirm against assertAnalyzesTo.
            int[] increments = {1, 1, 2, 1};
            assertAnalyzesTo(analyzer, text, shortTokens, increments);
        }
예제 #3
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testJava14BWCompatibility() throws Exception
 /// <summary>
 /// Runs a <c>Version.LUCENE_30</c> ClassicAnalyzer over two "test" words joined by
 /// U+02C6 (modifier letter circumflex accent) and expects them to come out as two
 /// separate "test" tokens.
 /// </summary>
 public virtual void testJava14BWCompatibility()
 {
     string[] expectedTokens = {"test", "test"};
     ClassicAnalyzer legacyAnalyzer = new ClassicAnalyzer(Version.LUCENE_30);
     assertAnalyzesTo(legacyAnalyzer, "test\u02C6test", expectedTokens);
 }
예제 #4
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testLucene1140() throws Exception
 /// <summary>
 /// Analyzes a host name with a trailing dot and expects a single "www.nutch.org"
 /// token paired with "&lt;HOST&gt;". A <see cref="System.NullReferenceException"/>
 /// raised along the way is reported as an explicit test failure.
 /// </summary>
 public virtual void testLucene1140()
 {
     const string hostWithTrailingDot = "www.nutch.org.";
     const string expectedTerm = "www.nutch.org";
     const string expectedType = "<HOST>";

     try
     {
       ClassicAnalyzer hostAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
       assertAnalyzesTo(hostAnalyzer, hostWithTrailingDot, new[] {expectedTerm}, new[] {expectedType});
     }
     catch (System.NullReferenceException)
     {
       // Turn the null dereference into a regular assertion failure.
       fail("Should not throw an NPE and it did");
     }
 }
예제 #5
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testDomainNames() throws Exception
        /// <summary>
        /// Checks how domain names are analyzed, with and without a trailing dot, first
        /// by an analyzer at <c>TEST_VERSION_CURRENT</c> and then by one at
        /// <c>Version.LUCENE_31</c>.
        /// </summary>
        public virtual void testDomainNames()
        {
            const string host = "www.nutch.org";
            const string hostWithTrailingDot = "www.nutch.org.";
            const string hostType = "<HOST>";

            // The current version is not expected to show the trailing-dot bug.
            ClassicAnalyzer currentAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            // Plain domain name.
            assertAnalyzesTo(currentAnalyzer, host, new[] {host});

            // Trailing dot (see https://issues.apache.org/jira/browse/LUCENE-1068):
            // the dot is expected to be stripped and the token recognized as HOST.
            assertAnalyzesTo(currentAnalyzer, hostWithTrailingDot, new[] {host}, new[] {hostType});

            // Historical note: 2.3 would show the bug (yielding "wwwnutchorg" typed as
            // "<ACRONYM>") and 2.4 would not, but both versions are obsolete and
            // unsupported, so the check is repeated against the 3.1 behavior instead.
            ClassicAnalyzer releasedAnalyzer = new ClassicAnalyzer(Version.LUCENE_31);
            assertAnalyzesTo(releasedAnalyzer, hostWithTrailingDot, new[] {host}, new[] {hostType});
        }