/// <summary>
/// Make sure we skip wicked long terms.
/// <para>
/// Indexes one document whose text contains a term longer than
/// <c>IndexWriter.MAX_TERM_LENGTH</c> plus one ordinary document, then checks
/// that the normal-sized terms were indexed, that the position of the term
/// following the skipped one is still advanced, and that both documents are
/// present. Finally checks that a term of exactly the maximum length can be
/// indexed and looked up.
/// </para>
/// </summary>
public virtual void testWickedLongTerm()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

    // A term of exactly IndexWriter.MAX_TERM_LENGTH 'x' characters.
    string bigTerm = new string('x', IndexWriter.MAX_TERM_LENGTH);

    // Prefixing one more 'x' produces a too-long term:
    Document doc = new Document();
    string contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new TextField("content", contents, Field.Store.NO));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped: "another" follows "abc", "xyz" and the
    // skipped term, so it is expected at position 3.
    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
    assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new TextField("content", bigTerm, Field.Store.NO));
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    // NOTE(review): presumably the analyzer's default token limit would
    // otherwise drop the max-length term before it reaches the writer - confirm.
    sa.MaxTokenLength = 100000;
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();

    reader = IndexReader.open(dir);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
}
/// <summary>
/// Analyzes the same text twice: first with a freshly constructed analyzer,
/// expecting all five tokens, then with <c>MaxTokenLength</c> set to 5,
/// expecting "toolong" to be absent from the output.
/// </summary>
public virtual void testMaxTermLength2()
{
    string text = "ab cd toolong xy z";
    ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

    // Untouched analyzer: every token comes through.
    string[] allTokens = new string[] { "ab", "cd", "toolong", "xy", "z" };
    assertAnalyzesTo(analyzer, text, allTokens);

    // With the limit at 5, the 7-character token is no longer expected.
    analyzer.MaxTokenLength = 5;
    string[] shortTokens = new string[] { "ab", "cd", "xy", "z" };
    // NOTE(review): presumably position increments; the 2 would reflect the
    // gap left by the dropped token - confirm against assertAnalyzesTo.
    int[] expectedIncrements = new int[] { 1, 1, 2, 1 };
    assertAnalyzesTo(analyzer, text, shortTokens, expectedIncrements);
}
/// <summary>
/// Checks that an analyzer created for <c>Version.LUCENE_30</c> splits the
/// input at the U+02C6 character, producing two "test" tokens.
/// </summary>
public virtual void testJava14BWCompatibility()
{
    ClassicAnalyzer analyzer = new ClassicAnalyzer(Version.LUCENE_30);
    string[] expectedTokens = new string[] { "test", "test" };
    assertAnalyzesTo(analyzer, "test\u02C6test", expectedTokens);
}
/// <summary>
/// Checks that analyzing a host name with a trailing dot yields the single
/// token "www.nutch.org" of type "&lt;HOST&gt;" and does not throw a
/// <c>NullReferenceException</c>.
/// </summary>
public virtual void testLucene1140()
{
    try
    {
        ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        assertAnalyzesTo(analyzer, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });
    }
    catch (System.NullReferenceException e)
    {
        // Append the caught exception so the failure report keeps its
        // details (including the stack trace) instead of discarding them.
        fail("Should not throw an NPE and it did: " + e);
    }
}
/// <summary>
/// Checks that domain names are kept as a single token, and that a domain
/// name with a trailing dot is recognized as "&lt;HOST&gt;", both for the
/// current version and for <c>Version.LUCENE_31</c>.
/// </summary>
public virtual void testDomainNames()
{
    string[] hostToken = new string[] { "www.nutch.org" };
    string[] hostType = new string[] { "<HOST>" };

    // Current lucene should not show the bug
    ClassicAnalyzer currentAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

    // Plain domain name
    assertAnalyzesTo(currentAnalyzer, "www.nutch.org", hostToken);

    // Notice the trailing dot. See https://issues.apache.org/jira/browse/LUCENE-1068.
    // The following should be recognized as HOST:
    assertAnalyzesTo(currentAnalyzer, "www.nutch.org.", hostToken, hostType);

    // 2.3 should show the bug (token "wwwnutchorg" typed "<ACRONYM>"), but it
    // is obsolete and unsupported, so it is not exercised here.

    // 2.4 should not show the bug, but it is also obsolete, so the latest
    // released version is checked instead (expected to break around 4.0).
    ClassicAnalyzer releasedAnalyzer = new ClassicAnalyzer(Version.LUCENE_31);
    assertAnalyzesTo(releasedAnalyzer, "www.nutch.org.", hostToken, hostType);
}