/// <summary>
/// Builds the helper: <paramref name="src"/> and <paramref name="tok"/> are forwarded to the
/// base constructor, and the analyzer, tokenizer and reader are kept on this instance.
/// </summary>
/// <param name="outerInstance">The analyzer that owns this helper.</param>
/// <param name="src">The tokenizer; passed to the base constructor and also stored locally.</param>
/// <param name="tok">The token stream passed to the base constructor.</param>
/// <param name="reader">The reader stored on this instance.</param>
public TokenStreamComponentsAnonymousInnerClassHelper(
    ClassicAnalyzer outerInstance,
    ClassicTokenizer src,
    TokenStream tok,
    TextReader reader)
    : base(src, tok)
{
    this.src = src;
    this.reader = reader;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Analyzes the same text twice with one <c>ClassicAnalyzer</c>: first with its default
/// settings (all five tokens expected), then with <c>MaxTokenLength</c> set to 5
/// ("toolong" is expected to be missing from the output).
/// </summary>
public virtual void TestMaxTermLength2()
{
    const string input = "ab cd toolong xy z";
    ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

    // Default configuration: nothing is dropped.
    string[] allTokens = { "ab", "cd", "toolong", "xy", "z" };
    AssertAnalyzesTo(analyzer, input, allTokens);

    // With the limit lowered to 5 the seven-character token disappears.
    analyzer.MaxTokenLength = 5;
    string[] keptTokens = { "ab", "cd", "xy", "z" };
    // NOTE(review): presumably the expected position increments (the 2 covering the
    // gap left by the dropped token) - confirm against the AssertAnalyzesTo overload.
    int[] expectedIncrements = { 1, 1, 2, 1 };
    AssertAnalyzesTo(analyzer, input, keptTokens, expectedIncrements);
}
/// <summary>
/// Indexes a document whose content contains a term one character longer than
/// <c>IndexWriter.MAX_TERM_LENGTH</c>, followed by a normal document, and checks that
/// the remaining terms, the position of "another", and both documents are present in
/// the index. Then indexes a document whose single term is exactly
/// <c>IndexWriter.MAX_TERM_LENGTH</c> characters long (with the analyzer's
/// <c>MaxTokenLength</c> raised to 100000) and checks that the term can be found.
/// </summary>
public virtual void TestWickedLongTerm()
{
    using (RAMDirectory dir = new RAMDirectory())
    {
        // A term of exactly the maximum length, made entirely of 'x'.
        char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
        Arrays.Fill(chars, 'x');
        string bigTerm = new string(chars);
        Document doc = new Document();

        using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))))
        {
            // This produces a too-long term:
            string contents = "abc xyz x" + bigTerm + " another term";
            doc.Add(new TextField("content", contents, Field.Store.NO));
            writer.AddDocument(doc);

            // Make sure we can add another normal document
            doc = new Document();
            doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
            writer.AddDocument(doc);
        }

#pragma warning disable 612, 618
        using (IndexReader reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
        {
            // Make sure all terms < max size were indexed
            assertEquals(2, reader.DocFreq(new Term("content", "abc")));
            assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
            assertEquals(1, reader.DocFreq(new Term("content", "term")));
            assertEquals(1, reader.DocFreq(new Term("content", "another")));

            // Make sure position is still incremented when
            // massive term is skipped:
            DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));
            assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, tps.Freq());
            assertEquals(3, tps.NextPosition());

            // Make sure the doc that has the massive term is in
            // the index:
            assertEquals("document with wicked long term is not in the index!", 2, reader.NumDocs);
        }

        // Make sure we can add a document with exactly the
        // maximum length term, and search on that term:
        doc = new Document();
        doc.Add(new TextField("content", bigTerm, Field.Store.NO));
        ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        // Raise the analyzer's token-length limit so bigTerm is not dropped by the analyzer.
        sa.MaxTokenLength = 100000;
        using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)))
        {
            writer.AddDocument(doc);
        }

#pragma warning disable 612, 618
        using (var reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
        {
            assertEquals(1, reader.DocFreq(new Term("content", bigTerm)));
        }
    }
}
/// <summary>
/// Checks that a <c>ClassicAnalyzer</c> created for <c>LuceneVersion.LUCENE_30</c>
/// turns "test\u02C6test" into two separate "test" tokens.
/// </summary>
public virtual void TestJava14BWCompatibility()
{
    string[] expectedTokens = { "test", "test" };

    // 612/618 are the compiler's "member is obsolete" warnings; silenced only for this line.
#pragma warning disable 612, 618
    var legacyAnalyzer = new ClassicAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(legacyAnalyzer, "test\u02C6test", expectedTokens);
}
/// <summary>
/// Checks domain-name tokenization: "www.nutch.org" stays a single token, and the same
/// name with a trailing dot yields "www.nutch.org" typed as <c>&lt;HOST&gt;</c>, both for
/// the current version and for <c>LuceneVersion.LUCENE_31</c>.
/// See https://issues.apache.org/jira/browse/LUCENE-1068.
/// </summary>
public virtual void TestDomainNames()
{
    const string plainHost = "www.nutch.org";
    const string hostWithTrailingDot = "www.nutch.org.";

    // Current lucene should not show the bug
    ClassicAnalyzer currentAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

    // A plain domain name is a single token.
    AssertAnalyzesTo(currentAnalyzer, plainHost, new string[] { "www.nutch.org" });

    // Note the trailing '.' in the input: it should still be recognized as HOST.
    AssertAnalyzesTo(
        currentAnalyzer,
        hostWithTrailingDot,
        new string[] { "www.nutch.org" },
        new string[] { "<HOST>" });

    // 2.3 should show the bug ("wwwnutchorg" typed as "<ACRONYM>"), and 2.4 should not,
    // but both are obsolete and unsupported, so the latest released compatibility
    // version is checked instead (Robert's gonna break this on 4.0 soon :) ).
#pragma warning disable 612, 618
    ClassicAnalyzer legacyAnalyzer = new ClassicAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

    AssertAnalyzesTo(
        legacyAnalyzer,
        hostWithTrailingDot,
        new string[] { "www.nutch.org" },
        new string[] { "<HOST>" });
}
/// <summary>
/// Analyzes "www.nutch.org." with a current-version <c>ClassicAnalyzer</c> and fails
/// the test if constructing or running it raises a <see cref="System.NullReferenceException"/>.
/// </summary>
public virtual void TestLucene1140()
{
    try
    {
        var hostAnalyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        string[] expectedTokens = { "www.nutch.org" };
        string[] expectedTypes = { "<HOST>" };

        AssertAnalyzesTo(hostAnalyzer, "www.nutch.org.", expectedTokens, expectedTypes);
    }
    catch (System.NullReferenceException)
    {
        // Turn the exception into an explicit assertion failure with a readable message.
        fail("Should not throw an NPE and it did");
    }
}