// Example #1
// 0
 /// <summary>
 /// Builds a token stream component wrapper around the given tokenizer/filter
 /// pair and records the owning analyzer, source tokenizer, and reader.
 /// NOTE(review): the stored fields are presumably consumed by other members of
 /// the enclosing anonymous-helper class — confirm against the full class body.
 /// </summary>
 public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, ClassicTokenizer src, TokenStream tok, TextReader reader)
     : base(src, tok)
 {
     this.outerInstance = outerInstance;
     this.src = src;
     this.reader = reader;
 }
        /// <summary>
        /// Verifies that lowering <c>MaxTokenLength</c> on an already-constructed
        /// <see cref="ClassicAnalyzer"/> takes effect: the over-long token is dropped,
        /// but its position increment (2 before "xy") is preserved.
        /// </summary>
        public virtual void TestMaxTermLength2()
        {
            ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            // With the default max length, all five tokens survive.
            AssertAnalyzesTo(analyzer, "ab cd toolong xy z", new string[] { "ab", "cd", "toolong", "xy", "z" });

            // Shrink the limit below "toolong" (7 chars); it must now be skipped,
            // and "xy" must carry a position increment of 2 to account for the gap.
            analyzer.MaxTokenLength = 5;
            AssertAnalyzesTo(analyzer, "ab cd toolong xy z", new string[] { "ab", "cd", "xy", "z" }, new int[] { 1, 1, 2, 1 });
        }
        /// <summary>
        /// Indexes a document containing a term longer than
        /// <see cref="IndexWriter.MAX_TERM_LENGTH"/> and verifies that:
        /// (1) the over-long term is skipped but the document itself is still indexed,
        /// (2) position increments account for the skipped term, and
        /// (3) a term of exactly the maximum length can be indexed and searched
        ///     once the analyzer's <c>MaxTokenLength</c> is raised.
        /// </summary>
        public virtual void TestWickedLongTerm()
        {
            using (RAMDirectory dir = new RAMDirectory())
            {
                // A term of exactly MAX_TERM_LENGTH characters; prefixing it with
                // "x" below pushes it one character over the limit.
                char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
                Arrays.Fill(chars, 'x');

                string bigTerm = new string(chars);
                Document doc = new Document();

                using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))))
                {
                    // This produces a too-long term:
                    string contents = "abc xyz x" + bigTerm + " another term";
                    doc.Add(new TextField("content", contents, Field.Store.NO));
                    writer.AddDocument(doc);

                    // Make sure we can add another normal document
                    doc = new Document();
                    doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
                    writer.AddDocument(doc);
                }
#pragma warning disable 612, 618
                using (IndexReader reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
                {
                    // Make sure all terms < max size were indexed
                    assertEquals(2, reader.DocFreq(new Term("content", "abc")));
                    assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
                    assertEquals(1, reader.DocFreq(new Term("content", "term")));
                    assertEquals(1, reader.DocFreq(new Term("content", "another")));

                    // Make sure position is still incremented when
                    // massive term is skipped: "another" sits at position 3
                    // (abc=0, xyz=1, skipped long term=2, another=3).
                    DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));
                    assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    assertEquals(1, tps.Freq());
                    assertEquals(3, tps.NextPosition());

                    // Make sure the doc that has the massive term is in
                    // the index (only the term is dropped, not the document):
                    assertEquals("document with wicked long term should be in the index!", 2, reader.NumDocs);
                }

                // Make sure we can add a document with exactly the
                // maximum length term, and search on that term:
                doc = new Document();
                doc.Add(new TextField("content", bigTerm, Field.Store.NO));
                ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
                // Raise the analyzer's token-length cap so bigTerm survives analysis.
                sa.MaxTokenLength = 100000;
                using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)))
                {
                    writer.AddDocument(doc);
                }
#pragma warning disable 612, 618
                using (var reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
                {
                    assertEquals(1, reader.DocFreq(new Term("content", bigTerm)));
                }
            }
        }
        /// <summary>
        /// Backwards-compatibility check: with the obsolete
        /// <see cref="LuceneVersion.LUCENE_30"/> match version, the modifier
        /// letter circumflex (U+02C6) splits "test\u02C6test" into two tokens.
        /// </summary>
        public virtual void TestJava14BWCompatibility()
        {
#pragma warning disable 612, 618
            ClassicAnalyzer analyzer = new ClassicAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618
            AssertAnalyzesTo(analyzer, "test\u02C6test", new string[] { "test", "test" });
        }
        /// <summary>
        /// Verifies domain names are tokenized as a single &lt;HOST&gt; token,
        /// including the trailing-dot case from LUCENE-1068, for both the current
        /// version and the obsolete 3.1 match version.
        /// </summary>
        public virtual void TestDomainNames()
        {
            // Current lucene should not show the bug
            ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            // domain names
            AssertAnalyzesTo(analyzer, "www.nutch.org", new string[] { "www.nutch.org" });

            //Notice the trailing .  See https://issues.apache.org/jira/browse/LUCENE-1068.
            // the following should be recognized as HOST:
            AssertAnalyzesTo(analyzer, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });

            // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
            // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
            // AssertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });

            // 2.4 should not show the bug. But, alas, it's also obsolete,
            // so we check latest released (Robert's gonna break this on 4.0 soon :) )
#pragma warning disable 612, 618
            analyzer = new ClassicAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618
            AssertAnalyzesTo(analyzer, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });
        }
        /// <summary>
        /// Regression test for LUCENE-1140: analyzing a host name with a trailing
        /// dot must not throw a <see cref="System.NullReferenceException"/>.
        /// The try/catch is intentional — catching the NRE lets the test report a
        /// descriptive failure instead of an unhandled exception.
        /// </summary>
        public virtual void TestLucene1140()
        {
            try
            {
                ClassicAnalyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT);
                AssertAnalyzesTo(a, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });
            }
            catch (System.NullReferenceException)
            {
                fail("Should not throw an NPE and it did");
            }
        }