Example #1
        public virtual void TestMaxTermLength()
        {
            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            sa.MaxTokenLength = 5;
            AssertAnalyzesTo(sa, "ab cd toolong xy z", new string[] { "ab", "cd", "xy", "z" });
        }
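A minimal stand-alone sketch of the same MaxTokenLength behaviour outside the test framework, assuming the Lucene.Net 4.8 namespaces and LuceneVersion.LUCENE_48 in place of the test constant TEST_VERSION_CURRENT (the demo class name is illustrative):

        using System;
        using System.IO;
        using Lucene.Net.Analysis;
        using Lucene.Net.Analysis.Standard;
        using Lucene.Net.Analysis.TokenAttributes;
        using Lucene.Net.Util;

        public static class ClassicAnalyzerMaxLengthDemo
        {
            public static void Main()
            {
                var sa = new ClassicAnalyzer(LuceneVersion.LUCENE_48) { MaxTokenLength = 5 };

                // Tokens longer than MaxTokenLength ("toolong") are skipped entirely.
                using (TokenStream ts = sa.GetTokenStream("content", new StringReader("ab cd toolong xy z")))
                {
                    var term = ts.AddAttribute<ICharTermAttribute>();
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        Console.WriteLine(term.ToString()); // ab, cd, xy, z
                    }
                    ts.End();
                }
            }
        }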
Example #2
        public virtual void TestWickedLongTerm()
        {
            using RAMDirectory dir = new RAMDirectory();
            char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
            Arrays.Fill(chars, 'x');

            string   bigTerm = new string(chars);
            Document doc     = new Document();

            using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))))
            {
                // This produces a too-long term:
                string contents = "abc xyz x" + bigTerm + " another term";
                doc.Add(new TextField("content", contents, Field.Store.NO));
                writer.AddDocument(doc);

                // Make sure we can add another normal document
                doc = new Document();
                doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
                writer.AddDocument(doc);
            }
#pragma warning disable 612, 618
            using (IndexReader reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
            {
                // Make sure all terms < max size were indexed
                assertEquals(2, reader.DocFreq(new Term("content", "abc")));
                assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
                assertEquals(1, reader.DocFreq(new Term("content", "term")));
                assertEquals(1, reader.DocFreq(new Term("content", "another")));

                // Make sure position is still incremented when
                // massive term is skipped:
                DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));
                assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(1, tps.Freq);
                assertEquals(3, tps.NextPosition());

                // Make sure the doc that has the massive term is in
                // the index:
                assertEquals("document with wicked long term should is not in the index!", 2, reader.NumDocs);
            }

            // Make sure we can add a document with exactly the
            // maximum length term, and search on that term:
            doc = new Document();
            doc.Add(new TextField("content", bigTerm, Field.Store.NO));
            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
            sa.MaxTokenLength = 100000;
            using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)))
            {
                writer.AddDocument(doc);
            }
#pragma warning disable 612, 618
            using (var reader = IndexReader.Open(dir))
#pragma warning restore 612, 618
            {
                assertEquals(1, reader.DocFreq(new Term("content", bigTerm)));
            }
        }
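The test verifies DocFreq directly; as a hedged extension (not in the original test), the same max-length term could also be searched with a TermQuery. This fragment would sit at the end of TestWickedLongTerm, reusing its dir and bigTerm locals and assuming Lucene.Net.Search is imported:

            // Hypothetical follow-up check (not in the original test): search the
            // max-length term with a TermQuery and confirm exactly one hit.
            using (var reader = DirectoryReader.Open(dir))
            {
                var searcher = new IndexSearcher(reader);
                TopDocs hits = searcher.Search(new TermQuery(new Term("content", bigTerm)), 10);
                assertEquals(1, hits.TotalHits);
            }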
Example #3
        public virtual void TestJava14BWCompatibility()
        {
#pragma warning disable 612, 618
            ClassicAnalyzer sa = new ClassicAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618
            AssertAnalyzesTo(sa, "test\u02C6test", new string[] { "test", "test" });
        }
Example #4
 public virtual void TestLucene1140()
 {
     try
     {
         ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
         AssertAnalyzesTo(analyzer, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });
     }
     catch (System.NullReferenceException)
     {
         fail("Should not throw an NPE and it did");
     }
 }
Example #5
        public virtual void TestDomainNames()
        {
            // Current lucene should not show the bug
            ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            // domain names
            AssertAnalyzesTo(a2, "www.nutch.org", new string[] { "www.nutch.org" });
            // Notice the trailing '.'; see https://issues.apache.org/jira/browse/LUCENE-1068.
            // The following should be recognized as a HOST:
            AssertAnalyzesTo(a2, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });

            // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
            // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
            // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });

            // 2.4 should not show the bug. But, alas, it's also obsolete,
            // so we check the latest released version (Robert's gonna break this on 4.0 soon :) )
#pragma warning disable 612, 618
            a2 = new ClassicAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618
            AssertAnalyzesTo(a2, "www.nutch.org.", new string[] { "www.nutch.org" }, new string[] { "<HOST>" });
        }
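A hedged sketch (not part of the test) of how the <HOST> token type asserted above can be inspected directly via ITypeAttribute; it reuses a2 and assumes System, System.IO, and Lucene.Net.Analysis.TokenAttributes are imported:

            // Illustrative only: print each token together with its type.
            using (TokenStream ts = a2.GetTokenStream("content", new StringReader("www.nutch.org.")))
            {
                var term = ts.AddAttribute<ICharTermAttribute>();
                var type = ts.AddAttribute<ITypeAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    Console.WriteLine(term.ToString() + " : " + type.Type); // www.nutch.org : <HOST>
                }
                ts.End();
            }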
Example #6
        /// <summary>
        /// Make sure we skip wicked long terms.
        /// </summary>
        public virtual void TestWickedLongTerm()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

            char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
            Arrays.Fill(chars, 'x');
            Document doc = new Document();
            string bigTerm = new string(chars);

            // This produces a too-long term:
            string contents = "abc xyz x" + bigTerm + " another term";

            doc.add(new TextField("content", contents, Field.Store.NO));
            writer.addDocument(doc);

            // Make sure we can add another normal document
            doc = new Document();
            doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
            writer.addDocument(doc);
            writer.close();

            IndexReader reader = IndexReader.open(dir);

            // Make sure all terms < max size were indexed
            assertEquals(2, reader.DocFreq(new Term("content", "abc")));
            assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
            assertEquals(1, reader.DocFreq(new Term("content", "term")));
            assertEquals(1, reader.DocFreq(new Term("content", "another")));

            // Make sure position is still incremented when
            // massive term is skipped:
            DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));

            assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, tps.Freq);
            assertEquals(3, tps.NextPosition());

            // Make sure the doc that has the massive term is in
            // the index:
            assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

            reader.close();

            // Make sure we can add a document with exactly the
            // maximum length term, and search on that term:
            doc = new Document();
            doc.add(new TextField("content", bigTerm, Field.Store.NO));
            ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);

            sa.MaxTokenLength = 100000;
            writer            = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
            writer.addDocument(doc);
            writer.close();
            reader = IndexReader.open(dir);
            assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
            reader.close();

            dir.close();
        }
Example #7
        public virtual void TestJava14BWCompatibility()
        {
#pragma warning disable 612, 618
            ClassicAnalyzer sa = new ClassicAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

            AssertAnalyzesTo(sa, "test\u02C6test", new string[] { "test", "test" });
        }
Example #8
 // Stores the outer analyzer, the source tokenizer, and the reader; the enclosing
 // anonymous TokenStreamComponents uses them to re-apply the analyzer's MaxTokenLength
 // when the components are reset on a new reader.
 public TokenStreamComponentsAnonymousInnerClassHelper(ClassicAnalyzer outerInstance, ClassicTokenizer src, TokenStream tok, Reader reader)
     : base(src, tok)
 {
     this.outerInstance = outerInstance;
     this.reader = reader;
     this.src = src;
 }
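For context, a hedged sketch of the analysis chain that a ClassicAnalyzer-style CreateComponents typically builds and hands to this helper as src and tok. It assumes Lucene.Net 4.8 types (Lucene.Net.Analysis.Standard, Lucene.Net.Analysis.Core, Lucene.Net.Util, System.IO); the version constant, sample text, and stop set are illustrative:

 // Sketch of the chain wrapped by the components above:
 // ClassicTokenizer -> ClassicFilter -> LowerCaseFilter -> StopFilter.
 TextReader reader = new StringReader("The Quick Brown Fox");
 var src = new ClassicTokenizer(LuceneVersion.LUCENE_48, reader);
 src.MaxTokenLength = 255;
 TokenStream tok = new ClassicFilter(src);
 tok = new LowerCaseFilter(LuceneVersion.LUCENE_48, tok);
 tok = new StopFilter(LuceneVersion.LUCENE_48, tok, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 // 'src' and 'tok' are what get passed into the constructor above.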