예제 #1
0
        /// <summary>
        /// Runs the analysis-consistency check on the empty string, with the boolean
        /// argument drawn from the same random source that is passed to the check.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Random rnd = Random;
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

            // Drawn after the analyzer is built, exactly where the call's argument list would draw it.
            bool coinFlip = rnd.nextBoolean();
            CheckAnalysisConsistency(rnd, analyzer, coinFlip, "");
        }
        /// <summary>
        /// Builds an analyzer around an exclusion set containing "sekretæren" and
        /// checks that this term is emitted unchanged.
        /// </summary>
        public virtual void TestKeyword()
        {
            string keyword = "sekretæren";

            CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, AsSet(keyword), false);
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, protectedTerms);

            // Input and expected output are the same string.
            CheckOneTerm(analyzer, keyword, keyword);
        }
예제 #3
0
        /// <summary>
        /// Feeds text containing UTF-16 surrogate pairs through the analyzer and
        /// checks that each pair ends up inside a single expected token.
        /// </summary>
        public virtual void TestSurrogates()
        {
            var analyzer = new AnalyzerAnonymousInnerClassHelper3();

            // Two supplementary code points, each spelled as a high + low surrogate.
            string pairA = new string(new char[] { (char)0xD86C, (char)0xDC01 });
            string pairB = new string(new char[] { (char)0xD801, (char)0xDC44 });

            string firstInput = "bar 123" + pairA + "34 5te 987";
            AssertAnalyzesTo(analyzer, firstInput, new string[] { "123𫀁34", "5", "987" });

            string secondInput = "787 " + pairB + "6" + pairA + " art true 734";
            AssertAnalyzesTo(analyzer, secondInput, new string[] { "787", "𐑄6𫀁", "734" });
        }
 /// <summary>
 /// Builds an analyzer around an exclusion set containing "sekretæren" and
 /// checks that this term is emitted unchanged.
 /// </summary>
 /// <remarks>
 /// Machine-converted from Java; the original was declared
 /// <c>public void testKeyword() throws java.io.IOException</c>.
 /// </remarks>
 public virtual void testKeyword()
 {
     string keyword = "sekretæren";

     // Declared 'final' in the Java original.
     CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, asSet(keyword), false);
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, protectedTerms);

     // Input and expected output are the same string.
     checkOneTerm(analyzer, keyword, keyword);
 }
        /// <summary>
        /// Analyzes a sentence containing U+061C and an e-mail address, and checks the
        /// exact token sequence produced: the word is split at U+061C into "t" and "st",
        /// and the address yields "lucene" and "apache.org".
        /// </summary>
        public virtual void TestVersion40()
        {
            Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);

            // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
            // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
            //
            // The input must contain the literal address "lucene@apache.org": the expected
            // tokens "lucene" and "apache.org" can only come from that text. An e-mail
            // obfuscation placeholder had replaced it and is restored here.
            AssertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene", "apache.org" });
        }
예제 #6
0
        /// <summary>
        /// Checks how the analyzer treats a base character followed by a combining
        /// mark, in four scripts. The "Bug" rows record cases where the expected
        /// output lacks the mark; the "Works" rows expect the input unchanged.
        /// </summary>
        public virtual void TestCombiningMarksBackwards()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

            // Each row is { input, expected single term }; rows are checked in order.
            string[][] cases = new string[][]
            {
                new string[] { "ざ", "さ" },   // hiragana Bug
                new string[] { "ザ", "ザ" },   // katakana Works
                new string[] { "壹゙", "壹" },   // ideographic Bug
                new string[] { "아゙", "아゙" }, // hangul Works
            };

            foreach (string[] pair in cases)
            {
                CheckOneTerm(analyzer, pair[0], pair[1]);
            }
        }
예제 #7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testKeyword() throws java.io.IOException
        public virtual void testKeyword()
        {
            // Checks that a term listed in the exclusion set is emitted unchanged.
            string keyword = "sekretæren";

            // Declared 'final' in the Java original.
            CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, asSet(keyword), false);
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, protectedTerms);

            // Input and expected output are the same string.
            checkOneTerm(analyzer, keyword, keyword);
        }
예제 #8
0
        /// <summary>
        /// Runs random data through two analyzers in turn, each for
        /// 1000 * RANDOM_MULTIPLIER iterations.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            // First analyzer variant.
            Analyzer first = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random(), first, 1000 * RANDOM_MULTIPLIER);

            // Second analyzer variant, built only after the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper4(this);
            CheckRandomData(Random(), second, 1000 * RANDOM_MULTIPLIER);
        }
        /// <summary>
        /// Runs random data through two analyzers in turn, each for
        /// 1000 * RandomMultiplier iterations.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            // First analyzer variant.
            Analyzer first = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random, first, 1000 * RandomMultiplier);

            // Second analyzer variant, built only after the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper4(this);
            CheckRandomData(Random, second, 1000 * RandomMultiplier);
        }
예제 #10
0
        /// <summary>
        /// For at least three iterations, builds a random <c>NormalizeCharMap</c>, wraps
        /// it in an analyzer, and runs 100 rounds of random data through that analyzer.
        /// </summary>
        public virtual void TestRandomMaps()
        {
            int iterations = AtLeast(3);

            for (int iteration = 0; iteration < iterations; iteration++)
            {
                // A fresh map and analyzer for every iteration.
                NormalizeCharMap charMap = RandomMap();
                Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, charMap);

                int rounds = 100;
                CheckRandomData(Random(), mappingAnalyzer, rounds);
            }
        }
        /// <summary>
        /// Compares two analyzers on 1000 random HTML-like strings: a
        /// <c>MockAnalyzer</c> configured with <c>jvmLetter</c> and the analyzer under
        /// test. Their token streams for the same input must be equal.
        /// </summary>
        public virtual void TestLetterHtmlish()
        {
            Random rnd = Random;
            Analyzer reference = new MockAnalyzer(rnd, jvmLetter, false);
            Analyzer candidate = new AnalyzerAnonymousInnerClassHelper3(this);

            for (int round = 0; round < 1000; round++)
            {
                string text = TestUtil.RandomHtmlishString(rnd, 20);

                // Each side gets its own reader over the same text; the reference side is created first.
                var referenceStream = reference.GetTokenStream("foo", newStringReader(text));
                var candidateStream = candidate.GetTokenStream("foo", newStringReader(text));

                assertEquals(text, referenceStream, candidateStream);
            }
        }
예제 #12
0
        //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomMaps() throws Exception
        public virtual void testRandomMaps()
        {
            // For at least three iterations: build a random NormalizeCharMap, wrap it
            // in an analyzer, and push 100 rounds of random data through it.
            int iterations = atLeast(3);

            for (int iteration = 0; iteration < iterations; iteration++)
            {
                // Declared 'final' in the Java original.
                NormalizeCharMap charMap = randomMap();
                Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, charMap);

                int rounds = 100;
                checkRandomData(random(), mappingAnalyzer, rounds);
            }
        }
        /// <summary>
        /// Runs random data through ten analyzers parameterized by a random
        /// (min, max) pair, then through one further analyzer using the longer
        /// <c>CheckRandomData</c> overload.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            for (int round = 0; round < 10; round++)
            {
                // min is drawn from [2, 10] first; max is then drawn from [min, 20].
                int lower = TestUtil.NextInt(Random(), 2, 10);
                int upper = TestUtil.NextInt(Random(), lower, 20);

                Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                CheckRandomData(Random(), ranged, 100 * RANDOM_MULTIPLIER);
            }

            Analyzer fixedAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random(), fixedAnalyzer, 1000 * RANDOM_MULTIPLIER, 20, false, false);
        }
        /// <summary>
        /// Runs random data through a dictionary-based analyzer, then through an
        /// analyzer built on a hyphenation tree loaded from the "da_UTF8.xml" resource.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
            CheckRandomData(Random, dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The Java original wrapped the resource URL in an InputSource; here the
            // resource stream is passed directly and disposed when the block exits.
            using (var grammarStream = this.GetType().getResourceAsStream("da_UTF8.xml"))
            {
                HyphenationTree tree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammarStream);
                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
                CheckRandomData(Random, hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
            }
        }
        /// <summary>
        /// Runs random data through ten analyzers parameterized by a random
        /// (min, max) pair, then through one further analyzer using the longer
        /// <c>CheckRandomData</c> overload.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            for (int round = 0; round < 10; round++)
            {
                // min is drawn from [2, 10] first; max is then drawn from [min, 20].
                int lower = TestUtil.NextInt32(Random, 2, 10);
                int upper = TestUtil.NextInt32(Random, lower, 20);

                Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                CheckRandomData(Random, ranged, 100 * RandomMultiplier);
            }

            Analyzer fixedAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random, fixedAnalyzer, 1000 * RandomMultiplier, 20, false, false);
        }
예제 #16
0
        /// <summary>
        /// Repeats a random-data check 10 * RANDOM_MULTIPLIER times, constructing a
        /// new analyzer on every pass.
        /// </summary>
        public virtual void TestMockGraphTokenFilterRandom()
        {
            for (int pass = 0; pass < 10 * RANDOM_MULTIPLIER; pass++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + pass);
                }

                // The analyzer is rebuilt for each pass rather than reused,
                // because MGTF has a fixed seed.
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

                Random rnd = Random;
                int iterations = 5;
                CheckRandomData(rnd, analyzer, iterations, AtLeast(100));
            }
        }
        /// <summary>
        /// Blasts random strings through a dictionary-based analyzer, then through an
        /// analyzer built on a hyphenation tree loaded from "da_UTF8.xml".
        /// </summary>
        /// <remarks>
        /// Machine-converted from Java; the original was declared
        /// <c>public void testRandomStrings() throws Exception</c>.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            // Declared 'final' in the Java original.
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
            checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The hyphenation grammar is located as a resource relative to this type.
            InputSource grammarSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());

            // Declared 'final' in the Java original.
            HyphenationTree tree = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);
            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);
            checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
        }
예제 #18
0
        /// <summary>
        /// Loads a dictionary from "simple.aff" / "simple.dic" with the final
        /// constructor argument set to <c>true</c> (presumably ignore-case, going by
        /// the test name), then checks that "NoChAnGy" is emitted unchanged.
        /// </summary>
        public virtual void TestIgnoreCaseNoSideEffects()
        {
            Dictionary dictionary;

            System.IO.Stream affix = typeof(TestStemmer).getResourceAsStream("simple.aff");
            System.IO.Stream words = typeof(TestStemmer).getResourceAsStream("simple.dic");
            try
            {
                dictionary = new Dictionary(affix, new Stream[] { words }, true);
            }
            finally
            {
                // Both streams are released whether or not the dictionary loaded.
                IOUtils.DisposeWhileHandlingException(affix, words);
            }

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, dictionary);
            CheckOneTerm(analyzer, "NoChAnGy", "NoChAnGy");
        }
예제 #19
0
        /// <summary>
        /// Blasts random strings through ten analyzers parameterized by a random
        /// (min, max) pair, then through one further analyzer using the longer
        /// <c>checkRandomData</c> overload.
        /// </summary>
        /// <remarks>
        /// Machine-converted from Java; the original was declared
        /// <c>public void testRandomStrings() throws Exception</c>.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            for (int round = 0; round < 10; round++)
            {
                // Both bounds were 'final' in the Java original.
                // min is drawn from [2, 10] first; max is then drawn from [min, 20].
                int lower = TestUtil.Next(random(), 2, 10);
                int upper = TestUtil.Next(random(), lower, 20);

                Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                checkRandomData(random(), ranged, 100 * RANDOM_MULTIPLIER);
            }

            Analyzer fixedAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            checkRandomData(random(), fixedAnalyzer, 1000 * RANDOM_MULTIPLIER, 20, false, false);
        }
예제 #20
0
        /// <summary>
        /// Exercises three analyzers built from the same word-delimiter flags and
        /// protected-word set. Each assertion passes the expected terms followed by
        /// three integer arrays (apparently start offsets, end offsets and position
        /// increments, judging by the values and the inline notes).
        /// </summary>
        public virtual void TestPositionIncrements()
        {
            WordDelimiterFlags delimiterFlags =
                WordDelimiterFlags.GENERATE_WORD_PARTS |
                WordDelimiterFlags.GENERATE_NUMBER_PARTS |
                WordDelimiterFlags.CATENATE_ALL |
                WordDelimiterFlags.SPLIT_ON_CASE_CHANGE |
                WordDelimiterFlags.SPLIT_ON_NUMERICS |
                WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE;

            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false);

            // --- First analyzer: whitespace + wdf ---
            Analyzer wdfAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, delimiterFlags, protectedWords);

            // Works as expected here.
            AssertAnalyzesTo(wdfAnalyzer, "LUCENE / SOLR",
                new string[] { "LUCENE", "SOLR" },
                new int[] { 0, 9 },
                new int[] { 6, 13 },
                new int[] { 1, 1 });

            // Only in this case: posInc of 2 ?!
            AssertAnalyzesTo(wdfAnalyzer, "LUCENE / solR",
                new string[] { "LUCENE", "sol", "solR", "R" },
                new int[] { 0, 9, 9, 12 },
                new int[] { 6, 12, 13, 13 },
                new int[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(wdfAnalyzer, "LUCENE / NUTCH SOLR",
                new string[] { "LUCENE", "NUTCH", "SOLR" },
                new int[] { 0, 9, 15 },
                new int[] { 6, 14, 19 },
                new int[] { 1, 1, 1 });

            // --- Second analyzer: consumes tokens with large position increments ---
            Analyzer largeGapAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, delimiterFlags, protectedWords);

            // The increment of "largegap" is preserved.
            AssertAnalyzesTo(largeGapAnalyzer, "LUCENE largegap SOLR",
                new string[] { "LUCENE", "largegap", "SOLR" },
                new int[] { 0, 7, 16 },
                new int[] { 6, 15, 20 },
                new int[] { 1, 10, 1 });

            // The "/" had a position increment of 10, where did it go?!?!!
            AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / SOLR",
                new string[] { "LUCENE", "SOLR" },
                new int[] { 0, 9 },
                new int[] { 6, 13 },
                new int[] { 1, 11 });

            // Here the increment of 10 from the "/" is carried over.
            AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / solR",
                new string[] { "LUCENE", "sol", "solR", "R" },
                new int[] { 0, 9, 9, 12 },
                new int[] { 6, 12, 13, 13 },
                new int[] { 1, 11, 0, 1 });

            AssertAnalyzesTo(largeGapAnalyzer, "LUCENE / NUTCH SOLR",
                new string[] { "LUCENE", "NUTCH", "SOLR" },
                new int[] { 0, 9, 15 },
                new int[] { 6, 14, 19 },
                new int[] { 1, 11, 1 });

            // --- Third analyzer ---
            Analyzer thirdAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, delimiterFlags, protectedWords);

            AssertAnalyzesTo(thirdAnalyzer, "lucene.solr",
                new string[] { "lucene", "lucenesolr", "solr" },
                new int[] { 0, 0, 7 },
                new int[] { 6, 11, 11 },
                new int[] { 1, 0, 1 });

            // The stopword should add a gap here.
            AssertAnalyzesTo(thirdAnalyzer, "the lucene.solr",
                new string[] { "lucene", "lucenesolr", "solr" },
                new int[] { 4, 4, 11 },
                new int[] { 10, 15, 15 },
                new int[] { 2, 0, 1 });
        }
예제 #21
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testIgnoreCaseNoSideEffects() throws Exception
        public virtual void testIgnoreCaseNoSideEffects()
        {
            // Loads a dictionary from "simple.aff" / "simple.dic" with the final
            // constructor argument set to true, then checks that "NoChAnGy" is
            // emitted unchanged.

            // Declared 'final' in the Java original.
            Dictionary dictionary;

            System.IO.Stream affix = typeof(TestStemmer).getResourceAsStream("simple.aff");
            System.IO.Stream words = typeof(TestStemmer).getResourceAsStream("simple.dic");
            try
            {
                dictionary = new Dictionary(affix, Collections.singletonList(words), true);
            }
            finally
            {
                // Both streams are released whether or not the dictionary loaded.
                IOUtils.closeWhileHandlingException(affix, words);
            }

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, dictionary);
            checkOneTerm(analyzer, "NoChAnGy", "NoChAnGy");
        }
예제 #22
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @Test public void testPositionIncrements() throws Exception
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        public virtual void testPositionIncrements()
        {
            // Exercises three analyzers built from the same word-delimiter flags and
            // protected-word set. Each assertion passes the expected terms followed by
            // three integer arrays (apparently start offsets, end offsets and position
            // increments, judging by the values and the inline notes).

            // Declared 'final' in the Java original.
            int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

            // Java original: new HashSet<>(Arrays.asList("NUTCH")). The converter's
            // 'new HashSet <>("NUTCH")' is not valid C# (no diamond operator), so the
            // single protected word is supplied as a one-element string array instead.
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            assertAnalyzesTo(a, "LUCENE / SOLR", new string[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, new int[] { 1, 1 });

            /* only in this case, posInc of 2 ?! */
            assertAnalyzesTo(a, "LUCENE / solR", new string[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, new int[] { 1, 1, 0, 1 });

            assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, new int[] { 1, 1, 1 });

            /* analyzer that will consume tokens with large position increments */
            Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] { "LUCENE", "largegap", "SOLR" }, new int[] { 0, 7, 16 }, new int[] { 6, 15, 20 }, new int[] { 1, 10, 1 });

            /* the "/" had a position increment of 10, where did it go?!?!! */
            assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] { "LUCENE", "SOLR" }, new int[] { 0, 9 }, new int[] { 6, 13 }, new int[] { 1, 11 });

            /* in this case, the increment of 10 from the "/" is carried over */
            assertAnalyzesTo(a2, "LUCENE / solR", new string[] { "LUCENE", "sol", "solR", "R" }, new int[] { 0, 9, 9, 12 }, new int[] { 6, 12, 13, 13 }, new int[] { 1, 11, 0, 1 });

            assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] { "LUCENE", "NUTCH", "SOLR" }, new int[] { 0, 9, 15 }, new int[] { 6, 14, 19 }, new int[] { 1, 11, 1 });

            Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            assertAnalyzesTo(a3, "lucene.solr", new string[] { "lucene", "lucenesolr", "solr" }, new int[] { 0, 0, 7 }, new int[] { 6, 11, 11 }, new int[] { 1, 0, 1 });

            /* the stopword should add a gap here */
            assertAnalyzesTo(a3, "the lucene.solr", new string[] { "lucene", "lucenesolr", "solr" }, new int[] { 4, 4, 11 }, new int[] { 10, 15, 15 }, new int[] { 2, 0, 1 });
        }
        /// <summary>
        /// For at least ten iterations, fills a fresh <c>SynonymMap.Builder</c> with
        /// at least ten random non-empty entries, builds an analyzer from the
        /// resulting map, and runs the analysis-consistency check on the empty string.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Random rnd = Random();
            int iterations = AtLeast(10);

            for (int iteration = 0; iteration < iterations; iteration++)
            {
                // 'b' is not declared in this method; presumably a member of the
                // enclosing class that Add(...) writes into.
                b = new SynonymMap.Builder(rnd.nextBoolean());

                int entryCount = AtLeast(10);
                for (int entry = 0; entry < entryCount; entry++)
                {
                    Add(RandomNonEmptyString(), RandomNonEmptyString(), rnd.nextBoolean());
                }

                SynonymMap synonyms = b.Build();
                bool ignoreCase = rnd.nextBoolean();
                Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, synonyms, ignoreCase);

                CheckAnalysisConsistency(rnd, synonymAnalyzer, rnd.nextBoolean(), "");
            }
        }
        /// <summary>
        /// Runs random data through a dictionary-based analyzer, then through an
        /// analyzer built on a hyphenation tree loaded from the "da_UTF8.xml" resource.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);

            CheckRandomData(Random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // The Java original wrapped the resource URL in an InputSource; here the
            // resource stream is passed directly and disposed when the block exits.
            using (var grammarStream = this.GetType().getResourceAsStream("da_UTF8.xml"))
            {
                HyphenationTree tree = HyphenationCompoundWordTokenFilter.GetHyphenationTree(grammarStream);
                Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, tree);

                CheckRandomData(Random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
            }
        }
예제 #25
0
 /// <summary>
 /// For at least three iterations, builds a random <c>NormalizeCharMap</c>, wraps
 /// it in an analyzer, and runs 100 rounds of random data through that analyzer.
 /// </summary>
 /// <remarks>
 /// Machine-converted from Java; the original was declared
 /// <c>public void testRandomMaps() throws Exception</c>.
 /// </remarks>
 //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
 public virtual void testRandomMaps()
 {
     int iterations = atLeast(3);

     for (int iteration = 0; iteration < iterations; iteration++)
     {
         // Declared 'final' in the Java original.
         NormalizeCharMap charMap = randomMap();
         Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, charMap);

         int rounds = 100;
         checkRandomData(random(), mappingAnalyzer, rounds);
     }
 }
예제 #26
0
 /// <summary>
 /// Loads a dictionary from "simple.aff" / "simple.dic" with the final
 /// constructor argument set to <c>true</c> (presumably ignore-case, going by
 /// the test name), then checks that "NoChAnGy" is emitted unchanged.
 /// </summary>
 /// <remarks>
 /// Machine-converted from Java; the original was declared
 /// <c>public void testIgnoreCaseNoSideEffects() throws Exception</c>.
 /// </remarks>
 public virtual void testIgnoreCaseNoSideEffects()
 {
     // Declared 'final' in the Java original.
     Dictionary dictionary;

     System.IO.Stream affix = typeof(TestStemmer).getResourceAsStream("simple.aff");
     System.IO.Stream words = typeof(TestStemmer).getResourceAsStream("simple.dic");
     try
     {
         dictionary = new Dictionary(affix, Collections.singletonList(words), true);
     }
     finally
     {
         // Both streams are released whether or not the dictionary loaded.
         IOUtils.closeWhileHandlingException(affix, words);
     }

     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, dictionary);
     checkOneTerm(analyzer, "NoChAnGy", "NoChAnGy");
 }
 /// <summary>
 /// Checks how the analyzer treats a base character followed by a combining
 /// mark, in four scripts. The "Bug" cases record that the expected output
 /// lacks the mark; the "Works" cases expect the input unchanged.
 /// </summary>
 public virtual void TestCombiningMarksBackwards()
 {
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

     // hiragana Bug
     CheckOneTerm(analyzer, "ざ", "さ");

     // katakana Works
     CheckOneTerm(analyzer, "ザ", "ザ");

     // ideographic Bug
     CheckOneTerm(analyzer, "壹゙", "壹");

     // hangul Works
     CheckOneTerm(analyzer, "아゙", "아゙");
 }
        /// <summary>
        /// Runs random data through two analyzers in turn, each for
        /// 1000 * RANDOM_MULTIPLIER iterations.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            // First analyzer variant.
            Analyzer first = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random(), first, 1000 * RANDOM_MULTIPLIER);

            // Second analyzer variant, built only after the first run has finished.
            Analyzer second = new AnalyzerAnonymousInnerClassHelper4(this);
            CheckRandomData(Random(), second, 1000 * RANDOM_MULTIPLIER);
        }
예제 #29
0
 /// <summary>
 /// Runs the analysis-consistency check on the empty string, with the boolean
 /// argument drawn from the same random source that is passed to the check.
 /// </summary>
 /// <remarks>
 /// Machine-converted from Java; the original was declared
 /// <c>public void testEmptyTerm() throws Exception</c>.
 /// </remarks>
 public virtual void testEmptyTerm()
 {
     Random random = random();
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

     // Drawn after the analyzer is built, exactly where the call's argument list would draw it.
     bool coinFlip = random.nextBoolean();
     checkAnalysisConsistency(random, analyzer, coinFlip, "");
 }
 /// <summary>
 /// Checks that the analyzer maps the empty string to the empty string.
 /// </summary>
 /// <remarks>
 /// Machine-converted from Java; the original was declared
 /// <c>public void testEmptyTerm() throws java.io.IOException</c>.
 /// </remarks>
 public virtual void testEmptyTerm()
 {
     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

     // Input and expected output are both "".
     checkOneTerm(analyzer, "", "");
 }
예제 #31
0
        /// <summary>
        /// Repeats a random-data check 10 * RANDOM_MULTIPLIER times, constructing a
        /// new analyzer on every pass.
        /// </summary>
        public virtual void TestMockGraphTokenFilterRandom()
        {
            for (int pass = 0; pass < 10 * RANDOM_MULTIPLIER; pass++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + pass);
                }

                // The analyzer is rebuilt for each pass rather than reused,
                // because MGTF has a fixed seed.
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);

                Random rnd = Random();
                int iterations = 5;
                CheckRandomData(rnd, analyzer, iterations, AtLeast(100));
            }
        }
예제 #32
0
        /// <summary>
        /// For at least ten iterations, fills a fresh <c>SynonymMap.Builder</c> with
        /// at least ten random non-empty entries, builds an analyzer from the
        /// resulting map, and runs the analysis-consistency check on the empty string.
        /// </summary>
        /// <remarks>
        /// Machine-converted from Java; the original was declared
        /// <c>public void testEmptyTerm() throws java.io.IOException</c>, and the
        /// locals below were 'final' there.
        /// </remarks>
        public virtual void testEmptyTerm()
        {
            Random random = random();
            int iterations = atLeast(10);

            for (int iteration = 0; iteration < iterations; iteration++)
            {
                // 'b' is not declared in this method; presumably a member of the
                // enclosing class that add(...) writes into.
                b = new SynonymMap.Builder(random.nextBoolean());

                int entryCount = atLeast(10);
                for (int entry = 0; entry < entryCount; entry++)
                {
                    add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
                }

                SynonymMap synonyms = b.build();
                bool ignoreCase = random.nextBoolean();

                // An anonymous org.apache.lucene.analysis.Analyzer subclass in the Java original.
                Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, synonyms, ignoreCase);

                checkAnalysisConsistency(random, synonymAnalyzer, random.nextBoolean(), "");
            }
        }
 /// <summary>
 /// Analyzes a sentence containing U+061C and an e-mail address, and checks the
 /// exact token sequence produced: the word is split at U+061C into "t" and "st",
 /// and the address yields "lucene" and "apache.org".
 /// </summary>
 public virtual void TestVersion40()
 {
     Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
     // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
     // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
     //
     // The input must contain the literal address "lucene@apache.org": the expected
     // tokens "lucene" and "apache.org" can only come from that text. An e-mail
     // obfuscation placeholder had replaced it and is restored here.
     AssertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene", "apache.org" });
 }
        /// <summary>
        /// Blasts random strings through ten analyzers parameterized by a random
        /// (min, max) pair, then through one further analyzer using the longer
        /// <c>checkRandomData</c> overload.
        /// </summary>
        /// <remarks>
        /// Machine-converted from Java; the original was declared
        /// <c>public void testRandomStrings() throws Exception</c>.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            for (int round = 0; round < 10; round++)
            {
                // Both bounds were 'final' in the Java original.
                // min is drawn from [2, 10] first; max is then drawn from [min, 20].
                int lower = TestUtil.Next(random(), 2, 10);
                int upper = TestUtil.Next(random(), lower, 20);

                Analyzer ranged = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                checkRandomData(random(), ranged, 100 * RANDOM_MULTIPLIER);
            }

            Analyzer fixedAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            checkRandomData(random(), fixedAnalyzer, 1000 * RANDOM_MULTIPLIER, 20, false, false);
        }
예제 #35
0
        /// <summary>
        /// Exercises three analyzers built from the same word-delimiter flags and
        /// protected-word set. Each assertion passes the expected terms followed by
        /// three integer arrays (apparently start offsets, end offsets and position
        /// increments, judging by the values and the inline notes).
        /// </summary>
        /// <remarks>
        /// Machine-converted from Java; the original was declared
        /// <c>@Test public void testPositionIncrements() throws Exception</c>.
        /// </remarks>
        public virtual void testPositionIncrements()
        {
            // Declared 'final' in the Java original.
            int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS | CATENATE_ALL | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

            // Java original: new HashSet<>(Arrays.asList("NUTCH")). The converter's
            // 'new HashSet<>("NUTCH")' is not valid C# (no diamond operator), so the
            // single protected word is supplied as a one-element string array instead.
            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "NUTCH" }, false);

            /* analyzer that uses whitespace + wdf */
            Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            /* in this case, works as expected. */
            assertAnalyzesTo(a, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 1});

            /* only in this case, posInc of 2 ?! */
            assertAnalyzesTo(a, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 1, 0, 1});

            assertAnalyzesTo(a, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 1, 1});

            /* analyzer that will consume tokens with large position increments */
            Analyzer a2 = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            /* increment of "largegap" is preserved */
            assertAnalyzesTo(a2, "LUCENE largegap SOLR", new string[] {"LUCENE", "largegap", "SOLR"}, new int[] {0, 7, 16}, new int[] {6, 15, 20}, new int[] {1, 10, 1});

            /* the "/" had a position increment of 10, where did it go?!?!! */
            assertAnalyzesTo(a2, "LUCENE / SOLR", new string[] {"LUCENE", "SOLR"}, new int[] {0, 9}, new int[] {6, 13}, new int[] {1, 11});

            /* in this case, the increment of 10 from the "/" is carried over */
            assertAnalyzesTo(a2, "LUCENE / solR", new string[] {"LUCENE", "sol", "solR", "R"}, new int[] {0, 9, 9, 12}, new int[] {6, 12, 13, 13}, new int[] {1, 11, 0, 1});

            assertAnalyzesTo(a2, "LUCENE / NUTCH SOLR", new string[] {"LUCENE", "NUTCH", "SOLR"}, new int[] {0, 9, 15}, new int[] {6, 14, 19}, new int[] {1, 11, 1});

            Analyzer a3 = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            assertAnalyzesTo(a3, "lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {0, 0, 7}, new int[] {6, 11, 11}, new int[] {1, 0, 1});

            /* the stopword should add a gap here */
            assertAnalyzesTo(a3, "the lucene.solr", new string[] {"lucene", "lucenesolr", "solr"}, new int[] {4, 4, 11}, new int[] {10, 15, 15}, new int[] {2, 0, 1});
        }
 /// <summary>
 /// Loads a dictionary from "simple.aff" / "simple.dic" with the final
 /// constructor argument set to <c>true</c> (presumably ignore-case, going by
 /// the test name), then checks that "NoChAnGy" is emitted unchanged.
 /// </summary>
 public virtual void TestIgnoreCaseNoSideEffects()
 {
     Dictionary dictionary;

     System.IO.Stream affix = typeof(TestStemmer).getResourceAsStream("simple.aff");
     System.IO.Stream words = typeof(TestStemmer).getResourceAsStream("simple.dic");
     try
     {
         dictionary = new Dictionary(affix, Arrays.AsList(words), true);
     }
     finally
     {
         // Both streams are released whether or not the dictionary loaded.
         IOUtils.CloseWhileHandlingException(affix, words);
     }

     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, dictionary);
     CheckOneTerm(analyzer, "NoChAnGy", "NoChAnGy");
 }
        /// <summary>
        /// Exercises three analyzers built around the same word-delimiter flags and
        /// protected-word set, and checks the terms they emit together with three
        /// integer arrays per case. The last array holds the expected position
        /// increments; the two before it are presumably start and end offsets
        /// (TODO confirm against AssertAnalyzesTo).
        /// </summary>
        public virtual void TestPositionIncrements()
        {
            int flags = WordDelimiterFilter.GENERATE_WORD_PARTS
                        | WordDelimiterFilter.GENERATE_NUMBER_PARTS
                        | WordDelimiterFilter.CATENATE_ALL
                        | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
                        | WordDelimiterFilter.SPLIT_ON_NUMERICS
                        | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;

            CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new[] { "NUTCH" }, false);

            // First analyzer: whitespace tokenization followed by the word delimiter filter.
            Analyzer whitespaceAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, flags, protWords);

            // The lone "/" yields no term and every surviving term advances by one position.
            AssertAnalyzesTo(
                whitespaceAnalyzer,
                "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 1 });

            // "solR" is split on the case change; the catenated form is stacked (increment 0).
            AssertAnalyzesTo(
                whitespaceAnalyzer,
                "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 1, 0, 1 });

            AssertAnalyzesTo(
                whitespaceAnalyzer,
                "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 1, 1 });

            // Second analyzer: feeds the filter tokens that carry large position increments.
            Analyzer largeGapAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, flags, protWords);

            // The increment attached to "largegap" must survive untouched.
            AssertAnalyzesTo(
                largeGapAnalyzer,
                "LUCENE largegap SOLR",
                new[] { "LUCENE", "largegap", "SOLR" },
                new[] { 0, 7, 16 },
                new[] { 6, 15, 20 },
                new[] { 1, 10, 1 });

            // The "/" had an increment of 10; the expectation is that it lands on the next term.
            AssertAnalyzesTo(
                largeGapAnalyzer,
                "LUCENE / SOLR",
                new[] { "LUCENE", "SOLR" },
                new[] { 0, 9 },
                new[] { 6, 13 },
                new[] { 1, 11 });

            // Same carry-over of the "/" increment when the following word is split.
            AssertAnalyzesTo(
                largeGapAnalyzer,
                "LUCENE / solR",
                new[] { "LUCENE", "sol", "solR", "R" },
                new[] { 0, 9, 9, 12 },
                new[] { 6, 12, 13, 13 },
                new[] { 1, 11, 0, 1 });

            AssertAnalyzesTo(
                largeGapAnalyzer,
                "LUCENE / NUTCH SOLR",
                new[] { "LUCENE", "NUTCH", "SOLR" },
                new[] { 0, 9, 15 },
                new[] { 6, 14, 19 },
                new[] { 1, 11, 1 });

            // Third analyzer: the original comments indicate it drops stopwords before the filter.
            Analyzer stopwordAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, flags, protWords);

            AssertAnalyzesTo(
                stopwordAnalyzer,
                "lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 0, 0, 7 },
                new[] { 6, 11, 11 },
                new[] { 1, 0, 1 });

            // The removed stopword "the" should leave a gap: the first term has increment 2.
            AssertAnalyzesTo(
                stopwordAnalyzer,
                "the lucene.solr",
                new[] { "lucene", "lucenesolr", "solr" },
                new[] { 4, 4, 11 },
                new[] { 10, 15, 15 },
                new[] { 2, 0, 1 });
        }
 /// <summary>
 /// Registers "sekretæren" in the exclusion set handed to the analyzer and
 /// expects that same term back as the single output term.
 /// </summary>
 public virtual void TestKeyword()
 {
     CharArraySet protectedTerms = new CharArraySet(TEST_VERSION_CURRENT, AsSet("sekretæren"), false);

     Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, protectedTerms);

     // Input and expected output are identical.
     CheckOneTerm(analyzer, "sekretæren", "sekretæren");
 }
 /// <summary>
 /// Repeats, for AtLeast(3) iterations: build a fresh random character map,
 /// wrap it in an analyzer, and push 100 rounds of random data through it.
 /// </summary>
 public virtual void TestRandomMaps()
 {
     int iterationCount = AtLeast(3);
     int iteration = 0;
     while (iteration < iterationCount)
     {
         // A new map, and therefore a new analyzer, is created for every iteration.
         NormalizeCharMap charMap = RandomMap();
         Analyzer mappingAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, charMap);

         int roundsPerMap = 100;
         CheckRandomData(Random(), mappingAnalyzer, roundsPerMap);

         iteration++;
     }
 }
예제 #40
0
        /// <summary>
        /// Feeds the empty string to the analyzer and expects the empty string
        /// back as the single resulting term.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            string emptyTerm = "";
            CheckOneTerm(analyzer, emptyTerm, emptyTerm);
        }
예제 #41
0
 /// <summary>
 /// Runs 1000 random HTML-ish strings (generated with a length argument of 20)
 /// through two analyzers - a <c>MockAnalyzer</c> configured with
 /// <c>jvmLetter</c> and the helper analyzer - and asserts that both produce
 /// equal token streams for every string.
 /// </summary>
 /// <remarks>
 /// The Java original was declared <c>throws Exception</c>; .NET has no
 /// equivalent clause.
 /// </remarks>
 public virtual void testLetterHtmlish()
 {
     // The local must not be called "random": that name would hide the random()
     // method inside this body and make the initializer below refer to the
     // not-yet-declared local instead of the method.
     Random rnd = random();
     Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
     Analyzer right = new AnalyzerAnonymousInnerClassHelper3(this);
     for (int i = 0; i < 1000; i++)
     {
         string s = TestUtil.randomHtmlishString(rnd, 20);

         // Each analyzer gets its own reader over the same text.
         assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
     }
 }
예제 #42
0
 /// <summary>
 /// Analyzes a Japanese sentence and checks the ten expected terms, two
 /// integer arrays that are presumably start and end offsets, the token types,
 /// and two further integer arrays that are all ones (presumably position
 /// increments and position lengths - TODO confirm against assertAnalyzesTo).
 /// </summary>
 /// <remarks>
 /// The Java original was declared <c>throws Exception</c>; .NET has no
 /// equivalent clause.
 /// </remarks>
 public virtual void testHanOnly()
 {
     Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);

     // Every expectation array has exactly one entry per expected term (ten).
     // The types array previously carried an eleventh, unmatched "<SINGLE>".
     assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
         new string[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"},
         new int[] {0, 1, 2, 3, 5, 6, 8, 9, 10, 11},
         new int[] {1, 2, 3, 5, 6, 8, 9, 10, 11, 12},
         new string[] {"<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>"},
         new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
         new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
 }
        /// <summary>
        /// Blasts random strings through two analyzers: one built from a small
        /// in-memory dictionary, and one built from a hyphenation tree loaded from
        /// the "da_UTF8.xml" resource. Each gets 1000 * RANDOM_MULTIPLIER rounds.
        /// </summary>
        /// <remarks>
        /// The Java original was declared <c>throws Exception</c>; .NET has no
        /// equivalent clause.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            // Part one: dictionary-driven analyzer.
            CharArraySet dictionary = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
            Analyzer dictionaryAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, dictionary);
            checkRandomData(random(), dictionaryAnalyzer, 1000 * RANDOM_MULTIPLIER);

            // Part two: hyphenation-driven analyzer; the grammar is located relative to this type.
            InputSource grammarSource = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
            HyphenationTree hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(grammarSource);
            Analyzer hyphenationAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, hyphenationTree);
            checkRandomData(random(), hyphenationAnalyzer, 1000 * RANDOM_MULTIPLIER);
        }
        /// <summary>
        /// Runs random data through ten analyzers, each configured with a random
        /// lower bound in [2, 10] and a random upper bound in [lower, 20], then
        /// through one further analyzer that takes no bounds.
        /// </summary>
        public virtual void TestRandomStrings()
        {
            int remaining = 10;
            while (remaining > 0)
            {
                // The upper bound is drawn after the lower bound and is never below it.
                int lower = TestUtil.NextInt(Random(), 2, 10);
                int upper = TestUtil.NextInt(Random(), lower, 20);

                Analyzer boundedAnalyzer = new AnalyzerAnonymousInnerClassHelper2(this, lower, upper);
                CheckRandomData(Random(), boundedAnalyzer, 100 * RANDOM_MULTIPLIER);

                remaining--;
            }

            Analyzer unboundedAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
            CheckRandomData(Random(), unboundedAnalyzer, 1000 * RANDOM_MULTIPLIER, 20, false, false);
        }
예제 #45
0
        /// <summary>
        /// Analyzes a Japanese sentence and checks the ten expected terms, two
        /// integer arrays that are presumably start and end offsets, the token
        /// types, and two further integer arrays that are all ones (presumably
        /// position increments and position lengths - TODO confirm against
        /// AssertAnalyzesTo).
        /// </summary>
        public virtual void TestHanOnly()
        {
            Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);

            // Every expectation array has exactly one entry per expected term (ten).
            // The types array previously carried an eleventh, unmatched "<SINGLE>".
            AssertAnalyzesTo(a, "多くの学生が試験に落ちた。",
                new string[] { "多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た" },
                new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
                new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
                new string[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>" },
                new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
                new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
        }
        /// <summary>
        /// Checks that characters encoded as UTF-16 surrogate pairs stay intact
        /// inside the emitted terms: each expected term contains the complete
        /// supplementary character rather than a lone surrogate.
        /// </summary>
        public virtual void TestSurrogates()
        {
            var analyzer = new AnalyzerAnonymousInnerClassHelper3();

            // High/low surrogate code units for U+2B001 and U+10444 respectively.
            string pairU2B001 = new string(new[] { (char)55404, (char)56321 });
            string pairU10444 = new string(new[] { (char)55297, (char)56388 });

            string firstInput = "bar 123" + pairU2B001 + "34 5te 987";
            AssertAnalyzesTo(analyzer, firstInput, new[] { "123𫀁34", "5", "987" });

            string secondInput = "787 " + pairU10444 + "6" + pairU2B001 + " art true 734";
            AssertAnalyzesTo(analyzer, secondInput, new[] { "787", "𐑄6𫀁", "734" });
        }
        /// <summary>
        /// For AtLeast(10) iterations: fills the shared synonym-map builder
        /// <c>b</c> with AtLeast(10) random non-empty entries, builds the map,
        /// wraps it in an analyzer with a random ignore-case setting, and checks
        /// that analysing the empty string is consistent.
        /// </summary>
        public virtual void TestEmptyTerm()
        {
            Random rnd = Random();
            int iterations = AtLeast(10);
            for (int iteration = 0; iteration < iterations; iteration++)
            {
                // The builder lives in a field because the Add helper writes into it.
                bool builderFlag = rnd.nextBoolean();
                b = new SynonymMap.Builder(builderFlag);

                int entryCount = AtLeast(10);
                int added = 0;
                while (added < entryCount)
                {
                    Add(RandomNonEmptyString(), RandomNonEmptyString(), rnd.nextBoolean());
                    added++;
                }

                SynonymMap synonyms = b.Build();
                bool caseInsensitive = rnd.nextBoolean();
                Analyzer synonymAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this, synonyms, caseInsensitive);

                // The text under test is always the empty string.
                bool consistencyFlag = rnd.nextBoolean();
                CheckAnalysisConsistency(rnd, synonymAnalyzer, consistencyFlag, "");
            }
        }