Esempio n. 1
0
        /// <summary>
        /// Test PatternAnalyzer against a large document: a word of 5000 'a'
        /// characters and a word of 2000 'b' characters separated by one space.
        /// </summary>
        /// <remarks>
        /// The words are created with the <c>string(char, int)</c> constructor, so the
        /// test does not depend on a Java-style <c>Arrays.fill</c> helper.
        /// </remarks>
        public virtual void testHugeDocument()
        {
            // 5000 a's
            string largeWord = new string('a', 5000);

            // 2000 b's
            string largeWord2 = new string('b', 2000);

            // Both words joined by a single space
            string document = largeWord + " " + largeWord2;

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);

            // Each word must come back as exactly one token
            check(a, document, new string[]
            {
                largeWord,
                largeWord2
            });
        }
Esempio n. 2
0
        /// <summary>
        /// Blast some random strings through the analyzer.
        /// </summary>
        /// <remarks>
        /// For the duration of the check the default uncaught-exception handler is
        /// replaced by an <c>UncaughtExceptionHandlerAnonymousInnerClassHelper</c> that
        /// wraps the previous handler; the previous handler is restored in the
        /// <c>finally</c> block whether or not the check succeeds.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
            UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;

            Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);

            try
            {
                checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
            }
            catch (System.IndexOutOfRangeException ex)
            {
                assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));

                // otherwise rethrow; a bare 'throw' keeps the original stack trace
                throw;
            }
            finally
            {
                // always put the previously installed handler back
                Thread.DefaultUncaughtExceptionHandler = savedHandler;
            }
        }
Esempio n. 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
            /// <summary>
            /// Resets this stream. After the base reset, the input is converted to a
            /// string via <c>PatternAnalyzer.ToString</c>, a new matcher is created over
            /// that string, the position is set back to zero and the stream is flagged
            /// as initialized.
            /// </summary>
            public override void reset()
            {
                base.reset();

                // The matcher is built over the freshly captured text, so the text
                // must be assigned first.
                str = PatternAnalyzer.ToString(input);
                matcher = pattern.matcher(str);

                // Start over from the beginning.
                pos = 0;
                initialized = true;
            }
Esempio n. 4
0
        /// <summary>
        /// Exercises PatternAnalyzer with a custom pattern: the text is tokenized
        /// on the comma ",".
        /// </summary>
        public virtual void testCustomPattern()
        {
            const string text = "Here,Are,some,Comma,separated,words,";

            // Comma split, original case kept, no stopword set
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
            string[] plainTokens = { "Here", "Are", "some", "Comma", "separated", "words" };
            check(plain, text, plainTokens);

            // Comma split, lowercased, English stopword set
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "here", "some", "comma", "separated", "words" };
            check(filtered, text, filteredTokens);
        }
Esempio n. 5
0
        /// <summary>
        /// Checks PatternAnalyzer when a custom pattern is supplied; here the
        /// input is split on the comma ",".
        /// </summary>
        public virtual void testCustomPattern()
        {
            const string input = "Here,Are,some,Comma,separated,words,";

            // First pass: comma split, case untouched, null stopword set
            PatternAnalyzer keepCase = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
            check(keepCase, input, new[] { "Here", "Are", "some", "Comma", "separated", "words" });

            // Second pass: comma split, lowercase on, English stopword set
            PatternAnalyzer lowerCase = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            check(lowerCase, input, new[] { "here", "some", "comma", "separated", "words" });
        }
Esempio n. 6
0
        /// <summary>
        /// Exercises PatternAnalyzer configured with its whitespace pattern.
        /// Behavior can be similar to WhitespaceAnalyzer (depending upon options).
        /// </summary>
        public virtual void testWhitespacePattern()
        {
            const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Whitespace split, original case kept, no stopword set
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
            string[] plainTokens = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
            check(plain, text, plainTokens);

            // Whitespace split, lowercased, English stopword set
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
            check(filtered, text, filteredTokens);
        }
Esempio n. 7
0
        /// <summary>
        /// Exercises PatternAnalyzer configured with its non-word pattern.
        /// Behavior can be similar to SimpleAnalyzer (depending upon options).
        /// </summary>
        public virtual void testNonWordPattern()
        {
            const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Non-letter split, original case kept, no stopword set
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
            string[] plainTokens = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
            check(plain, text, plainTokens);

            // Non-letter split, lowercased, English stopword set
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "quick", "brown", "fox", "abcd", "dc" };
            check(filtered, text, filteredTokens);
        }
Esempio n. 8
0
        /// <summary>
        /// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
        /// the same document is pushed through three paths:
        /// <list type="bullet">
        /// <item><description>the analyzer directly, via <c>assertAnalyzesTo</c></description></item>
        /// <item><description>a token stream over a <c>PatternAnalyzer.FastStringReader</c></description></item>
        /// <item><description>a token stream over a <c>StringReader</c></description></item>
        /// </list>
        /// </summary>
        /// <param name="analyzer"> the analyzer under test </param>
        /// <param name="document"> the text to analyze </param>
        /// <param name="expected"> the tokens the analysis is expected to produce </param>
        private void check(PatternAnalyzer analyzer, string document, string[] expected)
        {
            const string fieldName = "dummy";

            // Path 1: ordinary analysis of the document
            assertAnalyzesTo(analyzer, document, expected);

            // Path 2: analysis with a "FastStringReader"
            TokenStream fastStream = analyzer.tokenStream(fieldName, new PatternAnalyzer.FastStringReader(document));
            assertTokenStreamContents(fastStream, expected);

            // Path 3: analysis with a plain StringReader over the same text
            TokenStream readerStream = analyzer.tokenStream(fieldName, new StringReader(document));
            assertTokenStreamContents(readerStream, expected);
        }
Esempio n. 9
0
        /// <summary>
        /// Indicates whether some other object is "equal to" this one.
        /// </summary>
        /// <remarks>
        /// The same instance is always equal. The default and extended analyzers are
        /// never equal to each other. Any other <c>PatternAnalyzer</c> is equal when
        /// its lowercase flag, pattern and stop words all match; objects of any other
        /// type are not equal.
        /// </remarks>
        /// <param name="other"> the reference object with which to compare. </param>
        /// <returns> true if equal, false otherwise </returns>
        public override bool Equals(object other)
        {
            // Same instance
            if (this == other)
            {
                return true;
            }

            // Default vs. extended (in either direction) is never equal
            if ((this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
                || (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER))
            {
                return false;
            }

            // Only another PatternAnalyzer can be equal
            if (!(other is PatternAnalyzer))
            {
                return false;
            }

            PatternAnalyzer that = (PatternAnalyzer)other;

            return toLowerCase == that.toLowerCase
                && eqPattern(pattern, that.pattern)
                && eq(stopWords, that.stopWords);
        }
Esempio n. 10
0
        /// <summary>
        /// Test PatternAnalyzer against a large document made of a 5000-character
        /// word of 'a' and a 2000-character word of 'b', separated by one space.
        /// </summary>
        /// <remarks>
        /// Both words come from the <c>string(char, int)</c> constructor, which removes
        /// the need for a Java-style <c>Arrays.fill</c> helper and for temporary
        /// character arrays.
        /// </remarks>
        public virtual void testHugeDocument()
        {
            // 5000 a's
            string largeWord = new string('a', 5000);

            // 2000 b's
            string largeWord2 = new string('b', 2000);

            // word, a space, word
            string document = largeWord + " " + largeWord2;

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);

            // The analyzer is expected to return the two words unchanged
            check(a, document, new string[] { largeWord, largeWord2 });
        }
Esempio n. 11
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
            /// <summary>
            /// Resets this stream. After the base reset, the input is converted to a
            /// string via <c>PatternAnalyzer.ToString</c> and the position is set back
            /// to zero.
            /// </summary>
            public override void reset()
            {
                base.reset();

                // Capture the input text again and start over from its beginning.
                str = PatternAnalyzer.ToString(input);
                pos = 0;
            }
Esempio n. 12
0
        /// <summary>
        /// Checks PatternAnalyzer when it is set up with a non-word pattern.
        /// Behavior can be similar to SimpleAnalyzer (depending upon options).
        /// </summary>
        public virtual void testNonWordPattern()
        {
            const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // First pass: non-letter split, case untouched, null stopword set
            PatternAnalyzer keepCase = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
            check(keepCase, input, new[] { "The", "quick", "brown", "Fox", "the", "abcd", "dc" });

            // Second pass: non-letter split, lowercase on, English stopword set
            PatternAnalyzer lowerCase = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            check(lowerCase, input, new[] { "quick", "brown", "fox", "abcd", "dc" });
        }
Esempio n. 13
0
        /// <summary>
        /// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
        /// three routes are verified against the same expectation:
        /// <list type="bullet">
        /// <item><description>direct analysis through <c>assertAnalyzesTo</c></description></item>
        /// <item><description>a token stream reading from a <c>PatternAnalyzer.FastStringReader</c></description></item>
        /// <item><description>a token stream reading from a <c>StringReader</c></description></item>
        /// </list>
        /// </summary>
        /// <param name="analyzer"> the analyzer being verified </param>
        /// <param name="document"> the input text </param>
        /// <param name="expected"> the tokens that every route should yield </param>
        private void check(PatternAnalyzer analyzer, string document, string[] expected)
        {
            const string field = "dummy";

            // Route 1: ordinary analysis of the document
            assertAnalyzesTo(analyzer, document, expected);

            // Route 2: analysis with a "FastStringReader"
            TokenStream viaFastReader = analyzer.tokenStream(field, new PatternAnalyzer.FastStringReader(document));
            assertTokenStreamContents(viaFastReader, expected);

            // Route 3: analysis with a plain StringReader wrapping the same text
            TokenStream viaStringReader = analyzer.tokenStream(field, new StringReader(document));
            assertTokenStreamContents(viaStringReader, expected);
        }
Esempio n. 14
0
        /// <summary>
        /// Checks PatternAnalyzer when it is set up with a whitespace pattern.
        /// Behavior can be similar to WhitespaceAnalyzer (depending upon options).
        /// </summary>
        public virtual void testWhitespacePattern()
        {
            const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // First pass: whitespace split, case untouched, null stopword set
            PatternAnalyzer keepCase = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
            check(keepCase, input, new[] { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });

            // Second pass: whitespace split, lowercase on, English stopword set
            PatternAnalyzer lowerCase = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            check(lowerCase, input, new[] { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
        }
Esempio n. 15
0
        /// <summary>
        /// Blast some random strings through the analyzer.
        /// </summary>
        /// <remarks>
        /// The default uncaught-exception handler is saved, replaced by an
        /// <c>UncaughtExceptionHandlerAnonymousInnerClassHelper</c> wrapping it while
        /// the random data is checked, and put back in the <c>finally</c> block.
        /// </remarks>
        public virtual void testRandomStrings()
        {
            Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
            UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;
            Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);

            try
            {
                checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
            }
            catch (System.IndexOutOfRangeException ex)
            {
                assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));

                // otherwise rethrow; 'throw;' (not 'throw ex;') preserves the stack trace
                throw;
            }
            finally
            {
                // restore the handler that was active before the test
                Thread.DefaultUncaughtExceptionHandler = savedHandler;
            }
        }