/// <summary>
/// Test PatternAnalyzer against a large document.
/// </summary>
/// <remarks>
/// The document is a run of 5000 'a' characters, a single space, and a run of
/// 2000 'b' characters. Splitting on whitespace is expected to yield exactly
/// those two runs as tokens.
/// </remarks>
public virtual void testHugeDocument()
{
    // Build each run with the string(char, int) constructor.
    string largeWord = new string('a', 5000);
    string largeWord2 = new string('b', 2000);

    // 5000 a's, a space, 2000 b's
    string document = largeWord + " " + largeWord2;

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    check(a, document, new string[] { largeWord, largeWord2 });
}
/// <summary>
/// Blast some random strings through the analyzer.
/// </summary>
/// <remarks>
/// A replacement default uncaught-exception handler is installed for the
/// duration of the run and the previous handler is always restored in the
/// <c>finally</c> block.
/// </remarks>
public virtual void testRandomStrings()
{
    Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
    UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;
    Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);

    try
    {
        checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
    }
    catch (System.IndexOutOfRangeException ex)
    {
        assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
        // Otherwise rethrow; a bare "throw" keeps the original stack trace intact.
        throw;
    }
    finally
    {
        Thread.DefaultUncaughtExceptionHandler = savedHandler;
    }
}
/// <summary>
/// Resets this instance: calls the base reset, converts the current input to
/// a string via <c>PatternAnalyzer.ToString</c>, creates a matcher over that
/// string, sets the position back to 0 and flags the instance as initialized.
/// </summary>
public override void reset()
{
    base.reset();

    string text = PatternAnalyzer.ToString(input);
    str = text;
    matcher = pattern.matcher(text);

    pos = 0;
    initialized = true;
}
/// <summary>
/// Exercises PatternAnalyzer with a caller-supplied pattern; here the text
/// is tokenized on the comma ",".
/// </summary>
public virtual void testCustomPattern()
{
    const string input = "Here,Are,some,Comma,separated,words,";

    // Comma split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "Here", "Are", "some", "Comma", "separated", "words" };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
    check(verbatim, input, expectedVerbatim);

    // Comma split, lowercased, English stop words applied ("are" is not expected back).
    string[] expectedFiltered = { "here", "some", "comma", "separated", "words" };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Exercises PatternAnalyzer with a caller-supplied pattern; here the text
/// is tokenized on the comma ",".
/// </summary>
public virtual void testCustomPattern()
{
    const string input = "Here,Are,some,Comma,separated,words,";

    // Comma split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "Here", "Are", "some", "Comma", "separated", "words" };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
    check(verbatim, input, expectedVerbatim);

    // Comma split, lowercased, English stop words applied ("are" is not expected back).
    string[] expectedFiltered = { "here", "some", "comma", "separated", "words" };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Exercises PatternAnalyzer configured with its whitespace pattern.
/// Depending on the options, the result can resemble WhitespaceAnalyzer.
/// </summary>
public virtual void testWhitespacePattern()
{
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Whitespace split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    check(verbatim, input, expectedVerbatim);

    // Whitespace split, lowercased, English stop words applied (leading "the" is not expected back).
    string[] expectedFiltered = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Exercises PatternAnalyzer configured with its non-word pattern.
/// Depending on the options, the result can resemble SimpleAnalyzer.
/// </summary>
public virtual void testNonWordPattern()
{
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Non-letter split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
    check(verbatim, input, expectedVerbatim);

    // Non-letter split, lowercased, English stop words applied (both "the" tokens are not expected back).
    string[] expectedFiltered = { "quick", "brown", "fox", "abcd", "dc" };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Verifies that the analyzer turns the document into the expected tokens.
/// Three routes are checked, in this order:
/// <list type="bullet">
/// <item><description><c>assertAnalyzesTo</c>, given the document text directly</description></item>
/// <item><description>a token stream obtained over a <c>PatternAnalyzer.FastStringReader</c></description></item>
/// <item><description>a token stream obtained over a <c>StringReader</c></description></item>
/// </list>
/// </summary>
/// <param name="analyzer">analyzer under test</param>
/// <param name="document">text to analyze</param>
/// <param name="expected">expected token texts, in order</param>
private void check(PatternAnalyzer analyzer, string document, string[] expected)
{
    const string fieldName = "dummy";

    // Route 1: the assertion helper drives the analyzer itself.
    assertAnalyzesTo(analyzer, document, expected);

    // Route 2: token stream over a FastStringReader.
    TokenStream fastReaderStream = analyzer.tokenStream(fieldName, new PatternAnalyzer.FastStringReader(document));
    assertTokenStreamContents(fastReaderStream, expected);

    // Route 3: token stream over a StringReader.
    TokenStream stringReaderStream = analyzer.tokenStream(fieldName, new StringReader(document));
    assertTokenStreamContents(stringReaderStream, expected);
}
/// <summary>
/// Indicates whether some other object is "equal to" this one.
/// </summary>
/// <param name="other">the reference object with which to compare.</param>
/// <returns>
/// true when <paramref name="other"/> is this same instance, or is a
/// PatternAnalyzer with the same lowercase flag, an equal pattern (per
/// <c>eqPattern</c>) and equal stop words (per <c>eq</c>); false otherwise.
/// </returns>
public override bool Equals(object other)
{
    if (this == other)
    {
        return true;
    }

    // DEFAULT_ANALYZER and EXTENDED_ANALYZER never compare equal to each other,
    // whichever of the two is on the left-hand side.
    bool defaultAgainstExtended =
        (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) ||
        (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER);
    if (defaultAgainstExtended)
    {
        return false;
    }

    if (!(other is PatternAnalyzer))
    {
        return false;
    }

    PatternAnalyzer that = (PatternAnalyzer)other;
    return toLowerCase == that.toLowerCase
        && eqPattern(pattern, that.pattern)
        && eq(stopWords, that.stopWords);
}
/// <summary>
/// Test PatternAnalyzer against a large document.
/// </summary>
/// <remarks>
/// The document is a run of 5000 'a' characters, a single space, and a run of
/// 2000 'b' characters. Splitting on whitespace is expected to yield exactly
/// those two runs as tokens.
/// </remarks>
public virtual void testHugeDocument()
{
    // Build each run with the string(char, int) constructor.
    string largeWord = new string('a', 5000);
    string largeWord2 = new string('b', 2000);

    // 5000 a's, a space, 2000 b's
    string document = largeWord + " " + largeWord2;

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    check(a, document, new string[] { largeWord, largeWord2 });
}
/// <summary>
/// Resets this instance: calls the base reset, converts the current input to
/// a string via <c>PatternAnalyzer.ToString</c>, and sets the position back to 0.
/// </summary>
public override void reset()
{
    base.reset();

    str = PatternAnalyzer.ToString(input);
    pos = 0;
}
/// <summary>
/// Exercises PatternAnalyzer configured with its non-word pattern.
/// Depending on the options, the result can resemble SimpleAnalyzer.
/// </summary>
public virtual void testNonWordPattern()
{
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Non-letter split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
    check(verbatim, input, expectedVerbatim);

    // Non-letter split, lowercased, English stop words applied (both "the" tokens are not expected back).
    string[] expectedFiltered = { "quick", "brown", "fox", "abcd", "dc" };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Verifies that the analyzer turns the document into the expected tokens.
/// Three routes are checked, in this order:
/// <list type="bullet">
/// <item><description><c>assertAnalyzesTo</c>, given the document text directly</description></item>
/// <item><description>a token stream obtained over a <c>PatternAnalyzer.FastStringReader</c></description></item>
/// <item><description>a token stream obtained over a <c>StringReader</c></description></item>
/// </list>
/// </summary>
/// <param name="analyzer">analyzer under test</param>
/// <param name="document">text to analyze</param>
/// <param name="expected">expected token texts, in order</param>
private void check(PatternAnalyzer analyzer, string document, string[] expected)
{
    const string fieldName = "dummy";

    // Route 1: the assertion helper drives the analyzer itself.
    assertAnalyzesTo(analyzer, document, expected);

    // Route 2: token stream over a FastStringReader.
    TokenStream fastReaderStream = analyzer.tokenStream(fieldName, new PatternAnalyzer.FastStringReader(document));
    assertTokenStreamContents(fastReaderStream, expected);

    // Route 3: token stream over a StringReader.
    TokenStream stringReaderStream = analyzer.tokenStream(fieldName, new StringReader(document));
    assertTokenStreamContents(stringReaderStream, expected);
}
/// <summary>
/// Exercises PatternAnalyzer configured with its whitespace pattern.
/// Depending on the options, the result can resemble WhitespaceAnalyzer.
/// </summary>
public virtual void testWhitespacePattern()
{
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Whitespace split, original casing kept, no stop word set.
    string[] expectedVerbatim = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
    PatternAnalyzer verbatim = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    check(verbatim, input, expectedVerbatim);

    // Whitespace split, lowercased, English stop words applied (leading "the" is not expected back).
    string[] expectedFiltered = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(filtered, input, expectedFiltered);
}
/// <summary>
/// Blast some random strings through the analyzer.
/// </summary>
/// <remarks>
/// A replacement default uncaught-exception handler is installed for the
/// duration of the run and the previous handler is always restored in the
/// <c>finally</c> block.
/// </remarks>
public virtual void testRandomStrings()
{
    Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
    UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;
    Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);

    try
    {
        checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
    }
    catch (System.IndexOutOfRangeException ex)
    {
        assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
        // Otherwise rethrow; a bare "throw" keeps the original stack trace intact.
        throw;
    }
    finally
    {
        Thread.DefaultUncaughtExceptionHandler = savedHandler;
    }
}