/// <summary>
/// Indicates whether some other object is "equal to" this one.
/// </summary>
/// <param name="other">the reference object with which to compare.</param>
/// <returns>true if equal, false otherwise</returns>
public override bool Equals(Object other)
{
    // Identical reference: nothing more to compare.
    if (this == other)
    {
        return true;
    }

    // The two predefined analyzers are never equal to one another,
    // regardless of which one is the receiver and which the argument.
    if ((this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
        || (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER))
    {
        return false;
    }

    // Anything that is not a PatternAnalyzer (including null) is unequal.
    if (!(other is PatternAnalyzer))
    {
        return false;
    }

    PatternAnalyzer that = (PatternAnalyzer)other;

    // Compare the configuration in the same order as before:
    // lowercase flag first, then the pattern, then the stop words.
    if (toLowerCase != that.toLowerCase)
    {
        return false;
    }

    if (!EqRegex(Regex, that.Regex))
    {
        return false;
    }

    return Eq(stopWords, that.stopWords);
}
/// <summary>
/// Checks that a document consisting of a 5000-character word, a space and a
/// 2000-character word is split into exactly those two words by the
/// whitespace pattern (no lowercasing, no stopwords).
/// </summary>
public virtual void TestHugeDocument()
{
    // 5000 a's
    char[] firstWord = new char[5000];
    Arrays.Fill(firstWord, 'a');

    // 2000 b's
    char[] secondWord = new char[2000];
    Arrays.Fill(secondWord, 'b');

    // Document layout: <firstWord> <space> <secondWord>
    StringBuilder text = new StringBuilder();
    text.Append(firstWord).Append(' ').Append(secondWord);

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer analyzer = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);

    string[] expectedTokens = { new string(firstWord), new string(secondWord) };
    Check(analyzer, text.ToString(), expectedTokens);
}
/// <summary>
/// Runs the random-data check against an analyzer that splits on commas,
/// lowercases, and uses the English stop word set.
/// </summary>
public virtual void TestRandomStrings()
{
    // LUCENENET: Removed code dealing with buggy JRE
    Regex commaSplitter = new Regex(",", RegexOptions.Compiled);
    Analyzer analyzer = new PatternAnalyzer(TEST_VERSION_CURRENT, commaSplitter, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    CheckRandomData(Random, analyzer, 10000 * RandomMultiplier);
}
/// <summary>
/// Checks tokenization with a custom comma pattern, once without and once
/// with lowercasing and English stopwords.
/// </summary>
public virtual void TestCustomPattern()
{
    // Both analyzers are fed the same comma-separated text.
    const string input = "Here,Are,some,Comma,separated,words,";

    // Split on comma, do not lowercase, no stopwords
    PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), false, null);
    string[] plainTokens = { "Here", "Are", "some", "Comma", "separated", "words" };
    Check(plain, input, plainTokens);

    // Split on comma, lowercase, english stopwords
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    string[] filteredTokens = { "here", "some", "comma", "separated", "words" };
    Check(filtered, input, filteredTokens);
}
/// <summary>
/// Checks tokenization with the whitespace pattern, once without and once
/// with lowercasing and English stopwords.
/// </summary>
public virtual void TestWhitespacePattern()
{
    // Both analyzers are fed the same sentence.
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    string[] plainTokens = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
    Check(plain, input, plainTokens);

    // Split on whitespace patterns, lowercase, english stopwords
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    string[] filteredTokens = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
    Check(filtered, input, filteredTokens);
}
/// <summary>
/// Checks tokenization with the non-word pattern, once without and once
/// with lowercasing and English stopwords.
/// </summary>
public virtual void TestNonWordPattern()
{
    // Both analyzers are fed the same sentence.
    const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on non-letter pattern, do not lowercase, no stopwords
    PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
    string[] plainTokens = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
    Check(plain, input, plainTokens);

    // Split on non-letter pattern, lowercase, english stopwords
    PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    string[] filteredTokens = { "quick", "brown", "fox", "abcd", "dc" };
    Check(filtered, input, filteredTokens);
}
/// <summary>
/// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
/// several methods are verified:
/// <list type="bullet">
/// <item><description>Analysis through AssertAnalyzesTo (ordinary reader path)</description></item>
/// <item><description>Analysis with a FastStringReader</description></item>
/// <item><description>Analysis with the document wrapped in a StringReader</description></item>
/// </list>
/// </summary>
/// <param name="analyzer">the analyzer under test</param>
/// <param name="document">the text to analyze</param>
/// <param name="expected">the tokens the analysis must produce, in order</param>
private void Check(PatternAnalyzer analyzer, string document, string[] expected)
{
    // The same field name is used for every token stream requested below.
    const string fieldName = "dummy";

    // ordinary analysis of a Reader
    AssertAnalyzesTo(analyzer, document, expected);

    // analysis with a "FastStringReader"
    AssertTokenStreamContents(
        analyzer.GetTokenStream(fieldName, new PatternAnalyzer.FastStringReader(document)),
        expected);

    // analysis of the same text supplied through a plain StringReader
    AssertTokenStreamContents(
        analyzer.GetTokenStream(fieldName, new StringReader(document)),
        expected);
}
/// <summary>
/// Resets the base state, converts the current input to a string via
/// <c>PatternAnalyzer.ToString</c>, and starts a fresh match of the pattern
/// against that string.
/// </summary>
public override void Reset()
{
    base.Reset();
    str = PatternAnalyzer.ToString(input);

    // LUCENENET: The Match object has to be "reset" by matching again from
    // the start. Because a string can legitimately match at index 0, the
    // match position alone cannot tell us whether we are still at the head
    // of the match, so a separate "isReset" flag records that state. It
    // prevents the matcher variable from being overwritten with
    //     matcher = matcher.NextMatch();
    // before it is time.
    matcher = pattern.Match(str);
    isReset = true;

    pos = 0;
    initialized = true;
}
/// <summary>
/// Runs the random-data check against an analyzer that splits on commas,
/// lowercases, and uses the English stop word set.
/// </summary>
public virtual void TestRandomStrings()
{
    Regex commaSplitter = new Regex(",", RegexOptions.Compiled);
    Analyzer analyzer = new PatternAnalyzer(TEST_VERSION_CURRENT, commaSplitter, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    CheckRandomData(Random(), analyzer, 10000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
/// several methods are verified:
/// <list type="bullet">
/// <item><description>Analysis through AssertAnalyzesTo (ordinary reader path)</description></item>
/// <item><description>Analysis with a FastStringReader</description></item>
/// <item><description>Analysis with a String</description></item>
/// </list>
/// </summary>
/// <param name="analyzer">the analyzer under test</param>
/// <param name="document">the text to analyze</param>
/// <param name="expected">the tokens the analysis must produce, in order</param>
private void Check(PatternAnalyzer analyzer, String document, String[] expected)
{
    // The same field name is used for every token stream requested below.
    const string fieldName = "dummy";

    // ordinary analysis of a Reader
    AssertAnalyzesTo(analyzer, document, expected);

    // analysis with a "FastStringReader"
    AssertTokenStreamContents(
        analyzer.TokenStream(fieldName, new PatternAnalyzer.FastStringReader(document)),
        expected);

    // analysis of a String, passing the document text directly
    AssertTokenStreamContents(
        analyzer.TokenStream(fieldName, document),
        expected);
}
/// <summary>
/// Checks that a document consisting of a 5000-character word, a space and a
/// 2000-character word is split into exactly those two words by the
/// whitespace pattern (no lowercasing, no stopwords).
/// </summary>
public void TestHugeDocument()
{
    // 5000 a's
    char[] firstWord = Enumerable.Repeat('a', 5000).ToArray();

    // 2000 b's
    char[] secondWord = Enumerable.Repeat('b', 2000).ToArray();

    // Document layout: <firstWord> <space> <secondWord>
    StringBuilder text = new StringBuilder();
    text.Append(firstWord).Append(' ').Append(secondWord);

    // Split on whitespace patterns, do not lowercase, no stopwords
    PatternAnalyzer analyzer = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);

    String[] expectedTokens = { new String(firstWord), new String(secondWord) };
    Check(analyzer, text.ToString(), expectedTokens);
}