/*
         * Indicates whether some other object is "equal to" this one.
         *
         * @param other
         *            the reference object with which to compare.
         * @return true if equal, false otherwise
         */
        public override bool Equals(Object other)
        {
            // Same instance: nothing further to compare.
            if (this == other)
            {
                return true;
            }

            // The DEFAULT_ANALYZER / EXTENDED_ANALYZER pair is reported as unequal
            // in either direction, before any field is looked at.
            if ((this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) ||
                (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER))
            {
                return false;
            }

            // Anything that is not a PatternAnalyzer (null included) is unequal.
            if (!(other is PatternAnalyzer))
            {
                return false;
            }

            // Field-wise comparison: lowercase flag first, then the pattern,
            // then the stop word set; the first mismatch ends the comparison.
            PatternAnalyzer that = (PatternAnalyzer)other;
            if (toLowerCase != that.toLowerCase)
            {
                return false;
            }
            if (!EqRegex(Regex, that.Regex))
            {
                return false;
            }
            return Eq(stopWords, that.stopWords);
        }
        public virtual void TestHugeDocument()
        {
            // One word of 5000 a's and one word of 2000 b's
            string firstWord = new string('a', 5000);
            string secondWord = new string('b', 2000);

            // The document is the two words separated by a single space
            string document = new StringBuilder()
                .Append(firstWord)
                .Append(' ')
                .Append(secondWord)
                .ToString();

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer analyzer = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);

            // Each oversized word is expected back as a single token
            string[] expectedTokens = { firstWord, secondWord };
            Check(analyzer, document, expectedTokens);
        }
Exemple #3
0
        public virtual void TestRandomStrings()
        {
            // LUCENENET: Removed code dealing with buggy JRE

            // Split on comma, lowercase, english stopwords
            Regex commaPattern = new Regex(",", RegexOptions.Compiled);
            Analyzer analyzer = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                commaPattern,
                true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Run the analyzer through the random-data check
            CheckRandomData(Random, analyzer, 10000 * RandomMultiplier);
        }
        public virtual void TestCustomPattern()
        {
            // Both analyzers are fed the same comma-separated input
            const string text = "Here,Are,some,Comma,separated,words,";

            // Split on comma, do not lowercase, no stopwords
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), false, null);
            string[] plainTokens = { "Here", "Are", "some", "Comma", "separated", "words" };
            Check(plain, text, plainTokens);

            // Split on comma, lowercase, english stopwords
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "here", "some", "comma", "separated", "words" };
            Check(filtered, text, filteredTokens);
        }
        public virtual void TestWhitespacePattern()
        {
            // Both analyzers are fed the same sentence
            const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
            string[] plainTokens = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
            Check(plain, text, plainTokens);

            // Split on whitespace patterns, lowercase, english stopwords
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
            Check(filtered, text, filteredTokens);
        }
        public virtual void TestNonWordPattern()
        {
            // Both analyzers are fed the same sentence
            const string text = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Split on non-letter pattern, do not lowercase, no stopwords
            PatternAnalyzer plain = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
            string[] plainTokens = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
            Check(plain, text, plainTokens);

            // Split on non-letter pattern, lowercase, english stopwords
            PatternAnalyzer filtered = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] filteredTokens = { "quick", "brown", "fox", "abcd", "dc" };
            Check(filtered, text, filteredTokens);
        }
        public virtual void TestCustomPattern()
        {
            // Shared comma-separated input for both configurations
            const string input = "Here,Are,some,Comma,separated,words,";

            // Configuration 1: split on comma, do not lowercase, no stopwords
            PatternAnalyzer caseKeeping = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                new Regex(",", RegexOptions.Compiled),
                false,
                null);
            string[] caseKeepingExpected = { "Here", "Are", "some", "Comma", "separated", "words" };
            Check(caseKeeping, input, caseKeepingExpected);

            // Configuration 2: split on comma, lowercase, english stopwords
            PatternAnalyzer lowercasing = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                new Regex(",", RegexOptions.Compiled),
                true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] lowercasingExpected = { "here", "some", "comma", "separated", "words" };
            Check(lowercasing, input, lowercasingExpected);
        }
        public virtual void TestWhitespacePattern()
        {
            // Shared input sentence for both configurations
            const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Configuration 1: split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer caseKeeping = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                PatternAnalyzer.WHITESPACE_PATTERN,
                false,
                null);
            string[] caseKeepingExpected = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
            Check(caseKeeping, input, caseKeepingExpected);

            // Configuration 2: split on whitespace patterns, lowercase, english stopwords
            PatternAnalyzer lowercasing = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                PatternAnalyzer.WHITESPACE_PATTERN,
                true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] lowercasingExpected = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
            Check(lowercasing, input, lowercasingExpected);
        }
        public virtual void TestNonWordPattern()
        {
            // Shared input sentence for both configurations
            const string input = "The quick brown Fox,the abcd1234 (56.78) dc.";

            // Configuration 1: split on non-letter pattern, do not lowercase, no stopwords
            PatternAnalyzer caseKeeping = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                PatternAnalyzer.NON_WORD_PATTERN,
                false,
                null);
            string[] caseKeepingExpected = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
            Check(caseKeeping, input, caseKeepingExpected);

            // Configuration 2: split on non-letter pattern, lowercase, english stopwords
            PatternAnalyzer lowercasing = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                PatternAnalyzer.NON_WORD_PATTERN,
                true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            string[] lowercasingExpected = { "quick", "brown", "fox", "abcd", "dc" };
            Check(lowercasing, input, lowercasingExpected);
        }
        /// <summary>
        /// Checks the output of <paramref name="analyzer"/> for <paramref name="document"/>
        /// against <paramref name="expected"/>. The document is handed to the analyzer
        /// in three ways:
        /// <list type="bullet">
        /// <item><description>through <c>AssertAnalyzesTo</c></description></item>
        /// <item><description>wrapped in a <c>PatternAnalyzer.FastStringReader</c></description></item>
        /// <item><description>wrapped in a <c>StringReader</c></description></item>
        /// </list>
        /// </summary>
        /// <param name="analyzer">the analyzer under test</param>
        /// <param name="document">the text to analyze</param>
        /// <param name="expected">the expected tokens, in order</param>
        private void Check(PatternAnalyzer analyzer, string document, string[] expected)
        {
            // 1) ordinary analysis
            AssertAnalyzesTo(analyzer, document, expected);

            // 2) analysis with a "FastStringReader"
            PatternAnalyzer.FastStringReader fastReader = new PatternAnalyzer.FastStringReader(document);
            TokenStream fastReaderStream = analyzer.GetTokenStream("dummy", fastReader);
            AssertTokenStreamContents(fastReaderStream, expected);

            // 3) analysis of the same text wrapped in a StringReader
            StringReader plainReader = new StringReader(document);
            TokenStream plainReaderStream = analyzer.GetTokenStream("dummy", plainReader);
            AssertTokenStreamContents(plainReaderStream, expected);
        }
Exemple #11
0
            public override void Reset()
            {
                base.Reset();

                // Convert the current input to a string and obtain the first
                // match of the pattern against it.
                string text = PatternAnalyzer.ToString(input);
                str = text;
                matcher = pattern.Match(text);

                // LUCENENET: The Match object has to be "reset" together with the
                // rest of the state. Because a string could potentially match at
                // index 0, the position alone cannot tell us whether we are still
                // at the head of the match, so a separate "isReset" flag records
                // it. That flag keeps us from overwriting the matcher variable with
                // matcher = matcher.NextMatch();
                // before it is time.
                isReset = true;
                pos = 0;
                initialized = true;
            }
        public virtual void TestRandomStrings()
        {
            // Split on comma, lowercase, english stopwords
            Regex commaPattern = new Regex(",", RegexOptions.Compiled);
            Analyzer analyzer = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                commaPattern,
                true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Run the analyzer through the random-data check
            CheckRandomData(Random(), analyzer, 10000 * RANDOM_MULTIPLIER);
        }
        /*
         * Checks the output of the given analyzer for the given document against
         * the expected tokens. For PatternAnalyzer the document is supplied in
         * three ways:
         * <ul>
         * <li>through AssertAnalyzesTo</li>
         * <li>wrapped in a FastStringReader</li>
         * <li>directly as a String</li>
         * </ul>
         */
        private void Check(PatternAnalyzer analyzer, String document,
            String[] expected)
        {
            // 1) ordinary analysis
            AssertAnalyzesTo(analyzer, document, expected);

            // 2) analysis with a "FastStringReader"
            PatternAnalyzer.FastStringReader fastReader = new PatternAnalyzer.FastStringReader(document);
            TokenStream fastReaderStream = analyzer.TokenStream("dummy", fastReader);
            AssertTokenStreamContents(fastReaderStream, expected);

            // 3) analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
            TokenStream stringStream = analyzer.TokenStream("dummy", document);
            AssertTokenStreamContents(stringStream, expected);
        }
        public void TestHugeDocument()
        {
            // One word of 5000 a's and one word of 2000 b's
            String firstWord = new String('a', 5000);
            String secondWord = new String('b', 2000);

            // The document is the two words separated by a single space
            StringBuilder builder = new StringBuilder();
            builder.Append(firstWord).Append(' ').Append(secondWord);
            String document = builder.ToString();

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer analyzer = new PatternAnalyzer(
                Version.LUCENE_CURRENT,
                PatternAnalyzer.WHITESPACE_PATTERN,
                false,
                null);

            // Each oversized word is expected back as a single token
            String[] expectedTokens = new String[] { firstWord, secondWord };
            Check(analyzer, document, expectedTokens);
        }
        public virtual void TestHugeDocument()
        {
            // A 5000-character run of a's and a 2000-character run of b's
            string longWordA = new string('a', 5000);
            string longWordB = new string('b', 2000);

            // Assemble "<a's> <b's>" with exactly one space in between
            StringBuilder buffer = new StringBuilder();
            buffer.Append(longWordA);
            buffer.Append(' ');
            buffer.Append(longWordB);
            string text = buffer.ToString();

            // Split on whitespace patterns, do not lowercase, no stopwords
            PatternAnalyzer whitespaceAnalyzer = new PatternAnalyzer(
                TEST_VERSION_CURRENT,
                PatternAnalyzer.WHITESPACE_PATTERN,
                false,
                null);

            // Both runs should come back unchanged, one token each
            Check(whitespaceAnalyzer, text, new string[] { longWordA, longWordB });
        }
        public virtual void TestRandomStrings()
        {
            // Comma-splitting, lowercasing analyzer with the English stop word set
            Regex separator = new Regex(",", RegexOptions.Compiled);
            Analyzer commaAnalyzer = new PatternAnalyzer(TEST_VERSION_CURRENT, separator, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Feed the analyzer to the random-data check
            CheckRandomData(Random(), commaAnalyzer, 10000 * RANDOM_MULTIPLIER);
        }