/// <summary>
/// Builds the analysis chain for a field: a single <see cref="UAX29URLEmailTokenizer"/>
/// created with <c>LuceneVersion.LUCENE_40</c>, with no token filters on top of it.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Reader supplying the text to tokenize.</param>
/// <returns>Components that wrap only the tokenizer.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // Warnings CS0612/CS0618 (obsolete member) are silenced only for the
                // statement that references LuceneVersion.LUCENE_40.
#pragma warning disable 612, 618
                Tokenizer source = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618

                return new TokenStreamComponents(source);
            }
            /// <summary>
            /// Builds a chain consisting of a <see cref="UAX29URLEmailTokenizer"/> (using
            /// <c>TEST_VERSION_CURRENT</c>) whose output is passed through an <c>EmailFilter</c>.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components with the tokenizer as source and the filter as the end of the chain.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);

                // The filter wraps the tokenizer, so it is what consumers of the chain read from.
                return new TokenStreamComponents(source, new EmailFilter(source));
            }
            /// <summary>
            /// Creates the tokenizer/filter pair for a field: a <c>UAX29URLEmailTokenizer</c>
            /// built with <c>TEST_VERSION_CURRENT</c>, wrapped by an <c>EmailFilter</c>.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components exposing the tokenizer as source and the filter as result stream.</returns>
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                UAX29URLEmailTokenizer emailTokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
                TokenFilter emailsOnly = new EmailFilter(emailTokenizer);
                return new TokenStreamComponents(emailTokenizer, emailsOnly);
            }
            /// <summary>
            /// Builds a chain consisting of a <see cref="UAX29URLEmailTokenizer"/> with its
            /// maximum token length raised to <see cref="int.MaxValue"/>, wrapped by a <c>URLFilter</c>.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components with the tokenizer as source and the URL filter as the end of the chain.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader)
                {
                    // Tokenize arbitrary length URLs
                    MaxTokenLength = int.MaxValue
                };

                return new TokenStreamComponents(source, new URLFilter(source));
            }
 /// <summary>
 /// Feeds the tokenizer 4094 spaces followed by <c>"testing 1234"</c> and asserts that
 /// exactly the tokens <c>testing</c> and <c>1234</c> are produced.
 /// </summary>
 public virtual void TestHugeDoc()
 {
     // Long run of leading whitespace, then the two real tokens.
     StringBuilder text = new StringBuilder();
     text.Append(' ', 4094);
     text.Append("testing 1234");

     UAX29URLEmailTokenizer tokenizer =
         new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(text.ToString()));
     string[] expectedTerms = new string[] { "testing", "1234" };

     BaseTokenStreamTestCase.AssertTokenStreamContents(tokenizer, expectedTerms);
 }
        /// <summary>
        /// Backwards-compatibility check: with a tokenizer created for
        /// <c>LuceneVersion.LUCENE_34</c>, the input <c>mailto:test@example.org</c> is expected to
        /// be emitted as the two tokens <c>mailto:test</c> and <c>example.org</c>.
        /// </summary>
        public virtual void TestMailtoBackwards()
        {
            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                // CS0612/CS0618 (obsolete member) are suppressed only around the version constant.
#pragma warning disable 612, 618
                Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_34, reader);
#pragma warning restore 612, 618
                return new TokenStreamComponents(tokenizer);
            });

            // The input must be a real address containing '@'; the expected tokens are the
            // pieces on either side of it.
            AssertAnalyzesTo(a, "mailto:test@example.org", new string[] { "mailto:test", "example.org" });
        }
        /// <summary>
        /// Checks tokenization with a tokenizer created for <c>LuceneVersion.LUCENE_36</c>:
        /// the word containing U+08E6 is expected to be split into <c>t</c> and <c>st</c>,
        /// and the trailing e-mail address is expected to be kept as a single token.
        /// </summary>
        public virtual void TestVersion36()
        {
            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                // CS0612/CS0618 (obsolete member) are suppressed only around the version constant.
#pragma warning disable 612, 618
                Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_36, reader);
#pragma warning restore 612, 618
                return new TokenStreamComponents(tokenizer);
            });

            // U+08E6 is a new combining mark in 6.1. The address in the input and in the
            // expected tokens must be the same literal, real e-mail address.
            AssertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
        }
        /// <summary>
        /// Tokenizes a document that starts with 4094 space characters followed by
        /// <c>"testing 1234"</c> and verifies that only <c>testing</c> and <c>1234</c> come out.
        /// </summary>
        public virtual void TestHugeDoc()
        {
            // Build the leading padding by hand: one space per slot.
            char[] padding = new char[4094];
            for (int i = 0; i < padding.Length; i++)
            {
                padding[i] = ' ';
            }

            string document = new StringBuilder()
                .Append(padding)
                .Append("testing 1234")
                .ToString();

            UAX29URLEmailTokenizer stream = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(document));
            BaseTokenStreamTestCase.AssertTokenStreamContents(stream, new string[] { "testing", "1234" });
        }
        /// <summary>
        /// Checks tokenization with a tokenizer created for <c>LuceneVersion.LUCENE_40</c>:
        /// the word containing U+061C is expected to be split into <c>t</c> and <c>st</c>,
        /// and the trailing e-mail address is expected to be kept as a single token.
        /// </summary>
        public virtual void TestVersion40()
        {
            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                // CS0612/CS0618 (obsolete member) are suppressed only around the version constant.
#pragma warning disable 612, 618
                Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618
                return new TokenStreamComponents(tokenizer);
            });

            // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
            // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
            // The address in the input and in the expected tokens must be the same literal,
            // real e-mail address.
            AssertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org", new string[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
        }
        /// <summary>
        /// Pins the single-term output of a tokenizer created for <c>LuceneVersion.LUCENE_31</c>
        /// on four inputs (hiragana, katakana, ideographic, hangul). Per the inline notes, the
        /// hiragana and ideographic expectations record buggy legacy output, while the katakana
        /// and hangul inputs are expected to come back unchanged.
        /// </summary>
        public virtual void TestCombiningMarksBackwards()
        {
            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                // CS0612/CS0618 (obsolete member) are suppressed only around the version constant.
#pragma warning disable 612, 618
                Tokenizer legacyTokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_31, reader);
#pragma warning restore 612, 618

                return new TokenStreamComponents(legacyTokenizer);
            });

            // hiragana Bug
            CheckOneTerm(analyzer, "ざ", "さ");
            // katakana Works
            CheckOneTerm(analyzer, "ザ", "ザ");
            // ideographic Bug
            CheckOneTerm(analyzer, "壹゙", "壹");
            // hangul Works
            CheckOneTerm(analyzer, "아゙", "아゙");
        }
 /// <summary>
 /// Initializes the components wrapper: <paramref name="src"/> and <paramref name="tok"/> are
 /// forwarded to the base constructor, and the owning analyzer, the reader and the tokenizer
 /// are stored in fields of this instance.
 /// </summary>
 /// <param name="outerInstance">The analyzer that created these components.</param>
 /// <param name="src">The tokenizer at the start of the chain.</param>
 /// <param name="tok">The token stream at the end of the chain.</param>
 /// <param name="reader">The reader the chain was created for.</param>
 public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, UAX29URLEmailTokenizer src, TokenStream tok, TextReader reader)
     : base(src, tok)
 {
     // Plain field captures; the three assignments are independent of each other.
     this.src = src;
     this.reader = reader;
     this.outerInstance = outerInstance;
 }
		  /// <summary>
		  /// Creates components made of a single <c>UAX29URLEmailTokenizer</c> built with
		  /// <c>TEST_VERSION_CURRENT</c>; no filters are added.
		  /// </summary>
		  /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
		  /// <param name="reader">Reader supplying the text to tokenize.</param>
		  /// <returns>Components that wrap only the tokenizer.</returns>
		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
		  {
			Tokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);

			return new TokenStreamComponents(source);
		  }
		  /// <summary>
		  /// Creates components made of a single <c>UAX29URLEmailTokenizer</c> built with
		  /// <c>Version.LUCENE_40</c>; no filters are added.
		  /// </summary>
		  /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
		  /// <param name="reader">Reader supplying the text to tokenize.</param>
		  /// <returns>Components that wrap only the tokenizer.</returns>
		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
		  {
			Tokenizer lucene40Tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_40, reader);

			return new TokenStreamComponents(lucene40Tokenizer);
		  }
Example #14
0
            /// <summary>
            /// Builds the analysis chain for a field: a single <see cref="UAX29URLEmailTokenizer"/>
            /// created with <c>LuceneVersion.LUCENE_40</c>, with no token filters on top of it.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components that wrap only the tokenizer.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // Suppress the obsolete-member warnings (CS0612/CS0618) for the legacy version
                // constant, scoped to this one statement so other obsolete usages still warn.
#pragma warning disable 612, 618
                Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618

                return new TokenStreamComponents(tokenizer);
            }
 /// <summary>
 /// Builds a chain consisting of a <see cref="UAX29URLEmailTokenizer"/> whose maximum token
 /// length is set to <see cref="int.MaxValue"/>, wrapped by a <c>URLFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Reader supplying the text to tokenize.</param>
 /// <returns>Components with the tokenizer as source and the URL filter as the end of the chain.</returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     UAX29URLEmailTokenizer urlTokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);

     // Tokenize arbitrary length URLs
     urlTokenizer.MaxTokenLength = int.MaxValue;

     return new TokenStreamComponents(urlTokenizer, new URLFilter(urlTokenizer));
 }
 /// <summary>
 /// Creates a <see cref="UAX29URLEmailTokenizer"/> over <paramref name="input"/> using this
 /// factory's <c>luceneMatchVersion</c> and applies the configured <c>maxTokenLength</c>.
 /// </summary>
 /// <param name="factory">Attribute factory handed to the tokenizer.</param>
 /// <param name="input">Reader supplying the text to tokenize.</param>
 /// <returns>The configured tokenizer.</returns>
 public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
 {
     return new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input)
     {
         MaxTokenLength = maxTokenLength
     };
 }
            /// <summary>
            /// Builds components made of a single <see cref="UAX29URLEmailTokenizer"/> created
            /// with <c>TEST_VERSION_CURRENT</c>; no filters are added.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components that wrap only the tokenizer.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);

                return new TokenStreamComponents(source);
            }
            /// <summary>
            /// Builds components made of a single <see cref="UAX29URLEmailTokenizer"/> created
            /// with <c>TEST_VERSION_CURRENT</c>; the tokenizer is both source and result stream.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components that wrap only the tokenizer.</returns>
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer currentVersionTokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
                return new TokenStreamComponents(currentVersionTokenizer);
            }
            /// <summary>
            /// Creates components made of a single <c>UAX29URLEmailTokenizer</c> built with
            /// <c>Version.LUCENE_36</c>; no filters are added.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components that wrap only the tokenizer.</returns>
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                Tokenizer lucene36Tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
                return new TokenStreamComponents(lucene36Tokenizer);
            }
 /// <summary>
 /// Builds this analyzer's chain for a field: a <see cref="UAX29URLEmailTokenizer"/> limited
 /// to <c>maxTokenLength</c>, followed by <c>StandardFilter</c>, <c>LowerCaseFilter</c> and a
 /// <c>StopFilter</c> over <c>stopwords</c>, in that order.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Reader supplying the text to tokenize.</param>
 /// <returns>
 /// A <c>TokenStreamComponentsAnonymousInnerClassHelper</c> holding the tokenizer, the end of
 /// the filter chain, this analyzer and the reader.
 /// </returns>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(matchVersion, reader)
     {
         MaxTokenLength = maxTokenLength
     };

     // Innermost filter runs first: standard -> lower-case -> stop words.
     TokenStream chain = new StopFilter(
         matchVersion,
         new LowerCaseFilter(matchVersion, new StandardFilter(matchVersion, source)),
         stopwords);

     return new TokenStreamComponentsAnonymousInnerClassHelper(this, source, chain, reader);
 }
            /// <summary>
            /// Builds components made of a single <see cref="UAX29URLEmailTokenizer"/> created
            /// with <c>LuceneVersion.LUCENE_40</c>; no filters are added.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
            /// <param name="reader">Reader supplying the text to tokenize.</param>
            /// <returns>Components that wrap only the tokenizer.</returns>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                // The obsolete-member warnings (CS0612/CS0618) are disabled just for the
                // statement that names LuceneVersion.LUCENE_40.
#pragma warning disable 612, 618
                Tokenizer lucene40Tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618

                return new TokenStreamComponents(lucene40Tokenizer);
            }