/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>LuceneVersion.LUCENE_40</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // Warnings 612/618 (obsolete member usage) are silenced only around the
    // LUCENE_40 reference and re-enabled immediately afterwards.
#pragma warning disable 612, 618
    Tokenizer source = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618

    return new TokenStreamComponents(source);
}
/// <summary>
/// Builds an analysis chain that tokenizes with <c>UAX29URLEmailTokenizer</c>
/// and then passes the tokens through <c>EmailFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the email filter.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
    TokenFilter emailsOnly = new EmailFilter(source);

    return new TokenStreamComponents(source, emailsOnly);
}
/// <summary>
/// Builds an analysis chain that tokenizes with <c>UAX29URLEmailTokenizer</c>
/// and then passes the tokens through <c>EmailFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the email filter.</returns>
// NOTE(review): the camelCase name and the `Reader` parameter type look like
// unported Java; confirm they match the base class member being overridden.
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    var source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
    TokenFilter emailsOnly = new EmailFilter(source);

    return new TokenStreamComponents(source, emailsOnly);
}
/// <summary>
/// Builds an analysis chain that tokenizes with <c>UAX29URLEmailTokenizer</c>
/// (token length cap raised to <c>int.MaxValue</c>) and then passes the tokens
/// through <c>URLFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the URL filter.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader)
    {
        // Tokenize arbitrary length URLs
        MaxTokenLength = int.MaxValue
    };
    TokenFilter urlsOnly = new URLFilter(source);

    return new TokenStreamComponents(source, urlsOnly);
}
/// <summary>
/// Tokenizes a document made of 4094 leading spaces followed by
/// "testing 1234" and asserts that exactly the tokens "testing" and "1234"
/// come out.
/// </summary>
public virtual void TestHugeDoc()
{
    // NOTE(review): 4094 is presumably chosen to push the first token close to
    // an internal buffer boundary of the scanner - confirm against the tokenizer.
    string input = new string(' ', 4094) + "testing 1234";
    string[] expectedTerms = { "testing", "1234" };

    var tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    BaseTokenStreamTestCase.AssertTokenStreamContents(tokenizer, expectedTerms);
}
/// <summary>
/// Pins the tokenization of a <c>mailto:</c> address when the tokenizer is
/// created with <c>LuceneVersion.LUCENE_34</c>: the text
/// "mailto:test@example.org" is expected to come out as the two tokens
/// "mailto:test" and "example.org".
/// </summary>
public virtual void TestMailtoBackwards()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        // Warnings 612/618 (obsolete member usage) are silenced only around the
        // LUCENE_34 reference.
#pragma warning disable 612, 618
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_34, reader);
#pragma warning restore 612, 618
        return new TokenStreamComponents(tokenizer);
    });

    // The input must contain a real '@' address; the expected tokens are the
    // text on either side of the '@'.
    AssertAnalyzesTo(a, "mailto:test@example.org",
        new string[] { "mailto:test", "example.org" });
}
/// <summary>
/// Sanity check for a tokenizer created with <c>LuceneVersion.LUCENE_36</c>:
/// U+08E6 is expected to split "t\u08E6st" into "t" and "st", and the trailing
/// e-mail address is expected to come through as a single token.
/// </summary>
public virtual void TestVersion36()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        // Warnings 612/618 (obsolete member usage) are silenced only around the
        // LUCENE_36 reference.
#pragma warning disable 612, 618
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_36, reader);
#pragma warning restore 612, 618
        return new TokenStreamComponents(tokenizer);
    });

    // new combining mark in 6.1
    AssertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org",
        new string[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
/// <summary>
/// Sanity check for a tokenizer created with <c>LuceneVersion.LUCENE_40</c>:
/// U+061C is expected to split "t\u061Cst" into "t" and "st", and the trailing
/// e-mail address is expected to come through as a single token.
/// </summary>
public virtual void TestVersion40()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        // Warnings 612/618 (obsolete member usage) are silenced only around the
        // LUCENE_40 reference.
#pragma warning disable 612, 618
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618
        return new TokenStreamComponents(tokenizer);
    });

    // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
    // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
    AssertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org",
        new string[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
/// <summary>
/// Pins how a tokenizer created with <c>LuceneVersion.LUCENE_31</c> treats a
/// base character followed by the combining voiced sound mark (U+3099): the
/// mark is expected to be dropped after hiragana and ideographic bases and
/// kept after katakana and hangul bases.
/// </summary>
public virtual void TestCombiningMarksBackwards()
{
    Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        // Warnings 612/618 (obsolete member usage) are silenced only around the
        // LUCENE_31 reference.
#pragma warning disable 612, 618
        Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_31, reader);
#pragma warning restore 612, 618
        return new TokenStreamComponents(tokenizer);
    });

    // Every input is <base char> + U+3099, written with escapes so that editor
    // or VCS Unicode normalization cannot silently fuse the pair into a single
    // precomposed character and change what is being tested.
    CheckOneTerm(a, "\u3055\u3099", "\u3055");       // hiragana Bug
    CheckOneTerm(a, "\u30B5\u3099", "\u30B5\u3099"); // katakana Works
    CheckOneTerm(a, "\u58F9\u3099", "\u58F9");       // ideographic Bug
    CheckOneTerm(a, "\uC544\u3099", "\uC544\u3099"); // hangul Works
}
/// <summary>
/// Creates the components wrapper: <paramref name="src"/> and
/// <paramref name="tok"/> are forwarded to the base constructor, and the
/// owning analyzer, the tokenizer and the reader are kept in fields.
/// </summary>
/// <param name="outerInstance">Analyzer that created this instance.</param>
/// <param name="src">Tokenizer at the head of the chain.</param>
/// <param name="tok">Final token stream of the chain.</param>
/// <param name="reader">Reader the chain was built on.</param>
public TokenStreamComponentsAnonymousInnerClassHelper(
    UAX29URLEmailAnalyzer outerInstance,
    UAX29URLEmailTokenizer src,
    TokenStream tok,
    TextReader reader)
    : base(src, tok)
{
    this.src = src;
    this.reader = reader;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>TEST_VERSION_CURRENT</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
// NOTE(review): the camelCase name and the `Reader` parameter type look like
// unported Java; confirm they match the base class member being overridden.
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    return new TokenStreamComponents(
        new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>Version.LUCENE_40</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
// NOTE(review): the camelCase name, the `Reader` parameter type and
// `Version.LUCENE_40` look like unported Java; confirm against the base class.
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    return new TokenStreamComponents(
        new UAX29URLEmailTokenizer(Version.LUCENE_40, reader));
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>LuceneVersion.LUCENE_40</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // Silence obsolete-member warnings (612/618) around the deliberate use of
    // an old match version, the same way the other LUCENE_40 factories do, so
    // the code still builds when warnings are treated as errors.
#pragma warning disable 612, 618
    Tokenizer tokenizer = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618
    return new TokenStreamComponents(tokenizer);
}
/// <summary>
/// Builds an analysis chain that tokenizes with <c>UAX29URLEmailTokenizer</c>
/// (token length cap raised to <c>int.MaxValue</c>) and then passes the tokens
/// through <c>URLFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the URL filter.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader)
    {
        // Tokenize arbitrary length URLs
        MaxTokenLength = int.MaxValue
    };
    TokenFilter urlsOnly = new URLFilter(source);

    return new TokenStreamComponents(source, urlsOnly);
}
/// <summary>
/// Creates a <c>UAX29URLEmailTokenizer</c> over <paramref name="input"/> using
/// this factory's <c>luceneMatchVersion</c> and <c>maxTokenLength</c>.
/// </summary>
/// <param name="factory">Attribute factory passed through to the tokenizer.</param>
/// <param name="input">Text source to tokenize.</param>
/// <returns>The configured tokenizer.</returns>
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
    return new UAX29URLEmailTokenizer(luceneMatchVersion, factory, input)
    {
        MaxTokenLength = maxTokenLength
    };
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>TEST_VERSION_CURRENT</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    return new TokenStreamComponents(
        new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>TEST_VERSION_CURRENT</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    return new TokenStreamComponents(
        new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>Version.LUCENE_36</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
// NOTE(review): the camelCase name, the `Reader` parameter type and
// `Version.LUCENE_36` look like unported Java; confirm against the base class.
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    return new TokenStreamComponents(
        new UAX29URLEmailTokenizer(Version.LUCENE_36, reader));
}
/// <summary>
/// Builds this analyzer's chain: a <c>UAX29URLEmailTokenizer</c> (capped at
/// <c>maxTokenLength</c>) feeding <c>StandardFilter</c>, then
/// <c>LowerCaseFilter</c>, then <c>StopFilter</c> with <c>stopwords</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>
/// A <c>TokenStreamComponentsAnonymousInnerClassHelper</c> built from this
/// analyzer, the tokenizer, the end of the filter chain and the reader.
/// </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var src = new UAX29URLEmailTokenizer(matchVersion, reader)
    {
        MaxTokenLength = maxTokenLength
    };

    // Each filter wraps the previous stage; order matters.
    TokenStream standardized = new StandardFilter(matchVersion, src);
    TokenStream lowerCased = new LowerCaseFilter(matchVersion, standardized);
    TokenStream withoutStopwords = new StopFilter(matchVersion, lowerCased, stopwords);

    return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, withoutStopwords, reader);
}
/// <summary>
/// Builds a filter-less analysis chain whose only component is a
/// <c>UAX29URLEmailTokenizer</c> created with <c>LuceneVersion.LUCENE_40</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed; not consulted here.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components wrapping the tokenizer alone.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // Warnings 612/618 (obsolete member usage) are silenced only around the
    // LUCENE_40 reference and re-enabled immediately afterwards.
#pragma warning disable 612, 618
    Tokenizer source = new UAX29URLEmailTokenizer(LuceneVersion.LUCENE_40, reader);
#pragma warning restore 612, 618

    return new TokenStreamComponents(source);
}