/// <summary>
/// Builds the token stream for a field: a <c>LowerCaseTokenizer</c> over the reader,
/// wrapped in a <c>PayloadFilter</c>.
/// </summary>
/// <param name="fieldName">Field name; forwarded to the payload filter.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>The payload filter sitting on top of the tokenizer.</returns>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream lowerCased = new LowerCaseTokenizer(reader);
    TokenStream withPayloads = new PayloadFilter(enclosingInstance, lowerCased, fieldName);
    return withPayloads;
}
/// <summary>
/// Runs <c>LowerCaseTokenizer</c> with <c>Version.LUCENE_30</c> over text whose second word
/// starts with a surrogate pair, and expects that pair to be absent from the second token.
/// </summary>
public virtual void testLowerCaseTokenizerBWCompat()
{
    string[] expectedTerms = { "tokenizer", "test" };
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30, new StringReader("Tokenizer \ud801\udc1ctest"));
    assertTokenStreamContents(tokenizer, expectedTerms);
}
/// <summary>
/// Runs <c>LowerCaseTokenizer</c> with <c>TEST_VERSION_CURRENT</c> over text whose second word
/// starts with a surrogate pair, and expects that pair to be kept in the second token with a
/// different low surrogate.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testLowerCaseTokenizer()
{
    string[] expectedTerms = { "tokenizer", "\ud801\udc44test" };
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Tokenizer \ud801\udc1ctest"));
    assertTokenStreamContents(tokenizer, expectedTerms);
}
/// <summary>
/// Assembles the analysis chain: <c>LowerCaseTokenizer</c>, then <c>PorterStemFilter</c>,
/// then <c>StopFilter</c> using <c>EnglishAnalyzer.DefaultStopSet</c>.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the stop filter at the end of the chain.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new LowerCaseTokenizer(LuceneVersion.LUCENE_48, reader);

    // Stemming runs before stop-word removal, exactly as in the original chain.
    var stemmed = new PorterStemFilter(source);
    var sink = new StopFilter(LuceneVersion.LUCENE_48, stemmed, EnglishAnalyzer.DefaultStopSet);

    return new TokenStreamComponents(source, sink);
}
/// <summary>
/// Tests the max word length of 255: the tokenizer will split at the 255th char no matter
/// what happens, so 510 consecutive letters must come back as two 255-char tokens.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testMaxWordLength()
{
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < 255; i++)
    {
        builder.Append("A");
    }

    string word = builder.ToString();

    // Java's toLowerCase(Locale.ROOT) has no direct .NET counterpart; ToLowerInvariant()
    // is the culture-independent equivalent.
    string expectedToken = word.ToLowerInvariant();

    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(word + word));
    assertTokenStreamContents(tokenizer, new string[] { expectedToken, expectedToken });
}
/// <summary>
/// Feeds 510 consecutive 'A' characters to the tokenizer and expects two lower-cased
/// tokens of 255 characters each.
/// </summary>
public virtual void TestMaxWordLength()
{
    var word = new string('A', 255);
    var expectedToken = word.ToLowerInvariant();

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(word + word));
    AssertTokenStreamContents(tokenizer, new[] { expectedToken, expectedToken });
}
/// <summary>
/// Returns a tokenizer for <paramref name="reader"/>, reusing the one stored in
/// <c>PreviousTokenStream</c> when there is one.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Text source to tokenize.</param>
/// <returns>The stored tokenizer reset onto <paramref name="reader"/>, or a newly created one.</returns>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var cached = (Tokenizer) PreviousTokenStream;
    if (cached != null)
    {
        // Reuse path: point the stored tokenizer at the new input.
        cached.Reset(reader);
        return cached;
    }

    // First use: create the tokenizer and store it for later calls.
    cached = new LowerCaseTokenizer(reader);
    PreviousTokenStream = cached;
    return cached;
}
/// <summary>
/// Test to extend the TermAttribute buffer internally. If the internal algorithm that grows
/// the char array only extends it by one char and the next char to be filled in is a
/// supplementary code point (using two chars), an index-out-of-bounds exception is triggered.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testExtendCharBuffer()
{
    for (int i = 0; i < 40; i++)
    {
        // Prefix of 1..40 'a' characters, followed by a surrogate pair and "abc".
        StringBuilder builder = new StringBuilder();
        for (int j = 0; j < 1 + i; j++)
        {
            builder.Append("a");
        }
        builder.Append("\ud801\udc1cabc");

        string input = builder.ToString();
        Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(input));

        // Java's toLowerCase(Locale.ROOT) has no direct .NET counterpart; ToLowerInvariant()
        // is the culture-independent equivalent.
        assertTokenStreamContents(tokenizer, new string[] { input.ToLowerInvariant() });
    }
}
/// <summary>
/// For prefix lengths 1 through 40, tokenizes that many 'a' characters followed by a
/// surrogate pair and "abc", and expects the whole input back as one lower-cased token.
/// </summary>
public virtual void TestExtendCharBuffer()
{
    for (var prefixLength = 1; prefixLength <= 40; prefixLength++)
    {
        var input = new string('a', prefixLength) + "\ud801\udc1cabc";

        var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
        AssertTokenStreamContents(tokenizer, new[] { input.ToLowerInvariant() });
    }
}
/// <summary>
/// Test to extend the TermAttribute buffer internally. If the internal algorithm that grows
/// the char array only extends it by one char and the next char to be filled in is a
/// supplementary code point (using two chars), an index-out-of-bounds exception is triggered.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testExtendCharBuffer()
{
    for (int i = 0; i < 40; i++)
    {
        StringBuilder builder = new StringBuilder();
        int prefixLength = 1 + i;
        for (int j = 0; j < prefixLength; j++)
        {
            builder.Append("a");
        }

        // The surrogate pair lands right after the prefix, at a different offset on each pass.
        builder.Append("\ud801\udc1cabc");
        string text = builder.ToString();

        // string.ToLower has no overload taking Java's Locale.ROOT; use the invariant culture.
        string expectedToken = text.ToLowerInvariant();

        Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
        assertTokenStreamContents(tokenizer, new string[] { expectedToken });
    }
}
/// <summary>
/// Returns a tokenizer for <paramref name="reader"/>, reusing the one returned by
/// <c>GetPreviousTokenStream()</c> when there is one.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Text source to tokenize.</param>
/// <returns>The stored tokenizer reset onto <paramref name="reader"/>, or a newly created one.</returns>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    Tokenizer cached = (Tokenizer)GetPreviousTokenStream();

    if (cached != null)
    {
        // Reuse path: point the stored tokenizer at the new input.
        cached.Reset(reader);
    }
    else
    {
        // First use: create the tokenizer and store it for later calls.
        cached = new LowerCaseTokenizer(reader);
        SetPreviousTokenStream(cached);
    }

    return cached;
}
/// <summary>
/// Returns a tokenizer for <paramref name="reader"/>, reusing the one stored in
/// <c>PreviousTokenStream</c> when there is one.
/// </summary>
/// <param name="fieldName">Field name; not used by this implementation.</param>
/// <param name="reader">Text source to tokenize.</param>
/// <returns>A newly created tokenizer on first use, otherwise the stored one reset onto <paramref name="reader"/>.</returns>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    var reusable = (Tokenizer)PreviousTokenStream;

    if (reusable == null)
    {
        // Nothing stored yet: create the tokenizer and remember it.
        reusable = new LowerCaseTokenizer(reader);
        PreviousTokenStream = reusable;
        return reusable;
    }

    reusable.Reset(reader);
    return reusable;
}
/// <summary>
/// Builds a long random-length input of surrogate-pair words and checks that the tokenizer
/// returns exactly the lower-cased, space-separated words.
/// </summary>
public virtual void TestReadSupplementaryChars()
{
    var text = new StringBuilder();

    // create random input
    var chunkLimit = 1024 + Random().Next(1024);
    chunkLimit *= RANDOM_MULTIPLIER;

    var chunk = 1;
    while (chunk < chunkLimit)
    {
        text.Append("\ud801\udc1cabc");
        if (chunk % 10 == 0)
        {
            text.Append(" ");
        }
        chunk++;
    }

    // internal buffer size is 1024 make sure we have a surrogate pair right at the border
    text.Insert(1023, "\ud801\udc1c");

    var input = text.ToString();
    var separators = new[] { ' ' };
    var expectedTokens = input.ToLowerInvariant().Split(separators, StringSplitOptions.RemoveEmptyEntries);

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    AssertTokenStreamContents(tokenizer, expectedTokens);
}
/// <summary>
/// Test to read surrogate pairs without losing the pairing when the surrogate pair sits
/// at the border of the internal IO buffer.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testReadSupplementaryChars()
{
    StringBuilder builder = new StringBuilder();

    // create random input
    int num = 1024 + random().Next(1024);
    num *= RANDOM_MULTIPLIER;
    for (int i = 1; i < num; i++)
    {
        builder.Append("\ud801\udc1cabc");
        if ((i % 10) == 0)
        {
            builder.Append(" ");
        }
    }

    // internal buffer size is 1024 make sure we have a surrogate pair right at the border
    builder.Insert(1023, "\ud801\udc1c");

    string input = builder.ToString();

    // Java's toLowerCase(Locale.ROOT).split(" ") does not exist in .NET. ToLowerInvariant()
    // is the culture-independent lowering. Java's split drops trailing empty strings; the
    // input only ever contains single spaces and may end with one, so removing empty
    // entries yields the same token list.
    string[] expectedTokens = input.ToLowerInvariant()
        .Split(new[] { ' ' }, System.StringSplitOptions.RemoveEmptyEntries);

    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    assertTokenStreamContents(tokenizer, expectedTokens);
}
/// <summary>
/// Builds a long random-length input of surrogate-pair words and checks that the tokenizer
/// returns the lower-cased words obtained by splitting the input on spaces.
/// </summary>
public virtual void TestReadSupplementaryChars()
{
    var text = new StringBuilder();

    // create random input
    var wordLimit = 1024 + Random.Next(1024);
    wordLimit *= RandomMultiplier;

    for (var word = 1; word < wordLimit; word++)
    {
        text.Append("\ud801\udc1cabc");

        var endsGroupOfTen = (word % 10) == 0;
        if (endsGroupOfTen)
        {
            text.Append(" ");
        }
    }

    // internal buffer size is 1024 make sure we have a surrogate pair right at the border
    text.Insert(1023, "\ud801\udc1c");

    var input = text.ToString();
    var expectedTokens = input.ToLowerInvariant().Split(' ').TrimEnd();

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    AssertTokenStreamContents(tokenizer, expectedTokens);
}
/// <summary>
/// Checks <c>LowerCaseTokenizer</c> under <c>TEST_VERSION_CURRENT</c>: the first word is
/// lower-cased, and the surrogate pair that starts the second word is kept with a different
/// low surrogate.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testLowerCaseTokenizer()
{
    const string input = "Tokenizer \ud801\udc1ctest";

    StringReader source = new StringReader(input);
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, source);

    string[] expected = new string[] { "tokenizer", "\ud801\udc44test" };
    assertTokenStreamContents(tokenizer, expected);
}
/// <summary>
/// Checks <c>LowerCaseTokenizer</c> under <c>Version.LUCENE_30</c>: the surrogate pair that
/// starts the second word is not part of the expected second token.
/// </summary>
public virtual void testLowerCaseTokenizerBWCompat()
{
    const string input = "Tokenizer \ud801\udc1ctest";

    StringReader source = new StringReader(input);
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30, source);

    string[] expected = new string[] { "tokenizer", "test" };
    assertTokenStreamContents(tokenizer, expected);
}
/// <summary>
/// Tests the max word length of 255: the tokenizer will split at the 255th char no matter
/// what happens, so 510 consecutive letters must come back as two 255-char tokens.
/// </summary>
/// <remarks>
/// The Java original declared <c>throws java.io.IOException</c>; .NET has no 'throws' clauses.
/// </remarks>
public virtual void testMaxWordLength()
{
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < 255; i++)
    {
        builder.Append("A");
    }

    // Materialize the 255-char word once rather than calling ToString() four times.
    string maxLengthWord = builder.ToString();
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(maxLengthWord + maxLengthWord));

    // string.ToLower has no overload taking Java's Locale.ROOT; use the invariant culture.
    string lowered = maxLengthWord.ToLowerInvariant();
    assertTokenStreamContents(tokenizer, new string[] { lowered, lowered });
}
/// <summary>
/// Feeds the tokenizer two copies of a word made of 254 'A' characters followed by a
/// surrogate pair, and expects two tokens, each the lower-cased word.
/// </summary>
public virtual void TestMaxWordLengthWithSupplementary()
{
    // 254 BMP chars plus one surrogate pair.
    var word = new string('A', 254) + "\ud801\udc1c";
    var expectedToken = word.ToLowerInvariant();

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(word + word));
    AssertTokenStreamContents(tokenizer, new[] { expectedToken, expectedToken });
}