/// <summary>
/// Checks that the two-code-unit input U+0049 U+0307 is emitted by
/// <seealso cref="TurkishLowerCaseFilter"/> as the single token "i".
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testDecomposed3()
{
    // Capital I followed by U+0307.
    StringReader input = new StringReader("\u0049\u0307");
    string[] expectedTokens = { "i" };

    // Whitespace tokenization; the trailing 'false' is passed through unchanged
    // (presumably disables the mock tokenizer's own lowercasing - confirm against MockTokenizer).
    TokenStream whitespaceTokens = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter lowercased = new TurkishLowerCaseFilter(whitespaceTokens);

    assertTokenStreamContents(lowercased, expectedTokens);
}
/// <summary>
/// Test decomposed forms with additional accents.
/// In this example, U+0049 + U+0316 + U+0307 is canonically equivalent
/// to U+0130 + U+0316, and is lowercased the same way.
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testDecomposed2()
{
    // Three whitespace-separated words, each starting with a decomposed capital I.
    const string text = "\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA";
    string[] expectedTokens = { "i\u0316stanbul", "izmir", "\u0131\u0316sparta" };

    TokenStream whitespaceTokens = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter lowercased = new TurkishLowerCaseFilter(whitespaceTokens);

    assertTokenStreamContents(lowercased, expectedTokens);
}
/// <summary>
/// Test composed forms: words written with the precomposed U+0130 and with a
/// plain capital I are run through <seealso cref="TurkishLowerCaseFilter"/>
/// and compared against their expected lowercase tokens.
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testTurkishLowerCaseFilter()
{
    const string text = "\u0130STANBUL \u0130ZM\u0130R ISPARTA";
    // U+0130 is expected to become "i"; the plain "I" is expected to become U+0131.
    string[] expectedTokens = { "istanbul", "izmir", "\u0131sparta" };

    TokenStream whitespaceTokens = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter lowercased = new TurkishLowerCaseFilter(whitespaceTokens);

    assertTokenStreamContents(lowercased, expectedTokens);
}
/// <summary>
/// Creates a
/// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
/// which tokenizes all the text in the provided <seealso cref="Reader"/>.
/// </summary>
/// <param name="fieldName"> Field name; not read by this method. </param>
/// <param name="reader"> Text source handed to the <seealso cref="StandardTokenizer"/>. </param>
/// <returns> A
/// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
/// built from a <seealso cref="StandardTokenizer"/> filtered with
/// <seealso cref="StandardFilter"/>, <seealso cref="ApostropheFilter"/> when
/// <c>matchVersion</c> is on or after <c>Version.LUCENE_48</c>,
/// <seealso cref="TurkishLowerCaseFilter"/>, <seealso cref="StopFilter"/>,
/// <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is provided,
/// and <seealso cref="SnowballFilter"/>. </returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    // Converter note: this local was 'final' in the original Java source.
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);

    // Each stage wraps the previous one, so the order below is the processing order.
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);

    bool useApostropheFilter = matchVersion.onOrAfter(Version.LUCENE_48);
    if (useApostropheFilter)
    {
        chain = new ApostropheFilter(chain);
    }

    chain = new TurkishLowerCaseFilter(chain);
    chain = new StopFilter(matchVersion, chain, stopwords);

    // The keyword marker is only added when there is something to exclude from stemming.
    bool hasStemExclusions = !stemExclusionSet.Empty;
    if (hasStemExclusions)
    {
        chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
    }

    chain = new SnowballFilter(chain, new TurkishStemmer());

    return new TokenStreamComponents(tokenizer, chain);
}
/// <summary>
/// Builds the
/// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
/// that tokenize all the text in the provided <seealso cref="Reader"/>.
/// </summary>
/// <param name="fieldName"> Field name; not read by this method. </param>
/// <param name="reader"> Text source handed to the <seealso cref="StandardTokenizer"/>. </param>
/// <returns> A
/// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
/// whose source is a <seealso cref="StandardTokenizer"/> and whose sink applies, in order,
/// <seealso cref="StandardFilter"/>, <seealso cref="ApostropheFilter"/> (only when
/// <c>matchVersion</c> is on or after <c>Version.LUCENE_48</c>),
/// <seealso cref="TurkishLowerCaseFilter"/>, <seealso cref="StopFilter"/>,
/// <seealso cref="SetKeywordMarkerFilter"/> (only when the stem exclusion set is not empty)
/// and <seealso cref="SnowballFilter"/>. </returns>
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    // Converter note: this local was 'final' in the original Java source.
    Tokenizer standardTokenizer = new StandardTokenizer(matchVersion, reader);

    TokenStream normalized = new StandardFilter(matchVersion, standardTokenizer);
    if (matchVersion.onOrAfter(Version.LUCENE_48))
    {
        // Version-gated stage: absent for match versions before LUCENE_48.
        normalized = new ApostropheFilter(normalized);
    }

    TokenStream lowercased = new TurkishLowerCaseFilter(normalized);
    TokenStream withoutStopwords = new StopFilter(matchVersion, lowercased, stopwords);

    // By default the stop-filtered stream feeds the stemmer directly; a keyword
    // marker is placed in between only when a stem exclusion set was supplied.
    TokenStream stemmerInput = withoutStopwords;
    if (!stemExclusionSet.Empty)
    {
        stemmerInput = new SetKeywordMarkerFilter(withoutStopwords, stemExclusionSet);
    }

    TokenStream stemmed = new SnowballFilter(stemmerInput, new TurkishStemmer());
    return new TokenStreamComponents(standardTokenizer, stemmed);
}
/// <summary>
/// Test composed forms: input containing the precomposed U+0130 and a plain
/// capital I is tokenized on whitespace, passed through
/// <seealso cref="TurkishLowerCaseFilter"/>, and checked token by token.
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testTurkishLowerCaseFilter()
{
    StringReader input = new StringReader("\u0130STANBUL \u0130ZM\u0130R ISPARTA");

    TokenStream tokenized = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter underTest = new TurkishLowerCaseFilter(tokenized);

    // One expected token per whitespace-separated input word, in input order.
    string[] expected = { "istanbul", "izmir", "\u0131sparta" };
    assertTokenStreamContents(underTest, expected);
}
/// <summary>
/// Feeds the sequence U+0049 U+0307 through a whitespace
/// <seealso cref="MockTokenizer"/> and <seealso cref="TurkishLowerCaseFilter"/>
/// and expects exactly one token, "i".
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testDecomposed3()
{
    const string text = "\u0049\u0307";

    TokenStream tokenized = new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter underTest = new TurkishLowerCaseFilter(tokenized);

    string[] expected = { "i" };
    assertTokenStreamContents(underTest, expected);
}
/// <summary>
/// Test decomposed forms with additional accents.
/// In this example, U+0049 + U+0316 + U+0307 is canonically equivalent
/// to U+0130 + U+0316, and is lowercased the same way.
/// </summary>
// Converter note: the original Java method was declared 'throws Exception';
// .NET has no equivalent of a 'throws' clause.
public virtual void testDecomposed2()
{
    StringReader input = new StringReader("\u0049\u0316\u0307STANBUL \u0049\u0307ZM\u0049\u0307R I\u0316SPARTA");

    TokenStream tokenized = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TurkishLowerCaseFilter underTest = new TurkishLowerCaseFilter(tokenized);

    // U+0316 is expected to stay attached to the lowercased base letter in the first and last tokens.
    string[] expected = { "i\u0316stanbul", "izmir", "\u0131\u0316sparta" };
    assertTokenStreamContents(underTest, expected);
}