/// <summary>
/// Builds an analyzer that wraps the reader in a MockCharFilter and a MappingCharFilter,
/// tokenizes with a MockTokenizer followed by a CommonGramsFilter, and hands it to
/// CheckAnalysisConsistency together with a fixed input string.
/// </summary>
public virtual void Test()
{
    // Words given to the CommonGramsFilter; third argument false = not ignoring case.
    var commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    commonWords.add("jjp");
    commonWords.add("wlmwoknt");
    commonWords.add("tcgyreo");

    // Character-level replacements applied by the MappingCharFilter.
    var mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.Build();

    Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(
                new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader),
                MockTokenFilter.ENGLISH_STOPSET,
                false,
                -65);
            TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            return new TokenStreamComponents(tokenizer, grams);
        },
        initReader: (fieldName, reader) =>
        {
            reader = new MockCharFilter(reader, 0);
            reader = new MappingCharFilter(charMap, reader);
            return reader;
        });

    CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Checks ToString() of a copied CharArraySet with one and then two entries,
/// once for TEST_VERSION_CURRENT and once for Version.LUCENE_30.
/// </summary>
public virtual void testToString()
{
    // Copy created with the current test version.
    CharArraySet current = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test"));
    assertEquals("[test]", current.ToString());
    current.add("test2");
    assertTrue(current.ToString().Contains(", "));

    // Same checks against a copy created with the 3.0 version constant.
    CharArraySet legacy = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test"));
    assertEquals("[test]", legacy.ToString());
    legacy.add("test2");
    assertTrue(legacy.ToString().Contains(", "));
}
/// <summary>
/// Analyzes "hole desek" with a CzechAnalyzer that receives a set containing "hole"
/// as its third constructor argument, and expects the tokens "hole" and "desk".
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("hole");

    var analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "hole", "desk" };
    AssertAnalyzesTo(analyzer, "hole desek", expected);
}
/// <summary>
/// Builds a three-word CharArraySet and a NormalizeCharMap, passes both to
/// AnalyzerAnonymousInnerClassHelper, and runs CheckAnalysisConsistency on a fixed input.
/// </summary>
public virtual void Test()
{
    // Third argument false = the set does not ignore case.
    var commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    commonWords.add("jjp");
    commonWords.add("wlmwoknt");
    commonWords.add("tcgyreo");

    var mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.Build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
    CheckAnalysisConsistency(Random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Analyzes "строевете строеве" with a BulgarianAnalyzer that receives a set containing
/// "строеве" as its third constructor argument, and expects "строй" and "строеве".
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("строеве");

    Analyzer analyzer = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "строй", "строеве" };
    AssertAnalyzesTo(analyzer, "строевете строеве", expected);
}
/// <summary>
/// Analyzes a Russian sentence with a RussianAnalyzer that uses its default stop set and
/// a third-argument set containing "представление"; that word is expected unchanged in
/// the output while the other emitted tokens appear in shortened form.
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("представление");

    Analyzer analyzer = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.DefaultStopSet, exclusions);
    string[] expected =
    {
        "вмест", "сил", "электромагнитн", "энерг", "имел", "представление"
    };
    AssertAnalyzesTo(
        analyzer,
        "Вместе с тем о силе электромагнитной энергии имели представление еще",
        expected);
}
/// <summary>
/// Runs "Fischen Trinken" through LowerCaseTokenizer, SetKeywordMarkerFilter (set holds
/// "fischen") and GermanStemFilter, and expects the tokens "fischen" and "trink".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("fischen");

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken"));
    var marked = new SetKeywordMarkerFilter(tokenizer, keywords);
    var stemmed = new GermanStemFilter(marked);

    string[] expected = { "fischen", "trink" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Analyzes "پیاوە" with a SoraniAnalyzer that receives a set containing the same word
/// as its third constructor argument, and expects the word back unchanged.
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("پیاوە");

    Analyzer analyzer = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "پیاوە" };
    AssertAnalyzesTo(analyzer, "پیاوە", expected);
}
/// <summary>
/// Adds strings containing supplementary (surrogate-pair) characters to a CharArraySet
/// created with Version.LUCENE_30, once with ignoreCase = true and once with
/// ignoreCase = false, and asserts in both cases that the added upper-case forms are
/// contained while the lower-case counterparts are not.
/// </summary>
public virtual void testSupplementaryCharsBWCompat()
{
    // .NET composite-format placeholders ({0}); the Java-style "%s" is never substituted
    // by string.Format.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";

    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    string[] upperArr = { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
    string[] lowerArr = { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

    // The same expectations hold for both settings of the ignoreCase flag.
    foreach (bool ignoreCase in new[] { true, false })
    {
        CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, ignoreCase);
        foreach (string upper in upperArr)
        {
            set.add(upper);
        }

        for (int i = 0; i < upperArr.Length; i++)
        {
            assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
            assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
        }
    }
}
/// <summary>
/// Analyzes "hole desek" with a CzechAnalyzer that receives a set containing "hole"
/// as its third constructor argument, and expects the tokens "hole" and "desk".
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("hole");

    var analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "hole", "desk" };
    AssertAnalyzesTo(analyzer, "hole desek", expected);
}
/// <summary>
/// Adds strings containing an unpaired high surrogate (\uD800) to a CharArraySet and
/// checks lookups: with ignoreCase = true both the added and the lower-case forms must
/// be found; with ignoreCase = false only the added forms must be found.
/// </summary>
public virtual void testSingleHighSurrogate()
{
    // .NET composite-format placeholders ({0}); the Java-style "%s" is never substituted
    // by string.Format.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // Pass 1: case-insensitive set.
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // Pass 2: case-sensitive set.
    set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        // Report the term that is actually being looked up (the lower-case one).
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Wraps a populated CharArraySet with CharArraySet.unmodifiableSet and checks that the
/// size and the contents (stop words plus the integer 1 in its int, string and char[]
/// forms) are still visible through the wrapper. Also expects a
/// NullReferenceException when null is passed to unmodifiableSet.
/// </summary>
public virtual void testUnmodifiableSet()
{
    var source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.addAll(TEST_STOP_WORDS);
    source.add(Convert.ToInt32(1));
    int expectedSize = source.size();

    CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());

    foreach (string stopword in TEST_STOP_WORDS)
    {
        assertTrue(readOnly.contains(stopword));
    }

    assertTrue(readOnly.contains(Convert.ToInt32(1)));
    assertTrue(readOnly.contains("1"));
    assertTrue(readOnly.contains(new char[] { '1' }));

    try
    {
        CharArraySet.unmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Runs "yourselves yours" through a whitespace MockTokenizer, SetKeywordMarkerFilter
/// (set holds "yourselves") and PorterStemFilter, and expects "yourselves" and "your".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("yourselves");

    Tokenizer source = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
    TokenStream stemmed = new PorterStemFilter(new SetKeywordMarkerFilter(source, keywords));

    string[] expected = { "yourselves", "your" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Runs "hole desek" through a whitespace MockTokenizer, SetKeywordMarkerFilter
/// (set holds "hole") and CzechStemFilter, and expects "hole" and "desk".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("hole");

    var tokenizer = new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false);
    var marked = new SetKeywordMarkerFilter(tokenizer, keywords);
    var stemmed = new CzechStemFilter(marked);

    string[] expected = { "hole", "desk" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Analyzes "پیاوە" with a SoraniAnalyzer that receives a set containing the same word
/// as its third constructor argument, and expects the word back unchanged.
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("پیاوە");

    Analyzer analyzer = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "پیاوە" };
    AssertAnalyzesTo(analyzer, "پیاوە", expected);
}
/// <summary>
/// Runs "Fischen Trinken" through LowerCaseTokenizer, SetKeywordMarkerFilter (set holds
/// "fischen") and GermanStemFilter, and expects the tokens "fischen" and "trink".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("fischen");

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken"));
    var marked = new SetKeywordMarkerFilter(tokenizer, keywords);
    var stemmed = new GermanStemFilter(marked);

    string[] expected = { "fischen", "trink" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Builds a three-word CharArraySet and a NormalizeCharMap, passes both to
/// AnalyzerAnonymousInnerClassHelper, and runs CheckAnalysisConsistency on a fixed input.
/// </summary>
public virtual void Test()
{
    // Third argument false = the set does not ignore case.
    var commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    commonWords.add("jjp");
    commonWords.add("wlmwoknt");
    commonWords.add("tcgyreo");

    var mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.Build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
    CheckAnalysisConsistency(Random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Analyzes a Russian sentence with a RussianAnalyzer that uses its default stop set and
/// a third-argument set containing "представление"; that word is expected unchanged in
/// the output while the other emitted tokens appear in shortened form.
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("представление");

    Analyzer analyzer = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.DefaultStopSet, exclusions);
    string[] expected =
    {
        "вмест", "сил", "электромагнитн", "энерг", "имел", "представление"
    };
    AssertAnalyzesTo(
        analyzer,
        "Вместе с тем о силе электромагнитной энергии имели представление еще",
        expected);
}
/// <summary>
/// Runs "Brasília Brasilia" through LowerCaseTokenizer, SetKeywordMarkerFilter (set holds
/// "Brasília", ignoreCase = true) and BrazilianStemFilter, and expects "brasília" and "brasil".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("Brasília");

    var tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia"));
    var marked = new SetKeywordMarkerFilter(tokenizer, keywords);
    var stemmed = new BrazilianStemFilter(marked);

    string[] expected = { "brasília", "brasil" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Analyzes "строевете строеве" with a BulgarianAnalyzer that receives a set containing
/// "строеве" as its third constructor argument, and expects "строй" and "строеве".
/// </summary>
public virtual void TestWithStemExclusionSet()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("строеве");

    Analyzer analyzer = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    string[] expected = { "строй", "строеве" };
    AssertAnalyzesTo(analyzer, "строевете строеве", expected);
}
/// <summary>
/// Runs "yourselves yours" through a whitespace MockTokenizer, SetKeywordMarkerFilter
/// (set holds "yourselves") and PorterStemFilter, and expects "yourselves" and "your".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("yourselves");

    Tokenizer source = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
    TokenStream stemmed = new PorterStemFilter(new SetKeywordMarkerFilter(source, keywords));

    string[] expected = { "yourselves", "your" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Runs "ساهدهات" through ArabicLetterTokenizer, SetKeywordMarkerFilter (set holds the
/// same word) and ArabicStemFilter, and expects the word back unchanged.
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("ساهدهات");

    var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
    var stemmed = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenizer, keywords));

    string[] expected = { "ساهدهات" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Feeds the same mixed-case sentence through MockTokenizer, SetKeywordMarkerFilter and
/// LowerCaseFilterMock three times (case-insensitive set, case-sensitive set built from
/// "LuceneFox", and a second reference to the first set) and expects that every token
/// except "LuceneFox" comes out lower-cased.
/// </summary>
public virtual void TestSetFilterIncrementToken()
{
    var ignoreCaseKeywords = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
    ignoreCaseKeywords.add("lucenefox");
    string[] expected = { "the", "quick", "brown", "LuceneFox", "jumps" };

    // 1) case-insensitive keyword set
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                ignoreCaseKeywords)),
        expected);

    // 2) case-sensitive keyword set holding the exact mixed-case spelling
    var mixedCaseKeywords = new CharArraySet(TEST_VERSION_CURRENT, AsSet("LuceneFox"), false);
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                mixedCaseKeywords)),
        expected);

    // 3) another reference to the first set
    CharArraySet sameKeywords = ignoreCaseKeywords;
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                sameKeywords)),
        expected);
}
/// <summary>
/// Runs "строевете строеве" through a whitespace MockTokenizer, SetKeywordMarkerFilter
/// (set holds "строеве") and BulgarianStemFilter, and expects "строй" and "строеве".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("строеве");

    var tokenizer = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
    var stemmed = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenizer, keywords));

    string[] expected = { "строй", "строеве" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Passes a set containing "lichamelijk" as the third DutchAnalyzer constructor argument
/// and expects "lichamelijk" unchanged and "lichamelijke" reduced to "licham". The check
/// is run on two separately constructed analyzers sharing the same set.
/// </summary>
public virtual void TestExclusionTableViaCtor()
{
#pragma warning disable 612, 618
    var exclusions = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
    exclusions.add("lichamelijk");
    string[] expected = { "lichamelijk", "licham" };

    var first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(first, "lichamelijk lichamelijke", expected);

    var second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(second, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
}
/// <summary>
/// Passes a set containing "lichamelijk" as the third DutchAnalyzer constructor argument
/// and expects "lichamelijk" unchanged and "lichamelijke" reduced to "licham". The check
/// is run on two separately constructed analyzers sharing the same set.
/// </summary>
public virtual void testExclusionTableViaCtor()
{
    var exclusions = new CharArraySet(Version.LUCENE_30, 1, true);
    exclusions.add("lichamelijk");

    var first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    assertAnalyzesTo(first, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });

    var second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    assertAnalyzesTo(second, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
}
/// <summary>
/// Passes a set containing "habitable" as the third FrenchAnalyzer constructor argument
/// and expects "habitable" unchanged and "chiste" reduced to "chist". The check is run
/// on two separately constructed analyzers sharing the same set.
/// </summary>
public virtual void TestExclusionTableViaCtor()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("habitable");

    var first = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(first, "habitable chiste", new string[] { "habitable", "chist" });

    var second = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(second, "habitable chiste", new string[] { "habitable", "chist" });
}
/// <summary>
/// Produces a CharArraySet with a random initial size (0-9 per the nextInt(10) call),
/// a random ignoreCase flag, and that many add() calls with strings from
/// TestUtil.RandomSimpleString.
/// </summary>
/// <param name="random"> source of randomness for the size, the flag and the entries </param>
/// <returns> the populated set, typed as object </returns>
public object Create(Random random)
{
    // Draw order matters for reproducibility: size first, then the flag, then the words.
    int count = random.nextInt(10);
    var words = new CharArraySet(TEST_VERSION_CURRENT, count, random.nextBoolean());

    int added = 0;
    while (added < count)
    {
        // TODO: make nastier
        words.add(TestUtil.RandomSimpleString(random));
        added++;
    }

    return words;
}
/// <summary>
/// Builds the analysis chain StandardTokenizer -> SetKeywordMarkerFilter (set holds
/// "liście", case-sensitive) -> MorfologikFilter.
/// </summary>
/// <param name="fieldName"> field name; not used by this implementation </param>
/// <param name="reader"> text source handed to the tokenizer </param>
/// <returns> components wrapping the tokenizer and the final filter </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var keywordSet = new CharArraySet(TEST_VERSION_CURRENT, 1, false);
    keywordSet.add("liście");

    Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
    TokenStream marked = new SetKeywordMarkerFilter(source, keywordSet);
    TokenStream analyzed = new MorfologikFilter(marked);

    return new TokenStreamComponents(source, analyzed);
}
/// <summary>
/// Starts from a CharArraySet created with an initial size of 0, adds every entry of
/// TEST_STOP_WORDS, and checks that the size matches and every entry is contained.
/// </summary>
public virtual void testRehash()
{
    var grown = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

    int index = 0;
    while (index < TEST_STOP_WORDS.Length)
    {
        grown.add(TEST_STOP_WORDS[index]);
        index++;
    }

    assertEquals(TEST_STOP_WORDS.Length, grown.size());

    for (index = 0; index < TEST_STOP_WORDS.Length; index++)
    {
        assertTrue(grown.contains(TEST_STOP_WORDS[index]));
    }
}
/// <summary>
/// Feeds the same mixed-case sentence through MockTokenizer, SetKeywordMarkerFilter and
/// LowerCaseFilterMock three times (case-insensitive set, case-sensitive set built from
/// "LuceneFox", and a second reference to the first set) and expects that every token
/// except "LuceneFox" comes out lower-cased.
/// </summary>
public virtual void TestSetFilterIncrementToken()
{
    var ignoreCaseKeywords = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
    ignoreCaseKeywords.add("lucenefox");
    string[] expected = { "the", "quick", "brown", "LuceneFox", "jumps" };

    // 1) case-insensitive keyword set
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                ignoreCaseKeywords)),
        expected);

    // 2) case-sensitive keyword set holding the exact mixed-case spelling
    var mixedCaseKeywords = new CharArraySet(TEST_VERSION_CURRENT, AsSet("LuceneFox"), false);
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                mixedCaseKeywords)),
        expected);

    // 3) another reference to the first set
    CharArraySet sameKeywords = ignoreCaseKeywords;
    AssertTokenStreamContents(
        new LowerCaseFilterMock(
            new SetKeywordMarkerFilter(
                new MockTokenizer(new StringReader("The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false),
                sameKeywords)),
        expected);
}
/// <summary>
/// Test the static #copy() function with a CharArraySet as a source.
/// Copies a case-insensitive and a case-sensitive set, checks sizes and membership of
/// the copies, then adds new terms to one copy and checks the sources are unaffected.
/// </summary>
public virtual void testCopyCharArraySet()
{
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // Culture-independent upper-casing (the .NET counterpart of Java's Locale.ROOT).
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    setIgnoreCase.addAll(TEST_STOP_WORDS);
    setIgnoreCase.add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());
    // The case-sensitive copy's size was previously never checked.
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string upper in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(upper));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string added in newWords)
    {
        assertFalse(setIgnoreCase.contains(added));
        assertFalse(setCaseSensitive.contains(added));
    }
}
/// <summary>
/// Builds a three-word CharArraySet and a NormalizeCharMap, passes both to
/// AnalyzerAnonymousInnerClassHelper, and runs checkAnalysisConsistency on a fixed input.
/// </summary>
public virtual void test()
{
    // Third argument false = the set does not ignore case.
    var commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    commonWords.add("jjp");
    commonWords.add("wlmwoknt");
    commonWords.add("tcgyreo");

    var mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("mtqlpi", "");
    mapBuilder.add("mwoknt", "jjp");
    mapBuilder.add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.build();

    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
    checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Adds the integer 1 to a CharArraySet and checks that it is found via the same
/// value, a separately created int?, the string "1" and the char[] { '1' } - first on
/// the plain set, then on its unmodifiableSet wrapper.
/// </summary>
public virtual void testObjectContains()
{
    var numbers = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? one = Convert.ToInt32(1);
    numbers.add(one);

    assertTrue(numbers.contains(one));
    assertTrue(numbers.contains(new int?(1))); // another integer
    assertTrue(numbers.contains("1"));
    assertTrue(numbers.contains(new char[] { '1' }));

    // test unmodifiable
    CharArraySet readOnly = CharArraySet.unmodifiableSet(numbers);
    assertTrue(readOnly.contains(one));
    assertTrue(readOnly.contains(new int?(1))); // another integer
    assertTrue(readOnly.contains("1"));
    assertTrue(readOnly.contains(new char[] { '1' }));
}
/// <summary>
/// Reads every line from <paramref name="reader"/> and adds it, trimmed of leading and
/// trailing whitespace, to <paramref name="result"/>. Each line is expected to hold a
/// single word. Words need to be lowercase when used with an Analyzer that applies
/// LowerCaseFilter (like StandardAnalyzer).
/// </summary>
/// <param name="reader"> Reader containing the wordlist </param>
/// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
{
    BufferedReader lines = null;
    try
    {
        lines = getBufferedReader(reader);
        for (string line = lines.readLine(); line != null; line = lines.readLine())
        {
            result.add(line.Trim());
        }
    }
    finally
    {
        // Runs on both success and failure; 'lines' is still null if wrapping the reader threw.
        IOUtils.close(lines);
    }

    return result;
}
/// <summary>
/// Find the unique stem(s) of the provided word. Duplicates are detected with a
/// CharArraySet that uses the dictionary's ignoreCase setting; the first occurrence
/// of each stem is kept, in the order returned by stem().
/// </summary>
/// <param name="word"> Word to find the stems for </param>
/// <param name="length"> length argument forwarded to stem() together with <paramref name="word"/> </param>
/// <returns> List of stems for the word; the stem() result itself when it has fewer than two entries </returns>
public IList<CharsRef> uniqueStems(char[] word, int length)
{
    IList<CharsRef> candidates = stem(word, length);

    if (candidates.Count >= 2)
    {
        var seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
        IList<CharsRef> unique = new List<CharsRef>();

        foreach (CharsRef candidate in candidates)
        {
            if (seen.contains(candidate))
            {
                continue;
            }

            unique.Add(candidate);
            seen.add(candidate);
        }

        return unique;
    }

    // Zero or one stem: nothing to de-duplicate.
    return candidates;
}
/// <summary>
/// Test the static #copy() function with a CharArraySet as a source.
/// Copies a case-insensitive and a case-sensitive set, checks sizes and membership of
/// the copies, then adds new terms to one copy and checks the sources are unaffected.
/// </summary>
public virtual void testCopyCharArraySet()
{
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // Culture-independent upper-casing (the .NET counterpart of Java's Locale.ROOT).
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    setIgnoreCase.addAll(TEST_STOP_WORDS);
    setIgnoreCase.add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());
    // The case-sensitive copy's size was previously never checked.
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string upper in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(upper));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string added in newWords)
    {
        assertFalse(setIgnoreCase.contains(added));
        assertFalse(setCaseSensitive.contains(added));
    }
}
/// <summary>
/// Reads stopwords from a stopword list in Snowball format.
/// <para>
/// The snowball format is the following:
/// <list type="bullet">
/// <item><description>Lines may contain multiple words separated by whitespace.</description></item>
/// <item><description>The comment character is the vertical line (|).</description></item>
/// <item><description>Lines may contain trailing comments.</description></item>
/// </list>
/// </para>
/// </summary>
/// <param name="reader"> Reader containing a Snowball stopword list </param>
/// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
{
    BufferedReader br = null;
    try
    {
        br = getBufferedReader(reader);
        string line = null;
        while ((line = br.readLine()) != null)
        {
            // Everything from the first '|' onward is a comment.
            int comment = line.IndexOf('|');
            if (comment >= 0)
            {
                line = line.Substring(0, comment);
            }

            // string.Split takes no regular expression; a null separator array splits on
            // white-space characters, and RemoveEmptyEntries drops the empty strings that
            // runs of whitespace (or leading/trailing whitespace) would otherwise produce.
            string[] words = line.Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
            foreach (string word in words)
            {
                result.add(word);
            }
        }
    }
    finally
    {
        // Runs on both success and failure; 'br' is still null if wrapping the reader threw.
        IOUtils.close(br);
    }

    return result;
}
/// <summary>
/// Reads every line from <paramref name="reader"/> and adds each line that does not
/// start with <paramref name="comment"/> (ordinal comparison, checked before trimming)
/// to <paramref name="result"/>, trimmed of leading and trailing whitespace. Each line
/// is expected to hold a single word. Words need to be lowercase when used with an
/// Analyzer that applies LowerCaseFilter (like StandardAnalyzer).
/// </summary>
/// <param name="reader"> Reader containing the wordlist </param>
/// <param name="comment"> The string representing a comment. </param>
/// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
{
    BufferedReader lines = null;
    try
    {
        lines = getBufferedReader(reader);
        for (string line = lines.ReadLine(); line != null; line = lines.ReadLine())
        {
            if (line.StartsWith(comment, StringComparison.Ordinal))
            {
                continue; // comment line
            }

            result.add(line.Trim());
        }
    }
    finally
    {
        // Runs on both success and failure; 'lines' is still null if wrapping the reader threw.
        IOUtils.Close(lines);
    }

    return result;
}
/// <summary>
/// Passes a set containing "habitable" as the third FrenchAnalyzer constructor argument
/// and expects "habitable" unchanged and "chiste" reduced to "chist". The check is run
/// on two separately constructed analyzers sharing the same set.
/// </summary>
public virtual void TestExclusionTableViaCtor()
{
    var exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    exclusions.add("habitable");

    var first = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(first, "habitable chiste", new string[] { "habitable", "chist" });

    var second = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(second, "habitable chiste", new string[] { "habitable", "chist" });
}
/// <summary>
/// Runs "строевете строеве" through a whitespace MockTokenizer, SetKeywordMarkerFilter
/// (set holds "строеве") and BulgarianStemFilter, and expects "строй" and "строеве".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("строеве");

    var tokenizer = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
    var stemmed = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenizer, keywords));

    string[] expected = { "строй", "строеве" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Passes a set containing "lichamelijk" as the third DutchAnalyzer constructor argument
/// and expects "lichamelijk" unchanged and "lichamelijke" reduced to "licham". The check
/// is run on two separately constructed analyzers sharing the same set.
/// </summary>
public virtual void TestExclusionTableViaCtor()
{
#pragma warning disable 612, 618
    var exclusions = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
    exclusions.add("lichamelijk");
    string[] expected = { "lichamelijk", "licham" };

    var first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(first, "lichamelijk lichamelijke", expected);

    var second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(second, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
}
/// <summary>
/// Wraps a populated CharArraySet with CharArraySet.unmodifiableSet and checks that every
/// mutating call (add in its char[], string, StringBuilder and object forms, clear,
/// removeAll, retainAll, addAll) raises NotSupportedException and leaves size and
/// contents untouched.
/// </summary>
public virtual void testModifyOnUnmodifiable()
{
    var source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.addAll(TEST_STOP_WORDS);
    int expectedSize = source.size();

    CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());

    string absent = "SirGallahad";
    assertFalse("Test String already exists in set", readOnly.contains(absent));

    // add(char[])
    try
    {
        readOnly.add(absent.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absent));
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // add(string)
    try
    {
        readOnly.add(absent);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absent));
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // add(StringBuilder)
    try
    {
        readOnly.add(new StringBuilder(absent));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absent));
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // clear()
    try
    {
        readOnly.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", readOnly.contains(absent));
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // add(object)
    try
    {
        readOnly.add((object) absent);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absent));
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // This test was changed in 3.1, as a contains() call on the given Collection using the
    // "correct" iterator's current key (now a char[]) on a Set<String> would not hit any
    // element of the CAS and therefor never call remove() on the iterator
    try
    {
        readOnly.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // retainAll(...)
    try
    {
        readOnly.retainAll(new CharArraySet(TEST_VERSION_CURRENT, absent, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
    }

    // addAll(...)
    try
    {
        readOnly.addAll(absent);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absent));
    }

    // Every original stop word must still be present after all the rejected mutations.
    for (int index = 0; index < TEST_STOP_WORDS.Length; index++)
    {
        assertTrue(readOnly.contains(TEST_STOP_WORDS[index]));
    }
}
/// <summary>
/// Runs "ساهدهات" through ArabicLetterTokenizer, SetKeywordMarkerFilter (set holds the
/// same word) and ArabicStemFilter, and expects the word back unchanged.
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    var keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("ساهدهات");

    // Warnings 612/618 (obsolete member usage) are suppressed for the tokenizer construction.
#pragma warning disable 612, 618
    var tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
#pragma warning restore 612, 618

    var stemmed = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenizer, keywords));

    string[] expected = { "ساهدهات" };
    AssertTokenStreamContents(stemmed, expected);
}
/// <summary>
/// Adds the integer 1 to a CharArraySet and checks that it is found via the same
/// value, a separately created int?, the string "1" and the char[] { '1' } - first on
/// the plain set, then on its unmodifiableSet wrapper.
/// </summary>
public virtual void testObjectContains()
{
    var numbers = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? one = Convert.ToInt32(1);
    numbers.add(one);

    assertTrue(numbers.contains(one));
    assertTrue(numbers.contains(new int?(1))); // another integer
    assertTrue(numbers.contains("1"));
    assertTrue(numbers.contains(new char[] { '1' }));

    // test unmodifiable
    CharArraySet readOnly = CharArraySet.unmodifiableSet(numbers);
    assertTrue(readOnly.contains(one));
    assertTrue(readOnly.contains(new int?(1))); // another integer
    assertTrue(readOnly.contains("1"));
    assertTrue(readOnly.contains(new char[] { '1' }));
}
/// <summary>
/// Adds strings containing an unpaired high surrogate (\uD800) to a CharArraySet and
/// checks lookups: with ignoreCase = true both the added and the lower-case forms must
/// be found; with ignoreCase = false only the added forms must be found.
/// </summary>
public virtual void TestSingleHighSurrogate()
{
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // Pass 1: case-insensitive set.
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // Pass 2: case-sensitive set.
    set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        // Report the term that is actually being looked up (the lower-case one).
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Exercises CharArrayMap: putAll from a Dictionary, the key set obtained via Keys
/// (iteration, clear, rejected add), entry iteration with in-place value updates through
/// both the generic enumerator and the EntryIterator, and clearing through entrySet().
/// </summary>
public virtual void testMethods()
{
    CharArrayMap<int?> charMap = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 2, false);
    Dictionary<string, int?> reference = new Dictionary<string, int?>();
    reference["foo"] = 1;
    reference["bar"] = 2;
    charMap.putAll(reference);
    assertEquals(reference.Count, charMap.size());

    // Re-adding existing keys plus one new key must end at the same size as the reference.
    reference["baz"] = 3;
    charMap.putAll(reference);
    assertEquals(reference.Count, charMap.size());

    // --- key set ---
    CharArraySet keys = charMap.Keys;
    int visited = 0;
    foreach (object rawKey in keys)
    {
        assertTrue(charMap.containsKey(rawKey));
        char[] keyChars = (char[])rawKey;
        assertTrue(charMap.containsKey(keyChars, 0, keyChars.Length));
        visited++;
    }
    assertEquals(reference.Count, visited);
    assertEquals(reference.Count, keys.size());
    assertEquals(charMap.size(), keys.size());

    // Clearing the key set is expected to empty the map as well.
    keys.clear();
    assertEquals(0, keys.size());
    assertEquals(0, charMap.size());

    try
    {
        keys.add("test");
        fail("keySet() allows adding new keys");
    }
    catch (System.NotSupportedException)
    {
        // pass
    }

    charMap.putAll(reference);
    assertEquals(reference.Count, keys.size());
    assertEquals(charMap.size(), keys.size());

    // --- entry iteration through the generic enumerator ---
    IEnumerator<KeyValuePair<object, int?>> entries = charMap.entrySet().GetEnumerator();
    visited = 0;
    while (entries.MoveNext())
    {
        KeyValuePair<object, int?> entry = entries.Current;
        object key = entry.Key;
        int? val = entry.Value;
        assertEquals(charMap.get(key), val);
        entry.Value = val * 100;
        assertEquals(val * 100, (int)charMap.get(key));
        visited++;
    }
    assertEquals(reference.Count, visited);

    charMap.clear();
    charMap.putAll(reference);
    assertEquals(charMap.size(), visited);

    // --- entry iteration through the map's own EntryIterator ---
    CharArrayMap<int?>.EntryIterator entryIterator = charMap.entrySet().GetEnumerator();
    visited = 0;
    while (entryIterator.hasNext())
    {
        char[] keyChars = entryIterator.nextKey();
        int? val = entryIterator.currentValue();
        assertEquals(reference[new string(keyChars)], val);
        entryIterator.Value = val * 100;
        assertEquals(val * 100, (int)charMap.get(keyChars));
        visited++;
    }
    assertEquals(reference.Count, visited);

    // --- clearing through the entry set ---
    charMap.entrySet().clear();
    assertEquals(0, charMap.size());
    assertEquals(0, charMap.entrySet().size());
    assertTrue(charMap.Empty);
}
/// <summary>
/// Runs "Brasília Brasilia" through a lower-casing tokenizer, a keyword marker fed with a
/// set containing "Brasília", and the Brazilian stem filter; the expected output keeps
/// "brasília" intact while "brasilia" is stemmed to "brasil".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("Brasília");

    // Build the chain step by step: tokenizer -> keyword marker -> stemmer.
    StringReader input = new StringReader("Brasília Brasilia");
    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, input);
    SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(tokenizer, keywords);
    BrazilianStemFilter stemmer = new BrazilianStemFilter(marker);

    string[] expectedTokens = { "brasília", "brasil" };
    AssertTokenStreamContents(stemmer, expectedTokens);
}
/// <summary>
/// Verifies that every mutating operation attempted on the result of
/// <c>CharArraySet.UnmodifiableSet</c> throws <see cref="System.NotSupportedException"/>
/// and leaves the set's size and contents as they were.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.AddAll(TEST_STOP_WORDS);
    int expectedSize = source.size();

    CharArraySet set = CharArraySet.UnmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

    string absentTerm = "SirGallahad";
    assertFalse("Test String already exists in set", set.Contains(absentTerm));

    // Add as char[].
    try
    {
        set.Add(absentTerm.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Add as string.
    try
    {
        set.add(absentTerm);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Add as StringBuilder.
    try
    {
        set.Add(new StringBuilder(absentTerm));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Clear.
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(absentTerm));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Add as string once more, after the rejected clear().
    try
    {
        set.add(absentTerm);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
    // but an extension method for the test fixture (which apparently has a bug), this test is non-critical

    //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    //// remove() on the iterator
    //try
    //{
    //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
    //    fail("Modified unmodifiable set");
    //}
    //catch (System.NotSupportedException)
    //{
    //    // expected
    //    assertEquals("Size of unmodifiable set has changed", size, set.size());
    //}

    #region Added for better .NET support
    // This test was added for .NET to check the Remove method, since the extension method
    // above fails to execute.
    try
    {
#pragma warning disable 612, 618
        set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }
    #endregion

    // Retain only a term that is not in the set.
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new [] { absentTerm }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Bulk add.
    try
    {
        set.addAll(new[] { absentTerm });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
    }

    // LUCENENET Specific - added to test .NETified UnionWith method
    try
    {
        set.UnionWith(new[] { absentTerm });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
    }

    // After all rejected mutations, every original stop word must still be present.
    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue(set.contains(stopWord));
    }
}
/// <summary>
/// Backwards-compatibility check for supplementary characters with a set created for
/// <c>Version.LUCENE_30</c>. Terms containing U+1041C are added; the variants written with
/// U+10444 and lower-case ASCII letters are expected NOT to be found, regardless of whether
/// the third constructor argument is <c>true</c> or <c>false</c>.
/// </summary>
public virtual void testSupplementaryCharsBWCompat()
{
    // .NET composite format strings use {0}; the Java-style %s placeholder would be printed verbatim.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";

    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    string[] upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

    // Third constructor argument true.
    CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        // Only string arguments are formatted, so the result does not depend on the current culture.
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // Third constructor argument false.
    set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Checks that wrapping a populated set with <c>CharArraySet.unmodifiableSet</c> keeps its
/// size and contents, that an integer added to the set can be looked up as an integer,
/// as the string "1" and as the char array {'1'}, and that wrapping <c>null</c> throws.
/// </summary>
public virtual void testUnmodifiableSet()
{
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.addAll(TEST_STOP_WORDS);
    source.add(Convert.ToInt32(1));
    int expectedSize = source.size();

    CharArraySet set = CharArraySet.unmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

    foreach (string word in TEST_STOP_WORDS)
    {
        assertTrue(set.contains(word));
    }

    // The integer entry must be reachable through each of its representations.
    assertTrue(set.contains(Convert.ToInt32(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[] { '1' }));

    try
    {
        CharArraySet.unmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Starts with a set constructed with an initial size argument of 0, adds every stop word,
/// and checks that the size matches and that each word can still be found afterwards.
/// </summary>
public virtual void testRehash()
{
    CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

    foreach (var stopWord in TEST_STOP_WORDS)
    {
        words.add(stopWord);
    }

    assertEquals(TEST_STOP_WORDS.Length, words.size());

    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue(words.contains(stopWord));
    }
}
/// <summary>
/// Verifies that every mutating operation attempted on the result of
/// <c>CharArraySet.unmodifiableSet</c> throws <see cref="System.NotSupportedException"/>
/// and leaves the set's size and contents as they were.
/// </summary>
public virtual void testModifyOnUnmodifiable()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(TEST_STOP_WORDS);
    int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());

    string NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

    // Add as char[].
    try
    {
        set.add(NOT_IN_SET.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Add as string.
    try
    {
        set.add(NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Add as StringBuilder.
    try
    {
        set.add(new StringBuilder(NOT_IN_SET));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Clear.
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Add as object.
    try
    {
        set.add((object)NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    // remove() on the iterator
    try
    {
        set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // The term is wrapped in a one-element array: the constructor and addAll are used with
    // collections elsewhere in this test, and a bare string is not a collection of terms.
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    try
    {
        set.addAll(new[] { NOT_IN_SET });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }

    // After all rejected mutations, every original stop word must still be present.
    for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
    {
        assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }
}
/// <summary>
/// Backwards-compatibility check for terms containing an unpaired high surrogate
/// (<c>\uD800</c>) with a set created for <c>Version.LUCENE_30</c>. With the third
/// constructor argument <c>true</c>, the lower-case variants are expected to be found
/// except for the last pair, which also differs in a supplementary character
/// (U+1041C vs U+10444). With the argument <c>false</c>, no lower-case variant may be found.
/// </summary>
public virtual void testSingleHighSurrogateBWComapt()
{
    // .NET composite format strings use {0}; the Java-style %s placeholder would be printed verbatim.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = new string[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // Third constructor argument true.
    CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        // Only string arguments are formatted, so the result does not depend on the current culture.
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        if (i == lowerArr.Length - 1)
        {
            // Last pair: contains the supplementary character, expected not to match.
            assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
        }
        else
        {
            assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
        }
    }

    // Third constructor argument false.
    set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Runs "hole desek" through a whitespace mock tokenizer, a keyword marker fed with a set
/// containing "hole", and the Czech stem filter; the expected output keeps "hole" intact
/// while "desek" is stemmed to "desk".
/// </summary>
public virtual void TestWithKeywordAttribute()
{
    CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    keywords.add("hole");

    // Build the chain step by step: tokenizer -> keyword marker -> stemmer.
    StringReader input = new StringReader("hole desek");
    MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(tokenizer, keywords);
    CzechStemFilter stemmer = new CzechStemFilter(marker);

    string[] expectedTokens = { "hole", "desk" };
    AssertTokenStreamContents(stemmer, expectedTokens);
}