/// <summary>
/// Verifies that each <c>contains</c> overload is expected to raise
/// <see cref="System.NullReferenceException"/> when handed a null argument.
/// </summary>
public virtual void testContainsWithNull()
{
    CharArraySet emptySet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

    // char[] overload with an explicit offset/length window
    try
    {
        emptySet.contains((char[])null, 0, 10);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }

    // CharSequence overload
    try
    {
        emptySet.contains((CharSequence)null);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }

    // object overload
    try
    {
        emptySet.contains((object)null);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Null-argument test: every <c>contains</c> overload called with null is expected
/// to throw <see cref="System.NullReferenceException"/>; reaching <c>fail</c> means it did not.
/// </summary>
public virtual void testContainsWithNull()
{
    CharArraySet target = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

    try
    {
        // null char[] buffer
        target.contains((char[])null, 0, 10);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }

    try
    {
        // null CharSequence
        target.contains((CharSequence)null);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }

    try
    {
        // null object
        target.contains((object)null);
        fail("null value must raise NPE");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Checks lookups of terms that contain an unpaired high surrogate, first against a
/// case-insensitive set and then against a case-sensitive one.
/// </summary>
public virtual void testSingleHighSurrogate()
{
    // .NET composite formatting uses {0}; a Java-style %s placeholder is never substituted.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = new string[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // ignoreCase == true: both the original and the lower-cased form must be found
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // ignoreCase == false: only the exact form may be found
    set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        // report the term that is actually being looked up (the lower-cased one)
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Checks lookups of terms containing supplementary characters in sets created with
/// <c>Version.LUCENE_30</c>. The test expects the lower-cased variants to be absent
/// both with and without <c>ignoreCase</c>.
/// </summary>
public virtual void testSupplementaryCharsBWCompat()
{
    // .NET composite formatting uses {0}; a Java-style %s placeholder is never substituted.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    string[] upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

    // ignoreCase == true
    CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // ignoreCase == false
    set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Verifies that <c>CharArraySet.unmodifiableSet</c> returns a view with the same size
/// and contents as its source, and that passing null is expected to raise
/// <see cref="System.NullReferenceException"/>.
/// </summary>
public virtual void testUnmodifiableSet()
{
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.addAll(TEST_STOP_WORDS);
    source.add(Convert.ToInt32(1));
    int expectedSize = source.size();

    CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());

    foreach (string stopword in TEST_STOP_WORDS)
    {
        assertTrue(readOnly.contains(stopword));
    }

    // the integer entry must be reachable as an int, a string and a char[]
    assertTrue(readOnly.contains(Convert.ToInt32(1)));
    assertTrue(readOnly.contains("1"));
    assertTrue(readOnly.contains(new char[] { '1' }));

    try
    {
        CharArraySet.unmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Asserts that <paramref name="wordset"/> holds exactly the three expected words
/// ("ONE", "two", "three") and does not contain "four".
/// </summary>
/// <param name="wordset"> Set to inspect </param>
private void checkSet(CharArraySet wordset)
{
    assertEquals(3, wordset.size());

    // case is not modified
    assertTrue(wordset.contains("ONE"));

    // surrounding whitespace is removed
    assertTrue(wordset.contains("two"));

    assertTrue(wordset.contains("three"));
    assertFalse(wordset.contains("four"));
}
/// <summary>
/// Asserts the expected content of a loaded word set: three entries
/// ("ONE", "two", "three") and no entry "four".
/// </summary>
/// <param name="wordset"> Set to inspect </param>
private void CheckSet(CharArraySet wordset)
{
    assertEquals(3, wordset.size());

    // case is not modified
    assertTrue(wordset.contains("ONE"));

    // surrounding whitespace is removed
    assertTrue(wordset.contains("two"));

    assertTrue(wordset.contains("three"));
    assertFalse(wordset.contains("four"));
}
/// <summary>
/// Loads a word list that ends with a "#"-prefixed line and verifies, via
/// <c>checkSet</c>, that only the three real words are present and that neither
/// "#comment" nor "comment" made it into the set.
/// </summary>
public virtual void testComments()
{
    string input = "ONE\n two \nthree\n#comment";
    StringReader reader = new StringReader(input);

    CharArraySet wordSet1 = WordlistLoader.getWordSet(reader, "#", TEST_VERSION_CURRENT);

    checkSet(wordSet1);
    assertFalse(wordSet1.contains("#comment"));
    assertFalse(wordSet1.contains("comment"));
}
/// <summary>
/// Exercises stop-word loading through <c>StopFilterFactory</c>: a single word file,
/// a comma-separated pair of files, a snowball-format file, and finally the defaults.
/// </summary>
public virtual void TestInform()
{
    IResourceLoader loader = new ClasspathResourceLoader(this.GetType());
    assertTrue("loader is null and it shouldn't be", loader != null);

    // one word file
    StopFilterFactory factory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet words = factory.StopWords;
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
    assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);

    // two word files in one comma-separated argument
    factory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    words = factory.StopWords;
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
    assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);

    // snowball-format word file
    factory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    words = factory.StopWords;
    assertEquals(8, words.size());
    string[] expectedSnowball = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string expected in expectedSnowball)
    {
        assertTrue(words.contains(expected));
    }

    // defaults
    factory = (StopFilterFactory)TokenFilterFactory("Stop");
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.StopWords);
    assertEquals(false, factory.IgnoreCase);
}
/// <summary>
/// Exercises common-word loading through <c>CommonGramsQueryFilterFactory</c>: a single
/// word file, a comma-separated pair of files, and a snowball-format file.
/// </summary>
public virtual void TestInform()
{
    // LUCENENET: Need to set to a type that is in the same path as the files
    IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers), "Lucene.Net");
    assertTrue("loader is null and it shouldn't be", loader != null);

    // one word file
    CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet words = factory.CommonWords;
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
    assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);

    // two word files in one comma-separated argument
    factory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    words = factory.CommonWords;
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
    assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);

    // snowball-format word file
    factory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    words = factory.CommonWords;
    assertEquals(8, words.size());
    string[] expectedSnowball = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    foreach (string expected in expectedSnowball)
    {
        assertTrue(words.contains(expected));
    }
}
/// <summary>
/// Verifies that <c>contains</c> honours an offset/length window into a char buffer:
/// the slice "this" inside "xthisy" must be found in both the modifiable set and its
/// unmodifiable wrapper.
/// </summary>
public virtual void testNonZeroOffset()
{
    string[] words = new string[] { "Hello", "World", "this", "is", "a", "test" };
    char[] findme = "xthisy".ToCharArray();

    CharArraySet lookup = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    lookup.addAll(words);

    assertTrue(lookup.contains(findme, 1, 4));
    assertTrue(lookup.contains(new string(findme, 1, 4)));

    // test unmodifiable
    lookup = CharArraySet.unmodifiableSet(lookup);

    assertTrue(lookup.contains(findme, 1, 4));
    assertTrue(lookup.contains(new string(findme, 1, 4)));
}
/// <summary>
/// Runs a sentence through a <c>StopAnalyzer</c> built from a custom stop set and checks,
/// for every emitted token, that it is not a stop word and that its position increment
/// matches the expected sequence.
/// </summary>
public virtual void TestStopListPositions()
{
    string[] stopWords = new string[] { "good", "test", "analyzer" };
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, stopWords, false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    string sentence = "This is a good test of the english stop analyzer with positions";
    // expected position increment of each token that the analyzer emits, in order
    int[] expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

    TokenStream stream = newStop.TokenStream("test", sentence);
    try
    {
        assertNotNull(stream);
        int tokenIndex = 0;
        ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
        IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();

        stream.Reset();
        while (stream.IncrementToken())
        {
            string text = termAtt.ToString();
            assertFalse(stopWordsSet.contains(text));
            assertEquals(expectedIncr[tokenIndex++], posIncrAtt.PositionIncrement);
        }
        stream.End();
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(stream);
    }
}
/// <summary>
/// Advances the wrapped input until a token is found that is not in the stop table and
/// passes the character-category filter: lower/upper-case letter tokens must be longer
/// than one character, tokens starting with an "other letter" are always accepted.
/// </summary>
/// <returns> <c>true</c> if an acceptable token was found, <c>false</c> once the input is exhausted </returns>
public override bool incrementToken()
{
    while (input.incrementToken())
    {
        char[] text = termAtt.buffer();
        int termLength = termAtt.length();

        // why not key off token type here assuming ChineseTokenizer comes first?
        if (stopTable.contains(text, 0, termLength))
        {
            continue;
        }

        // System.Char has no Java-style getType/LOWERCASE_LETTER members; the .NET
        // equivalent is char.GetUnicodeCategory with the UnicodeCategory enumeration.
        switch (char.GetUnicodeCategory(text[0]))
        {
            case System.Globalization.UnicodeCategory.LowercaseLetter:
            case System.Globalization.UnicodeCategory.UppercaseLetter:
                // English word/token should larger than 1 character.
                if (termLength > 1)
                {
                    return true;
                }
                break;

            case System.Globalization.UnicodeCategory.OtherLetter:
                // One Chinese character as one Chinese word.
                // Chinese word extraction to be added later here.
                return true;
        }
    }

    return false;
}
/// <summary>
/// Verifies that <c>clear</c> empties the set and that the same words can be added again afterwards.
/// </summary>
public virtual void testClear()
{
    CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

    words.addAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, words.size());

    words.clear();
    assertEquals("not empty", 0, words.size());
    foreach (string stopWord in TEST_STOP_WORDS)
    {
        assertFalse(words.contains(stopWord));
    }

    // refill after clearing
    words.addAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, words.size());
    foreach (string stopWord in TEST_STOP_WORDS)
    {
        assertTrue(words.contains(stopWord));
    }
}
/// <summary>
/// Fills a set, clears it, checks that it is empty, then refills it and checks that
/// every word is present again.
/// </summary>
public virtual void testClear()
{
    CharArraySet stopSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

    stopSet.addAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, stopSet.size());

    stopSet.clear();
    assertEquals("not empty", 0, stopSet.size());
    foreach (string word in TEST_STOP_WORDS)
    {
        assertFalse(stopSet.contains(word));
    }

    // the cleared set must accept the same words again
    stopSet.addAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, stopSet.size());
    foreach (string word in TEST_STOP_WORDS)
    {
        assertTrue(stopSet.contains(word));
    }
}
/// <summary>
/// Checks that <paramref name="rl"/> can open a stop-word resource, instantiate a class
/// by name, and open a class-file resource.
/// </summary>
/// <param name="rl"> Resource loader under test </param>
private void assertClasspathDelegation(ResourceLoader rl)
{
    // try a stopwords file from classpath
    System.IO.StreamReader stopwordReader = new System.IO.StreamReader(
        rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"),
        Encoding.UTF8);
    CharArraySet stopwords = WordlistLoader.getSnowballWordSet(stopwordReader, TEST_VERSION_CURRENT);
    assertTrue(stopwords.contains("you"));

    // try to load a class; we use string comparison because classloader may be different...
    // NOTE: the .NET Type.FullName property will not always yield results identical to Java's Class.getName
    string loadedTypeName = rl.newInstance("org.apache.lucene.analysis.util.RollingCharBuffer", typeof(object)).GetType().FullName;
    assertEquals("org.apache.lucene.analysis.util.RollingCharBuffer", loadedTypeName);

    // theoretically classes should also be loadable:
    IOUtils.closeWhileHandlingException(rl.openResource("java/lang/String.class"));
}
/// <summary>
/// Creates a <c>CommonGramsQueryFilterFactory</c> without arguments and verifies that its
/// word set is non-null, contains "the", and that the resulting filter emits the expected
/// grams for "testing the factory".
/// </summary>
public virtual void TestDefaults()
{
    CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery");

    CharArraySet words = factory.CommonWords;
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue(words.contains("the"));

    StringReader input = new StringReader("testing the factory");
    Tokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TokenStream stream = factory.Create(tokenizer);

    string[] expectedTokens = new string[] { "testing_the", "the_factory" };
    AssertTokenStreamContents(stream, expectedTokens);
}
/// <summary>
/// Adds every stop word one by one to a set created with an initial size of 0 and
/// verifies that all of them are present afterwards.
/// </summary>
public virtual void testRehash()
{
    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

    foreach (string stopWord in TEST_STOP_WORDS)
    {
        cas.add(stopWord);
    }

    assertEquals(TEST_STOP_WORDS.Length, cas.size());

    foreach (string stopWord in TEST_STOP_WORDS)
    {
        assertTrue(cas.contains(stopWord));
    }
}
/// <summary>
/// Test the static <c>copy()</c> function with a CharArraySet as a source: the copies must
/// have the size and case behaviour of their sources, and terms added to a copy must not
/// appear in the source sets.
/// </summary>
public virtual void testCopyCharArraySet()
{
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // culture-independent upper-casing (the .NET counterpart of Java's Locale.ROOT)
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    setIgnoreCase.addAll(TEST_STOP_WORDS);
    setIgnoreCase.add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    // each copy is compared with its own source
    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string upperWord in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(upperWord));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string newWord in newWords)
    {
        assertFalse(setIgnoreCase.contains(newWord));
        assertFalse(setCaseSensitive.contains(newWord));
    }
}
/// <summary>
/// Adds an integer value to the set and verifies that it can be found as the same value,
/// as another integer with the same value, as the string "1" and as the char array {'1'},
/// both in the modifiable set and in its unmodifiable wrapper.
/// </summary>
public virtual void testObjectContains()
{
    CharArraySet container = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? val = Convert.ToInt32(1);
    container.add(val);

    assertTrue(container.contains(val));
    assertTrue(container.contains(new int?(1))); // another integer
    assertTrue(container.contains("1"));
    assertTrue(container.contains(new char[] { '1' }));

    // test unmodifiable
    container = CharArraySet.unmodifiableSet(container);

    assertTrue(container.contains(val));
    assertTrue(container.contains(new int?(1))); // another integer
    assertTrue(container.contains("1"));
    assertTrue(container.contains(new char[] { '1' }));
}
/// <summary>
/// Test stopwords in snowball format: comments, blank lines and surrounding whitespace
/// must be ignored, leaving exactly the seven stop words.
/// </summary>
public virtual void testSnowballListLoading()
{
    string s =
        "|comment\n" +              // commented line
        " |comment\n" +             // commented line with leading whitespace
        "\n" +                      // blank line
        " \t\n" +                   // line with only whitespace
        " |comment | comment\n" +   // commented line with comment
        "ONE\n" +                   // stopword, in uppercase
        " two \n" +                 // stopword with leading/trailing space
        " three four five \n" +     // multiple stopwords
        "six seven | comment\n";    // multiple stopwords + comment

    CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s), TEST_VERSION_CURRENT);

    assertEquals(7, wordset.size());
    string[] expectedWords = { "ONE", "two", "three", "four", "five", "six", "seven" };
    foreach (string expected in expectedWords)
    {
        assertTrue(wordset.contains(expected));
    }
}
/// <summary>
/// Find the unique stem(s) of the provided word. Stems are obtained from <c>stem</c> and,
/// when there are two or more, de-duplicated through a <c>CharArraySet</c> that uses the
/// dictionary's <c>ignoreCase</c> setting; the first occurrence of each stem is kept.
/// </summary>
/// <param name="word"> Word to find the stems for </param>
/// <param name="length"> Length value forwarded to <c>stem</c> together with <paramref name="word"/> </param>
/// <returns> List of stems for the word </returns>
public IList<CharsRef> uniqueStems(char[] word, int length)
{
    IList<CharsRef> candidates = stem(word, length);

    // zero or one stem cannot contain duplicates
    if (candidates.Count < 2)
    {
        return candidates;
    }

    CharArraySet seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
    IList<CharsRef> unique = new List<CharsRef>();
    foreach (CharsRef candidate in candidates)
    {
        if (seen.contains(candidate))
        {
            continue;
        }
        unique.Add(candidate);
        seen.add(candidate);
    }

    return unique;
}
/// <summary>
/// Test the static <c>copy()</c> function with a plain <c>ISet&lt;string&gt;</c> as a source:
/// the copy must have the same size and words, must not contain the upper-cased variants,
/// and terms added to the copy must not appear in the source set.
/// </summary>
public virtual void testCopyJDKSet()
{
    ISet<string> set = new HashSet<string>();

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // culture-independent upper-casing (the .NET counterpart of Java's Locale.ROOT)
        stopwordsUpper.Add(word.ToUpperInvariant());
    }
    set.addAll(TEST_STOP_WORDS);

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.Count, copy.size());

    assertTrue(copy.containsAll(stopwords));
    foreach (string upperWord in stopwordsUpper)
    {
        assertFalse(copy.contains(upperWord));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string newWord in newWords)
    {
        assertFalse(set.Contains(newWord));
    }
}
/// <summary>
/// Runs a sentence through a <c>StopAnalyzer</c> built from a custom stop set and verifies
/// that none of the emitted tokens is contained in that set.
/// </summary>
public virtual void TestStopList()
{
    string[] stopWords = new string[] { "good", "test", "analyzer" };
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, stopWords, false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    TokenStream stream = newStop.TokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(stream);
        ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();

        stream.Reset();
        while (stream.IncrementToken())
        {
            string token = termAtt.ToString();
            assertFalse(stopWordsSet.contains(token));
        }
        stream.End();
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(stream);
    }
}
/// <summary>
/// Analyzes a sentence with a <c>StopAnalyzer</c> configured with the stop words
/// "good", "test" and "analyzer", and asserts that no emitted token is one of them.
/// </summary>
public virtual void TestStopList()
{
    string[] customStopWords = new string[] { "good", "test", "analyzer" };
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, customStopWords, false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    TokenStream stream = newStop.TokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(stream);
        ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();

        stream.Reset();
        while (stream.IncrementToken())
        {
            string emitted = termAtt.ToString();
            assertFalse(stopWordsSet.contains(emitted));
        }
        stream.End();
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(stream);
    }
}
/// <summary>
/// Analyzes a sentence with a <c>StopAnalyzer</c> configured with the stop words
/// "good", "test" and "analyzer", and asserts that no emitted token is one of them.
/// </summary>
public virtual void testStopList()
{
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(stream);
        CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));

        stream.reset();
        while (stream.incrementToken())
        {
            string emitted = termAtt.ToString();
            assertFalse(stopWordsSet.contains(emitted));
        }
        stream.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(stream);
    }
}
/// <summary>
/// Test the static <c>copy()</c> function with a CharArraySet as a source: the copies must
/// have the size and case behaviour of their sources, and terms added to a copy must not
/// appear in the source sets.
/// </summary>
public virtual void testCopyCharArraySet()
{
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // culture-independent upper-casing (the .NET counterpart of Java's Locale.ROOT)
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    setIgnoreCase.addAll(TEST_STOP_WORDS);
    setIgnoreCase.add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    // each copy is compared with its own source
    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string upperWord in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(upperWord));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string newWord in newWords)
    {
        assertFalse(setIgnoreCase.contains(newWord));
        assertFalse(setCaseSensitive.contains(newWord));
    }
}
/// <summary>
/// Wraps a populated set with <c>CharArraySet.unmodifiableSet</c> and verifies that size
/// and contents are unchanged; also expects a <see cref="System.NullReferenceException"/>
/// when null is passed to the wrapper.
/// </summary>
public virtual void testUnmodifiableSet()
{
    CharArraySet original = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    original.addAll(TEST_STOP_WORDS);
    original.add(Convert.ToInt32(1));
    int sizeBefore = original.size();

    CharArraySet wrapped = CharArraySet.unmodifiableSet(original);
    assertEquals("Set size changed due to unmodifiableSet call", sizeBefore, wrapped.size());

    foreach (string stopword in TEST_STOP_WORDS)
    {
        assertTrue(wrapped.contains(stopword));
    }

    // the integer entry must be reachable as an int, a string and a char[]
    assertTrue(wrapped.contains(Convert.ToInt32(1)));
    assertTrue(wrapped.contains("1"));
    assertTrue(wrapped.contains(new char[] { '1' }));

    try
    {
        CharArraySet.unmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
/// <summary>
/// Checks lookups of terms containing supplementary characters in sets created with
/// <c>Version.LUCENE_30</c>. The test expects the lower-cased variants to be absent
/// both with and without <c>ignoreCase</c>.
/// </summary>
public virtual void testSupplementaryCharsBWCompat()
{
    // .NET composite formatting uses {0}; a Java-style %s placeholder is never substituted.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    string[] upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

    // ignoreCase == true
    CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // ignoreCase == false
    set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Checks lookups of terms that contain an unpaired high surrogate in sets created with
/// <c>Version.LUCENE_30</c>. With <c>ignoreCase</c> the lower-cased variants are expected
/// to be found, except for the last one; without it none of them may be found.
/// </summary>
public virtual void testSingleHighSurrogateBWComapt()
{
    // .NET composite formatting uses {0}; a Java-style %s placeholder is never substituted.
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = new string[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // ignoreCase == true
    CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        if (i == lowerArr.Length - 1)
        {
            // the last pair is the only one whose lower-cased form is expected to be absent
            assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
        }
        else
        {
            assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
        }
    }

    // ignoreCase == false
    set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Starts from a set created with an initial size of 0, adds every stop word individually,
/// and verifies that the size and the membership of every word are correct afterwards.
/// </summary>
public virtual void testRehash()
{
    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

    foreach (string word in TEST_STOP_WORDS)
    {
        cas.add(word);
    }

    assertEquals(TEST_STOP_WORDS.Length, cas.size());

    foreach (string word in TEST_STOP_WORDS)
    {
        assertTrue(cas.contains(word));
    }
}
/// <summary>
/// Checks lookups of terms that contain an unpaired high surrogate, first against a
/// case-insensitive set and then against a case-sensitive one.
/// </summary>
public virtual void TestSingleHighSurrogate()
{
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // ignoreCase == true: both the original and the lower-cased form must be found
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // ignoreCase == false: only the exact form may be found
    set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        // report the term that is actually being looked up (the lower-cased one)
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Tells whether <paramref name="text"/> is contained in the configured stop-word set.
/// </summary>
/// <param name="text"> Term to look up </param>
/// <returns> <c>false</c> when no stop-word set is configured; otherwise the result of the lookup </returns>
internal bool isStopWord(string text)
{
    if (stopWords == null)
    {
        return false;
    }

    return stopWords.contains(text);
}
/// <summary>
/// Verifies that every mutating operation on a set returned by
/// <c>CharArraySet.unmodifiableSet</c> raises <see cref="System.NotSupportedException"/>
/// and leaves size and contents untouched.
/// </summary>
public virtual void testModifyOnUnmodifiable()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(TEST_STOP_WORDS);
    int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());

    string NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

    // add(char[])
    try
    {
        set.add(NOT_IN_SET.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(string)
    try
    {
        set.add(NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(StringBuilder)
    try
    {
        set.add(new StringBuilder(NOT_IN_SET));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // clear()
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(object)
    try
    {
        set.add((object)NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    // remove() on the iterator
    try
    {
        set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // retainAll: the argument is a one-element collection holding NOT_IN_SET, not the bare string
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new string[] { NOT_IN_SET }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // addAll: again a one-element collection, so the whole word is the element being added
    try
    {
        set.addAll(new string[] { NOT_IN_SET });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }

    for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
    {
        assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }
}
/// <summary>
/// Checks that the set produced by <c>CharArraySet.unmodifiableSet</c> rejects each
/// mutating operation with <see cref="System.NotSupportedException"/> while keeping
/// its original size and members.
/// </summary>
public virtual void testModifyOnUnmodifiable()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(TEST_STOP_WORDS);
    int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());

    string NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

    // Adding a char[] must be rejected.
    try
    {
        set.add(NOT_IN_SET.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Adding a string must be rejected.
    try
    {
        set.add(NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Adding a StringBuilder must be rejected.
    try
    {
        set.add(new StringBuilder(NOT_IN_SET));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Clearing must be rejected.
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // Adding through the object overload must be rejected.
    try
    {
        set.add((object)NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // This test was changed in 3.1, as a contains() call on the given Collection using the
    // "correct" iterator's current key (now a char[]) on a Set<String> would not hit any
    // element of the CAS and therefore never call remove() on the iterator.
    try
    {
        set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // retainAll is given a set built from a one-element word array; the constructor is
    // called with a word collection elsewhere in this method (TEST_STOP_WORDS), so the
    // single word is wrapped rather than passed as a bare string.
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // addAll likewise receives a one-element word array instead of a bare string.
    try
    {
        set.addAll(new[] { NOT_IN_SET });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }

    // None of the rejected calls may have removed an original stop word.
    for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
    {
        assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }
}
/// <summary>
/// Returns <c>true</c> only when the base implementation matches and the
/// <c>exceptions</c> lookup over <paramref name="s"/> (offset 0, length
/// <paramref name="len"/>) reports no hit.
/// </summary>
public override bool matches(char[] s, int len)
{
    // The exceptions lookup is skipped when the base check already fails.
    if (!base.matches(s, len))
    {
        return false;
    }

    return !exceptions.contains(s, 0, len);
}
/// <summary>
/// Accepts the current token only when its term is not found in the stop word set.
/// </summary>
/// <returns><c>false</c> if the term buffer contents are contained in <c>stopWords</c>; otherwise <c>true</c>.</returns>
protected internal override bool Accept()
{
    bool isStopWord = stopWords.contains(termAtt.buffer(), 0, termAtt.length());
    return !isStopWord;
}
/// <summary>
/// Checks that a lookup over a window of a larger char buffer (offset 1, length 4)
/// and a lookup of the equivalent string both succeed, on the plain set and on its
/// unmodifiable wrapper.
/// </summary>
public virtual void testNonZeroOffset()
{
    string[] vocabulary = new string[] { "Hello", "World", "this", "is", "a", "test" };
    char[] padded = "xthisy".ToCharArray();

    CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    wordSet.addAll(vocabulary);

    assertTrue(wordSet.contains(padded, 1, 4));
    assertTrue(wordSet.contains(new string(padded, 1, 4)));

    // test unmodifiable
    wordSet = CharArraySet.unmodifiableSet(wordSet);

    assertTrue(wordSet.contains(padded, 1, 4));
    assertTrue(wordSet.contains(new string(padded, 1, 4)));
}
/// <summary>
/// Adds an integer to the set and checks that it is found when queried with the same
/// value, an equal <c>int?</c>, the string "1" and the char array {'1'}; the same four
/// lookups are then repeated on the unmodifiable wrapper.
/// </summary>
public virtual void testObjectContains()
{
    CharArraySet objectSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? one = 1;
    objectSet.add(one);

    assertTrue(objectSet.contains(one));
    assertTrue(objectSet.contains((int?)1)); // another integer
    assertTrue(objectSet.contains("1"));
    assertTrue(objectSet.contains(new char[] { '1' }));

    // test unmodifiable
    objectSet = CharArraySet.unmodifiableSet(objectSet);

    assertTrue(objectSet.contains(one));
    assertTrue(objectSet.contains((int?)1)); // another integer
    assertTrue(objectSet.contains("1"));
    assertTrue(objectSet.contains(new char[] { '1' }));
}
/// <summary>
/// Runs a sentence through a <c>StopAnalyzer</c> built from a custom stop set and checks,
/// for every emitted token, that its text is not in the stop set and that its position
/// increment equals the expected value for that token.
/// </summary>
public virtual void TestStopListPositions()
{
    string[] stopList = new string[] { "good", "test", "analyzer" };
    CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, stopList, false);
    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

    string s = "This is a good test of the english stop analyzer with positions";
    int[] expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

    TokenStream stream = newStop.TokenStream("test", s);
    try
    {
        assertNotNull(stream);

        ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
        IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();

        stream.Reset();
        for (int tokenIndex = 0; stream.IncrementToken(); tokenIndex++)
        {
            assertFalse(stopWordsSet.contains(termAtt.ToString()));
            assertEquals(expectedIncr[tokenIndex], posIncrAtt.PositionIncrement);
        }
        stream.End();
    }
    finally
    {
        // The stream is handed to IOUtils in every case, including assertion failures.
        IOUtils.CloseWhileHandlingException(stream);
    }
}
/// <summary>
/// Exercises each mutating member on the set returned by <c>CharArraySet.UnmodifiableSet</c>
/// and expects a <see cref="System.NotSupportedException"/> every time, with the set's size
/// and contents left as they were.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet charSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    charSet.AddAll(TEST_STOP_WORDS);
    int expectedSize = charSet.size();

    charSet = CharArraySet.UnmodifiableSet(charSet);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, charSet.size());

    string absentWord = "SirGallahad";
    assertFalse("Test String already exists in set", charSet.Contains(absentWord));

    // Add(char[])
    try
    {
        charSet.Add(absentWord.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // add(string)
    try
    {
        charSet.add(absentWord);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // Add(StringBuilder)
    try
    {
        charSet.Add(new StringBuilder(absentWord));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // clear()
    try
    {
        charSet.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", charSet.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // add(string), exercised a second time
    try
    {
        charSet.add(absentWord);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // NOTE: This results in a StackOverflow exception. Since this is not a public member of
    // CharArraySet, but an extension method for the test fixture (which apparently has a bug),
    // this test is non-critical.
    //// This test was changed in 3.1, as a contains() call on the given Collection using the
    //// "correct" iterator's current key (now a char[]) on a Set<String> would not hit any
    //// element of the CAS and therefore never call remove() on the iterator.
    //try
    //{
    //    charSet.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
    //    fail("Modified unmodifiable set");
    //}
    //catch (System.NotSupportedException)
    //{
    //    // expected
    //    assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    //}

    #region Added for better .NET support
    // This test was added for .NET to check the Remove method, since the extension method
    // above fails to execute.
    try
    {
#pragma warning disable 612, 618
        charSet.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }
    #endregion

    // retainAll
    try
    {
        charSet.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { absentWord }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, charSet.size());
    }

    // addAll
    try
    {
        charSet.addAll(new[] { absentWord });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
    }

    // LUCENENET Specific - added to test .NETified UnionWith method
    try
    {
        charSet.UnionWith(new[] { absentWord });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", charSet.contains(absentWord));
    }

    // All of the original stop words must still be present.
    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue(charSet.contains(stopWord));
    }
}