/// <summary>
/// Verifies <c>ToString()</c> on a <see cref="CharArraySet"/>: a one-element set renders as
/// <c>[test]</c>, and after a second element is added the rendering contains the
/// <c>", "</c> separator. The same checks are run once with <c>TEST_VERSION_CURRENT</c>
/// and once with <c>Version.LUCENE_30</c>.
/// </summary>
public virtual void TestToString()
{
    // Pass 1: current match version.
    CharArraySet current = CharArraySet.Copy(TEST_VERSION_CURRENT, Collections.Singleton("test"));
    string currentSingle = current.ToString();
    assertEquals("[test]", currentSingle);

    current.add("test2");
    string currentPair = current.ToString();
    assertTrue(currentPair.Contains(", "));

    // Pass 2: LUCENE_30 match version.
    CharArraySet legacy = CharArraySet.Copy(Version.LUCENE_30, Collections.Singleton("test"));
    string legacySingle = legacy.ToString();
    assertEquals("[test]", legacySingle);

    legacy.add("test2");
    string legacyPair = legacy.ToString();
    assertTrue(legacyPair.Contains(", "));
}
/// <summary>
/// Exercises <see cref="CharArraySet"/> with terms that contain an unpaired high surrogate
/// (<c>\uD800</c>), using <c>TEST_VERSION_CURRENT</c>.
/// <para>
/// With <c>ignoreCase = true</c> both the upper-case terms and their lower-case counterparts
/// must be reported as contained. With <c>ignoreCase = false</c> only the exact upper-case
/// terms that were added must be contained; the lower-case counterparts must not.
/// </para>
/// </summary>
public virtual void TestSingleHighSurrogate()
{
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
    string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

    // Case-insensitive set: upper- and lower-case variants are both expected.
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }

    // Case-sensitive set: only the exact terms that were added are expected.
    set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
    foreach (string upper in upperArr)
    {
        set.add(upper);
    }
    for (int i = 0; i < upperArr.Length; i++)
    {
        assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
        // The failure message names the term actually being checked (the lower-case one),
        // not the upper-case term that is legitimately in the set.
        assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
}
/// <summary>
/// Backwards-compatibility check for supplementary (non-BMP) characters with
/// <c>Version.LUCENE_30</c>: after adding the upper-case terms, the terms themselves must be
/// contained, while their lower-case counterparts must NOT be contained. This is asserted
/// both for a set created with <c>ignoreCase = true</c> and for one created with
/// <c>ignoreCase = false</c>.
/// </summary>
public virtual void TestSupplementaryCharsBWCompat()
{
    string missing = "Term {0} is missing in the set";
    string falsePos = "Term {0} is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    string[] upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
    string[] lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

    // Same expectations for both settings, so run the identical pass twice:
    // first ignoring case, then case-sensitive.
    foreach (bool ignoreCase in new[] { true, false })
    {
        CharArraySet terms = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, ignoreCase);
        for (int k = 0; k < upperArr.Length; k++)
        {
            terms.add(upperArr[k]);
        }

        for (int i = 0; i < upperArr.Length; i++)
        {
            string upper = upperArr[i];
            string lower = lowerArr[i];
            assertTrue(string.Format(missing, upper), terms.contains(upper));
            assertFalse(string.Format(falsePos, lower), terms.contains(lower));
        }
    }
}
/// <summary>
/// Reads stopwords from a stopword list in Snowball format.
/// <para>
/// The snowball format is the following:
/// <list type="bullet">
/// <item><description>Lines may contain multiple words separated by whitespace.</description></item>
/// <item><description>The comment character is the vertical line (|).</description></item>
/// <item><description>Lines may contain trailing comments.</description></item>
/// </list>
/// </para>
/// </summary>
/// <remarks>
/// Converted from Java. The original signature was
/// <c>getSnowballWordSet(java.io.Reader reader, CharArraySet result) throws java.io.IOException</c>;
/// method 'throws' clauses are not available in .NET.
/// </remarks>
/// <param name="reader"> Reader containing a Snowball stopword list </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
{
    BufferedReader br = null;
    try
    {
        br = getBufferedReader(reader);
        for (string line = br.readLine(); line != null; line = br.readLine())
        {
            // Everything from the first '|' onwards is a comment and is dropped.
            int bar = line.IndexOf('|');
            string content = bar >= 0 ? line.Substring(0, bar) : line;

            foreach (string word in content.Split("\\s+", true))
            {
                // Skip empty fragments produced by the split.
                if (word.Length == 0)
                {
                    continue;
                }
                result.add(word);
            }
        }
    }
    finally
    {
        // Always release the reader, even when reading fails part-way.
        IOUtils.close(br);
    }
    return result;
}
/// <summary>
/// Verifies <c>ToString()</c> on a <see cref="CharArraySet"/> built from a one-element list:
/// the set renders as <c>[test]</c>, and once a second element is added the rendering
/// contains the <c>", "</c> separator. Runs with <c>TEST_VERSION_CURRENT</c> and again with
/// <c>Version.LUCENE_30</c>.
/// </summary>
public virtual void TestToString()
{
    // Pass 1: current match version.
    CharArraySet current = CharArraySet.Copy(TEST_VERSION_CURRENT, new JCG.List<string> { "test" });
    string currentSingle = current.ToString();
    assertEquals("[test]", currentSingle);

    current.add("test2");
    string currentPair = current.ToString();
    assertTrue(currentPair.Contains(", "));

    // Pass 2: LUCENE_30 match version (warnings 612/618 are disabled around this reference).
#pragma warning disable 612, 618
    CharArraySet legacy = CharArraySet.Copy(Version.LUCENE_30, new JCG.List<string> { "test" });
#pragma warning restore 612, 618
    string legacySingle = legacy.ToString();
    assertEquals("[test]", legacySingle);

    legacy.add("test2");
    string legacyPair = legacy.ToString();
    assertTrue(legacyPair.Contains(", "));
}
/// <summary>
/// Verifies <c>CharArraySet.Copy</c> when the source is itself a <see cref="CharArraySet"/>:
/// <list type="bullet">
/// <item><description>each copy has the same size as its source;</description></item>
/// <item><description>the copy of the ignore-case set matches both the stop words and their
/// upper-cased forms, while the copy of the case-sensitive set matches only the original
/// stop words;</description></item>
/// <item><description>terms added to a copy do not appear in either source set.</description></item>
/// </list>
/// </summary>
public virtual void TestCopyCharArraySet()
{
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        // Invariant upper-casing keeps the test independent of the current culture
        // (e.g. the Turkish dotted/dotless 'i' mapping).
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    // Both source sets get the stop words plus one non-string entry.
    setIngoreCase.addAll(TEST_STOP_WORDS);
    setIngoreCase.add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.Copy(TEST_VERSION_CURRENT, setCaseSensitive);

    // Each copy is compared against its own source.
    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string upper in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(upper));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string added in newWords)
    {
        assertFalse(setIngoreCase.contains(added));
        assertFalse(setCaseSensitive.contains(added));
    }
}
/// <summary>
/// Reads the given reader line by line and adds each line, with leading and trailing
/// whitespace removed, as an entry to a <see cref="CharArraySet"/>. Every line of the reader
/// should contain only one word. The words need to be in lowercase if you make use of an
/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
/// </summary>
/// <param name="reader"> Reader containing the wordlist </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
{
    BufferedReader br = null;
    try
    {
        br = getBufferedReader(reader);
        for (string line = br.readLine(); line != null; line = br.readLine())
        {
            string trimmed = line.Trim();
            result.add(trimmed);
        }
    }
    finally
    {
        // Always release the reader, even when reading fails part-way.
        IOUtils.close(br);
    }
    return result;
}
/// <summary>
/// Reads the given reader line by line and adds every non-comment line, with leading and
/// trailing whitespace removed, as an entry to a <see cref="CharArraySet"/>. A line counts as
/// a comment when it starts with <paramref name="comment"/> (ordinal comparison, checked
/// before trimming). Every line of the reader should contain only one word. The words need
/// to be in lowercase if you make use of an Analyzer which uses LowerCaseFilter
/// (like StandardAnalyzer).
/// </summary>
/// <param name="reader"> Reader containing the wordlist </param>
/// <param name="comment"> The string representing a comment. </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
{
    BufferedReader br = null;
    try
    {
        br = getBufferedReader(reader);
        for (string line = br.ReadLine(); line != null; line = br.ReadLine())
        {
            // Comment lines are skipped entirely.
            if (line.StartsWith(comment, StringComparison.Ordinal))
            {
                continue;
            }
            result.add(line.Trim());
        }
    }
    finally
    {
        // Always release the reader, even when reading fails part-way.
        IOUtils.Close(br);
    }
    return result;
}
/// <summary>
/// Verifies that the set returned by <c>CharArraySet.UnmodifiableSet</c> rejects every
/// mutating call with <see cref="System.NotSupportedException"/> and is left unchanged:
/// <c>add</c> (char[], string, StringBuilder and object arguments), <c>clear</c>,
/// <c>Remove</c>, <c>retainAll</c> and <c>addAll</c>. Finally checks that every entry of
/// <c>TEST_STOP_WORDS</c> is still contained.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.AddAll(TEST_STOP_WORDS);
    int size = set.size();
    set = CharArraySet.UnmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", size, set.size());

    string NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.Contains(NOT_IN_SET));

    // add(char[])
    try
    {
        set.add(NOT_IN_SET.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(string)
    try
    {
        set.add(NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(StringBuilder)
    try
    {
        set.add(new StringBuilder(NOT_IN_SET));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // clear()
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // add(object): the explicit cast keeps this from being a duplicate of the
    // add(string) check above.
    try
    {
        set.add((object)NOT_IN_SET);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
    // but an extension method for the test fixture (which apparently has a bug), this test is non-critical

    //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
    //// remove() on the iterator
    //try
    //{
    //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
    //    fail("Modified unmodifiable set");
    //}
    //catch (System.NotSupportedException)
    //{
    //    // expected
    //    assertEquals("Size of unmodifiable set has changed", size, set.size());
    //}

    #region Added for better .NET support
    // This test was added for .NET to check the Remove method, since the extension method
    // above fails to execute.
    try
    {
        set.Remove(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    #endregion

    // retainAll(...)
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new [] { NOT_IN_SET }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", size, set.size());
    }

    // addAll(...)
    try
    {
        set.addAll(new[] { NOT_IN_SET });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }

    // The original contents must have survived every rejected mutation.
    for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
    {
        assertTrue(set.contains(TEST_STOP_WORDS[i]));
    }
}