public virtual void TestUnmodifiableSet()
{
    // Build a set holding the stop words plus one integer entry and remember its size.
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.AddAll(TEST_STOP_WORDS);
    source.Add(Convert.ToInt32(1));
    int expectedSize = source.size();

    // Wrapping the set must change neither its size nor what it reports as contained.
    CharArraySet set = CharArraySet.UnmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());
    foreach (var word in TEST_STOP_WORDS)
    {
        assertTrue(set.contains(word));
    }

    // The integer entry is looked up as an int, as a string and as a char[].
    assertTrue(set.contains(Convert.ToInt32(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new[] { '1' }));

    // A null argument must be rejected.
    try
    {
        CharArraySet.UnmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.NullReferenceException)
    {
        // expected
    }
}
public virtual void TestUnmodifiableSet()
{
    // Build a set holding the stop words plus one integer entry and remember its size.
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.UnionWith(TEST_STOP_WORDS);
    source.Add(Convert.ToInt32(1));
    int expectedSize = source.size();

    // Wrapping the set must change neither its size nor what it reports as contained.
    CharArraySet set = CharArraySet.UnmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());
    foreach (var word in TEST_STOP_WORDS)
    {
        assertTrue(set.Contains(word));
    }

    // The integer entry is looked up as an int, as a string and as a char[].
    assertTrue(set.Contains(Convert.ToInt32(1)));
    assertTrue(set.Contains("1"));
    assertTrue(set.Contains(new[] { '1' }));

    // A null argument must be rejected.
    try
    {
        CharArraySet.UnmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (ArgumentNullException)
    {
        // expected
        // LUCENENET NOTE: this .NET test expects an ArgumentNullException here,
        // not a NullReferenceException.
    }
}
/// <summary>
/// Reads stopwords from a stopword list in Snowball format.
/// <para>
/// The snowball format is the following:
/// <list type="bullet">
/// <item><description>Lines may contain multiple words separated by whitespace.</description></item>
/// <item><description>The comment character is the vertical line (|).</description></item>
/// <item><description>Lines may contain trailing comments.</description></item>
/// </list>
/// </para>
/// The <paramref name="reader"/> is always closed before this method returns,
/// including when reading fails.
/// </summary>
/// <param name="reader"> <see cref="TextReader"/> containing a Snowball stopword list </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the reader's words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetSnowballWordSet(TextReader reader, CharArraySet result)
{
    try
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Everything from the first '|' onwards is a comment.
            int comment = line.IndexOf('|');
            if (comment >= 0)
            {
                line = line.Substring(0, comment);
            }

            // A null separator makes Split break on every white-space character
            // (tabs included), not just on ' '. Combined with RemoveEmptyEntries
            // this yields only non-empty words that need no further trimming.
            string[] words = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            foreach (string word in words)
            {
                result.Add(word);
            }
        }
    }
    finally
    {
        IOUtils.Close(reader);
    }

    return result;
}
/// <summary>
/// Loads the stopwords of a Snowball-format list into a <see cref="CharArraySet"/>.
/// <para>
/// Rules of the Snowball format:
/// <list type="bullet">
/// <item><description>A line may hold several words, separated by whitespace.</description></item>
/// <item><description>The vertical line (|) starts a comment.</description></item>
/// <item><description>A comment may follow the words on the same line.</description></item>
/// </list>
/// </para>
/// The <paramref name="reader"/> is handed to <c>IOUtils.Dispose</c> before this method
/// returns, including when reading fails.
/// </summary>
/// <param name="reader"> <see cref="TextReader"/> supplying the Snowball stopword list </param>
/// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
/// <returns> the <see cref="CharArraySet"/> passed in as <paramref name="result"/> </returns>
public static CharArraySet GetSnowballWordSet(TextReader reader, CharArraySet result)
{
    try
    {
        for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
        {
            // Keep only the part of the line that precedes the first '|'.
            int commentStart = raw.IndexOf('|');
            string content = commentStart < 0 ? raw : raw.Substring(0, commentStart);

            // WHITESPACE and the array TrimEnd() are declared outside this block;
            // presumably TrimEnd() drops trailing empty entries - confirm against its definition.
            string[] words = WHITESPACE.Split(content).TrimEnd();
            foreach (string word in words)
            {
                if (word.Length == 0)
                {
                    continue;
                }

                result.Add(word);
            }
        }
    }
    finally
    {
        IOUtils.Dispose(reader);
    }

    return result;
}
public virtual void TestRehash()
{
    // The set is created with a start size of 0; per the test name, adding the
    // stop words is expected to exercise rehashing.
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
    foreach (var word in TEST_STOP_WORDS)
    {
        set.Add(word);
    }

    // Every word must have been stored and must still be found.
    assertEquals(TEST_STOP_WORDS.Length, set.size());
    foreach (var word in TEST_STOP_WORDS)
    {
        assertTrue(set.Contains(word));
    }
}
public virtual void TestCopyCharArraySet()
{
    // Copies of a case-insensitive and a case-sensitive set must match their
    // sources and must be independent of them when modified afterwards.
    CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    IList<string> stopwords = TEST_STOP_WORDS;
    IList<string> stopwordsUpper = new List<string>();
    foreach (string word in stopwords)
    {
        stopwordsUpper.Add(word.ToUpperInvariant());
    }

    setIgnoreCase.addAll(TEST_STOP_WORDS);
    setIgnoreCase.Add(Convert.ToInt32(1));
    setCaseSensitive.addAll(TEST_STOP_WORDS);
    setCaseSensitive.Add(Convert.ToInt32(1));

    CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, setIgnoreCase);
    CharArraySet copyCaseSens = CharArraySet.Copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIgnoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());
    // The original test only ever compared against copy.size(); the size of the
    // case-sensitive copy is checked here as well.
    assertEquals(setCaseSensitive.size(), copyCaseSens.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    foreach (string word in stopwordsUpper)
    {
        assertFalse(copyCaseSens.contains(word));
    }

    // test adding terms to the copy
    IList<string> newWords = new List<string>();
    foreach (string word in stopwords)
    {
        newWords.Add(word + "_1");
    }
    copy.addAll(newWords);

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));

    // new added terms are not in the source set
    foreach (string word in newWords)
    {
        assertFalse(setIgnoreCase.contains(word));
        assertFalse(setCaseSensitive.contains(word));
    }
}
// LUCENENET TODO: Add .NET overloads that accept a file name? Or at least a FileInfo object as was done in 3.0.3?
/// <summary>
/// Adds each line read from a <see cref="TextReader"/> to a <see cref="CharArraySet"/>,
/// with leading and trailing whitespace removed. Each line of the <see cref="TextReader"/>
/// is expected to hold exactly one word. Supply the words in lowercase when they are used with an
/// <see cref="Analyzer"/> that applies <see cref="Core.LowerCaseFilter"/> (such as <see cref="Standard.StandardAnalyzer"/>).
/// The <paramref name="reader"/> is closed before this method returns, including when reading fails.
/// </summary>
/// <param name="reader"> <see cref="TextReader"/> supplying the wordlist </param>
/// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
/// <returns> the <see cref="CharArraySet"/> passed in as <paramref name="result"/> </returns>
public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
{
    try
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            result.Add(line.Trim());
        }
    }
    finally
    {
        IOUtils.Close(reader);
    }

    return result;
}
public virtual void TestObjectContains()
{
    // An integer entry must be found via the same value, an equal value,
    // its string form and its char[] form.
    CharArraySet modifiable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? one = Convert.ToInt32(1);
    modifiable.Add(one);

    assertTrue(modifiable.Contains(one));
    assertTrue(modifiable.Contains(new int?(1))); // another integer
    assertTrue(modifiable.Contains("1"));
    assertTrue(modifiable.Contains(new char[] { '1' }));

    // test unmodifiable
    CharArraySet unmodifiable = CharArraySet.UnmodifiableSet(modifiable);

    assertTrue(unmodifiable.Contains(one));
    assertTrue(unmodifiable.Contains(new int?(1))); // another integer
    assertTrue(unmodifiable.Contains("1"));
    assertTrue(unmodifiable.Contains(new char[] { '1' }));
}
public virtual void TestObjectContains()
{
    // An integer entry must be found via the same instance, another instance
    // obtained for the same value, its string form and its char[] form.
    CharArraySet modifiable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    J2N.Numerics.Int32 one = J2N.Numerics.Int32.GetInstance(1);
    modifiable.Add(one);

    assertTrue(modifiable.Contains(one));
    assertTrue(modifiable.Contains(J2N.Numerics.Int32.GetInstance(1))); // another integer
    assertTrue(modifiable.Contains("1"));
    assertTrue(modifiable.Contains(new char[] { '1' }));

    // test unmodifiable
    CharArraySet unmodifiable = CharArraySet.UnmodifiableSet(modifiable);

    assertTrue(unmodifiable.Contains(one));
    assertTrue(unmodifiable.Contains(J2N.Numerics.Int32.GetInstance(1))); // another integer
    assertTrue(unmodifiable.Contains("1"));
    assertTrue(unmodifiable.Contains(new char[] { '1' }));
}
/// <summary>
/// Adds each non-comment line read from a <see cref="TextReader"/> to a <see cref="CharArraySet"/>,
/// with leading and trailing whitespace removed. A line counts as a comment when it starts with
/// <paramref name="comment"/> (ordinal comparison). Each line of the <see cref="TextReader"/>
/// is expected to hold exactly one word. Supply the words in lowercase when they are used with an
/// <see cref="Analyzer"/> that applies <see cref="Core.LowerCaseFilter"/> (such as <see cref="Standard.StandardAnalyzer"/>).
/// The <paramref name="reader"/> is closed before this method returns, including when reading fails.
/// </summary>
/// <param name="reader"> <see cref="TextReader"/> supplying the wordlist </param>
/// <param name="comment"> the string that marks a line as a comment </param>
/// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
/// <returns> the <see cref="CharArraySet"/> passed in as <paramref name="result"/> </returns>
public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
{
    try
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            // The prefix test runs on the untrimmed line.
            if (line.StartsWith(comment, StringComparison.Ordinal))
            {
                continue;
            }

            result.Add(line.Trim());
        }
    }
    finally
    {
        IOUtils.Close(reader);
    }

    return result;
}
public virtual void TestMethods()
{
    // Exercises PutAll, the key set view, the entry iterators (including SetValue)
    // and clearing through the views, using a plain dictionary as the reference.
    CharArrayMap<int?> map = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 2, false);
    Dictionary<string, int?> reference = new Dictionary<string, int?>();
    reference["foo"] = 1;
    reference["bar"] = 2;
    map.PutAll(reference);
    assertEquals(reference.Count, map.Count);

    reference["baz"] = 3;
    map.PutAll(reference);
    assertEquals(reference.Count, map.Count);

    // LUCENENET: Keys is not implicitly convertible, so it is cast explicitly.
    CharArraySet keys = map.Keys as CharArraySet;
    int count = 0;
    foreach (string o in keys)
    {
        assertTrue(map.ContainsKey(o));
        char[] chars = o.ToCharArray();
        assertTrue(map.ContainsKey(chars, 0, chars.Length));
        count++;
    }
    assertEquals(reference.Count, count);
    assertEquals(reference.Count, keys.Count);
    assertEquals(map.Count, keys.Count);

    // Clearing the key set is expected to empty the map as well.
    keys.Clear();
    assertEquals(0, keys.Count);
    assertEquals(0, map.Count);

    // Adding through the key set must be rejected.
    try
    {
        keys.Add("test");
        fail("keySet() allows adding new keys");
    }
    catch (System.NotSupportedException)
    {
        // pass
    }

    map.PutAll(reference);
    assertEquals(reference.Count, keys.Count);
    assertEquals(map.Count, keys.Count);

    // LUCENENET: the enumerator is not implicitly convertible, so it is cast explicitly.
    IEnumerator<KeyValuePair<string, int?>> entries = (IEnumerator<KeyValuePair<string, int?>>)map.EntrySet().GetEnumerator();
    count = 0;
    while (entries.MoveNext())
    {
        KeyValuePair<string, int?> entry = entries.Current;
        object key = entry.Key;
        int? value = entry.Value;
        assertEquals(map.Get(key), value);

        // LUCENENET: SetValue is not part of IEnumerator<T>, so the iterator type is needed.
        int? scaled = value * 100;
        ((CharArrayMap<int?>.EntryIterator)entries).SetValue(scaled);
        assertEquals(scaled, (int)map.Get(key));
        count++;
    }
    assertEquals(reference.Count, count);

    map.Clear();
    map.PutAll(reference);
    assertEquals(map.size(), count);

    // Same walk again, this time through the concrete iterator type.
    CharArrayMap<int?>.EntryIterator iterator = map.EntrySet().GetEnumerator() as CharArrayMap<int?>.EntryIterator;
    count = 0;
    while (iterator.MoveNext())
    {
        var keyc = iterator.Current.Key;
        int? value = iterator.Current.Value;
        assertEquals(reference[keyc], value);

        int? scaled = value * 100;
        iterator.SetValue(scaled);
        assertEquals(scaled, (int)map.Get(keyc));
        count++;
    }
    assertEquals(reference.Count, count);

    // Clearing the entry set is expected to empty the map.
    map.EntrySet().Clear();
    assertEquals(0, map.size());
    assertEquals(0, map.EntrySet().size());
    assertTrue(map.Count == 0);
}
public virtual void TestModifyOnUnmodifiable()
{
    // Each mutating call on the unmodifiable set is expected to throw an exception
    // accepted by IsUnsupportedOperationException(); the catch blocks then verify
    // that the size and/or contents are unchanged.
    CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    source.UnionWith(TEST_STOP_WORDS);
    int expectedSize = source.size();

    CharArraySet set = CharArraySet.UnmodifiableSet(source);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

    string notInSet = "SirGallahad";
    assertFalse("Test String already exists in set", set.Contains(notInSet));

    // Add(char[])
    try
    {
        set.Add(notInSet.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(string)
    try
    {
        set.add(notInSet);
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Add(StringBuilder)
    try
    {
        set.Add(new StringBuilder(notInSet));
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // clear()
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(notInSet));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(string) once more, after the rejected clear()
    try
    {
        set.add(notInSet);
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // NOTE: The removeAll check below is disabled because it results in a StackOverflow exception.
    // removeAll is not a public member of CharArraySet but an extension method for the test
    // fixture (which apparently has a bug), so this check is non-critical.
    //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    //// remove() on the iterator
    //try
    //{
    //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
    //    fail("Modified unmodifiable set");
    //}
    //catch (Exception e) when (e.IsUnsupportedOperationException())
    //{
    //    // expected
    //    assertEquals("Size of unmodifiable set has changed", size, set.size());
    //}

    #region LUCENENET Added for better .NET support
    // Added for .NET to cover the Remove method, since the removeAll extension
    // method above cannot be executed.
    try
    {
#pragma warning disable 612, 618
        set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // LUCENENET Specific - covers the .NETified UnionWith method
    try
    {
        set.UnionWith(new[] { notInSet });
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
    }
    #endregion LUCENENET Added for better .NET support

    // retainAll
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { notInSet }, true));
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // addAll
    try
    {
        set.addAll(new[] { notInSet });
        fail("Modified unmodifiable set");
    }
    catch (Exception ex) when (ex.IsUnsupportedOperationException())
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
    }

    // After all rejected modifications every stop word must still be present.
    foreach (var word in TEST_STOP_WORDS)
    {
        assertTrue(set.contains(word));
    }
}
public virtual void TestMethods()
{
    // Exercises PutAll, the key set view and the entry iterators, using a plain
    // dictionary as the reference. Several upstream checks are disabled; see the TODOs.
    CharArrayMap<int?> map = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 2, false);
    //Dictionary<string, int?> hm = new Dictionary<string, int?>();
    // TODO: In .NET, we cannot implicitly convert from string to object using generics
    Dictionary<object, int?> reference = new Dictionary<object, int?>();
    reference["foo"] = 1;
    reference["bar"] = 2;
    map.PutAll(reference);
    assertEquals(reference.Count, map.Count);

    reference["baz"] = 3;
    map.PutAll(reference);
    assertEquals(reference.Count, map.Count);

    // TODO: In .NET we cannot make this conversion implicitly.
    CharArraySet keys = map.Keys as CharArraySet;
    int count = 0;
    foreach (object o in keys)
    {
        assertTrue(map.ContainsKey(o));
        char[] chars = (char[])o;
        assertTrue(map.ContainsKey(chars, 0, chars.Length));
        count++;
    }
    assertEquals(reference.Count, count);
    assertEquals(reference.Count, keys.Count);
    assertEquals(map.Count, keys.Count);

    // TODO: This directly contradicts the TestModifyOnUnmodifiable test,
    // where clear is not allowed from the Keys property.
    //cs.Clear();
    //assertEquals(0, cs.Count);
    //assertEquals(0, cm.Count);

    // Adding through the key set must be rejected.
    try
    {
        keys.Add("test");
        fail("keySet() allows adding new keys");
    }
    catch (System.NotSupportedException)
    {
        // pass
    }

    map.PutAll(reference);
    assertEquals(reference.Count, keys.Count);
    assertEquals(map.Count, keys.Count);

    IEnumerator<KeyValuePair<object, int?>> entries = IDictionaryExtensions.EntrySet(map).GetEnumerator();
    count = 0;
    while (entries.MoveNext())
    {
        KeyValuePair<object, int?> entry = entries.Current;
        object key = entry.Key;
        int? value = entry.Value;
        assertEquals(map.Get(key), value);
        // TODO: In .NET the Value property of KeyValuePair is read-only. Do we need a solution?
        //entry.Value = val * 100;
        //assertEquals(val * 100, (int)cm.Get(key));
        count++;
    }
    assertEquals(reference.Count, count);

    map.Clear();
    map.PutAll(reference);
    assertEquals(map.size(), count);

    // Same walk again, this time through the concrete iterator type.
    CharArrayMap<int?>.EntryIterator iterator = map.EntrySet().GetEnumerator() as CharArrayMap<int?>.EntryIterator;
    count = 0;
    while (iterator.MoveNext())
    {
        char[] keyc = (char[])iterator.Current.Key;
        int? value = iterator.Current.Value;
        assertEquals(reference[new string(keyc)], value);
        // TODO: In .NET the Value property of KeyValuePair is read-only. Do we need a solution?
        //iter2.Value = val * 100;
        //assertEquals(val * 100, (int)cm.Get(keyc));
        count++;
    }
    assertEquals(reference.Count, count);

    // TODO: In .NET, the EntrySet extension method makes a copy of the data
    // so clearing it won't work like this.
    map.EntrySet().clear();
    assertEquals(0, map.size());
    assertEquals(0, map.EntrySet().size());
    assertTrue(map.Count == 0);
}