예제 #1
0
        /// <summary>
        /// Verifies that <c>CharArraySet.UnmodifiableSet</c> returns a set with the same size and contents
        /// as the set it wraps, and that passing <c>null</c> raises a
        /// <see cref="System.NullReferenceException"/>.
        /// </summary>
        public virtual void TestUnmodifiableSet()
        {
            var source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.AddAll(TEST_STOP_WORDS);
            source.Add(Convert.ToInt32(1));

            // Remember the size before wrapping so it can be compared afterwards.
            int expectedSize = source.size();

            CharArraySet readOnlyView = CharArraySet.UnmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnlyView.size());

            foreach (var word in TEST_STOP_WORDS)
            {
                assertTrue(readOnlyView.contains(word));
            }

            // The integer entry must be found through its int, string and char[] forms.
            assertTrue(readOnlyView.contains(Convert.ToInt32(1)));
            assertTrue(readOnlyView.contains("1"));
            assertTrue(readOnlyView.contains(new[] { '1' }));

            try
            {
                CharArraySet.UnmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
                // Expected: wrapping null must be rejected.
            }
        }
예제 #2
0
        /// <summary>
        /// Verifies that <c>CharArraySet.UnmodifiableSet</c> returns a set with the same size and contents
        /// as the set it wraps, and that passing <c>null</c> raises an <see cref="ArgumentNullException"/>.
        /// </summary>
        public virtual void TestUnmodifiableSet()
        {
            var source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.UnionWith(TEST_STOP_WORDS);
            source.Add(Convert.ToInt32(1));

            // Remember the size before wrapping so it can be compared afterwards.
            int expectedSize = source.size();

            CharArraySet readOnlyView = CharArraySet.UnmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnlyView.size());

            foreach (var word in TEST_STOP_WORDS)
            {
                assertTrue(readOnlyView.Contains(word));
            }

            // The integer entry must be found through its int, string and char[] forms.
            assertTrue(readOnlyView.Contains(Convert.ToInt32(1)));
            assertTrue(readOnlyView.Contains("1"));
            assertTrue(readOnlyView.Contains(new[] { '1' }));

            try
            {
                CharArraySet.UnmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (ArgumentNullException) // NOTE: In .NET we throw an ArgumentNullException, not a NullReferenceException
            {
                // Expected: wrapping null must be rejected.
            }
        }
예제 #3
0
 /// <summary>
 /// Reads stopwords from a stopword list in Snowball format.
 /// <para>
 /// The snowball format is the following:
 /// <list type="bullet">
 ///     <item><description>Lines may contain multiple words separated by whitespace.</description></item>
 ///     <item><description>The comment character is the vertical line (&#124;).</description></item>
 ///     <item><description>Lines may contain trailing comments.</description></item>
 /// </list>
 /// </para>
 /// <para>
 /// The reader is handed to <c>IOUtils.Close</c> in a <c>finally</c> block, so it is closed
 /// whether or not reading succeeds.
 /// </para>
 /// </summary>
 /// <param name="reader"> <see cref="TextReader"/> containing a Snowball stopword list </param>
 /// <param name="result"> the <see cref="CharArraySet"/> to fill with the readers words </param>
 /// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
 public static CharArraySet GetSnowballWordSet(TextReader reader, CharArraySet result)
 {
     try
     {
         string line;
         while ((line = reader.ReadLine()) != null)
         {
             // Everything from the first '|' to the end of the line is a comment.
             int comment = line.IndexOf('|');
             if (comment >= 0)
             {
                 line = line.Substring(0, comment);
             }

             // A null separator array makes Split break on every white-space character
             // (tabs included), not only on ' ', which matches the documented format.
             // RemoveEmptyEntries drops the empty tokens produced by runs of white space,
             // so every remaining token is a non-empty word with no surrounding white space.
             string[] words = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
             foreach (string word in words)
             {
                 result.Add(word);
             }
         }
     }
     finally
     {
         IOUtils.Close(reader);
     }
     return result;
 }
예제 #4
0
 /// <summary>
 /// Loads the words of a Snowball-format stopword list into a <see cref="CharArraySet"/>.
 /// <para>
 /// A Snowball stopword list follows these rules:
 /// <list type="bullet">
 ///     <item><description>A line may hold several words, separated by whitespace.</description></item>
 ///     <item><description>The vertical line (&#124;) starts a comment.</description></item>
 ///     <item><description>A comment may follow the words on the same line.</description></item>
 /// </list>
 /// </para>
 /// <para>
 /// The reader is passed to <c>IOUtils.Dispose</c> in a <c>finally</c> block, whether or not reading succeeds.
 /// </para>
 /// </summary>
 /// <param name="reader"> <see cref="TextReader"/> supplying the Snowball stopword list </param>
 /// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
 /// <returns> the same <see cref="CharArraySet"/> instance that was passed in as <paramref name="result"/> </returns>
 public static CharArraySet GetSnowballWordSet(TextReader reader, CharArraySet result)
 {
     try
     {
         for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
         {
             // Keep only the part of the line that precedes the comment character, if any.
             int commentStart = line.IndexOf('|');
             string content = commentStart >= 0 ? line.Substring(0, commentStart) : line;

             foreach (string token in WHITESPACE.Split(content).TrimEnd())
             {
                 // Splitting can yield empty tokens; those are not words.
                 if (token.Length == 0)
                 {
                     continue;
                 }

                 result.Add(token);
             }
         }
     }
     finally
     {
         IOUtils.Dispose(reader);
     }

     return result;
 }
예제 #5
0
        /// <summary>
        /// Adds every test stop word to a <c>CharArraySet</c> constructed with an initial size argument
        /// of 0 and checks that all of them are counted and can be found afterwards.
        /// </summary>
        public virtual void TestRehash()
        {
            // NOTE(review): the 0 is presumably the start size, forcing the set to grow while filling - confirm.
            CharArraySet grownSet = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

            foreach (var stopWord in TEST_STOP_WORDS)
            {
                grownSet.Add(stopWord);
            }

            assertEquals(TEST_STOP_WORDS.Length, grownSet.size());

            foreach (var stopWord in TEST_STOP_WORDS)
            {
                assertTrue(grownSet.Contains(stopWord));
            }
        }
예제 #6
0
        /// <summary>
        /// Verifies that <c>CharArraySet.Copy</c> produces independent copies of an ignore-case set and of a
        /// case-sensitive set: each copy has the size and case behavior of its source, and terms added to
        /// a copy do not appear in either source set.
        /// </summary>
        public virtual void TestCopyCharArraySet()
        {
            CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList<string> stopwords = TEST_STOP_WORDS;
            IList<string> stopwordsUpper = new List<string>();

            foreach (string stopword in stopwords)
            {
                stopwordsUpper.Add(stopword.ToUpperInvariant());
            }
            setIgnoreCase.addAll(TEST_STOP_WORDS);
            setIgnoreCase.Add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.Add(Convert.ToInt32(1));

            CharArraySet copy = CharArraySet.Copy(TEST_VERSION_CURRENT, setIgnoreCase);
            CharArraySet copyCaseSens = CharArraySet.Copy(TEST_VERSION_CURRENT, setCaseSensitive);

            // Each copy is compared with its own source. Comparing the case-sensitive source with the
            // ignore-case copy would leave the size of copyCaseSens unchecked.
            assertEquals(setIgnoreCase.size(), copy.size());
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string upper in stopwordsUpper)
            {
                // The case-sensitive copy must not match the upper-cased variants.
                assertFalse(copyCaseSens.contains(upper));
            }

            // test adding terms to the copy
            IList<string> newWords = new List<string>();

            foreach (string stopword in stopwords)
            {
                newWords.Add(stopword + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));

            // new added terms are not in the source set
            foreach (string added in newWords)
            {
                assertFalse(setIgnoreCase.contains(added));
                assertFalse(setCaseSensitive.contains(added));
            }
        }
예제 #7
0
        // LUCENENET TODO: Add .NET overloads that accept a file name? Or at least a FileInfo object as was done in 3.0.3?

        /// <summary>
        /// Fills a <see cref="CharArraySet"/> with the lines of a <see cref="TextReader"/>, one entry per line,
        /// with leading and trailing whitespace removed. Each line is expected to hold a single word.
        /// Words should be lowercase when they are used with an <see cref="Analyzer"/> that applies
        /// <see cref="Core.LowerCaseFilter"/> (such as <see cref="Standard.StandardAnalyzer"/>).
        /// <para>
        /// The reader is passed to <c>IOUtils.Close</c> in a <c>finally</c> block, whether or not reading succeeds.
        /// </para>
        /// </summary>
        /// <param name="reader"> <see cref="TextReader"/> supplying the word list </param>
        /// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
        /// <returns> the same <see cref="CharArraySet"/> instance that was passed in as <paramref name="result"/> </returns>
        public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
        {
            try
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    result.Add(line.Trim());
                }
            }
            finally
            {
                IOUtils.Close(reader);
            }

            return result;
        }
예제 #8
0
        /// <summary>
        /// Checks that an integer added to a <c>CharArraySet</c> is found through the same value, an equal
        /// value, its string form and its char[] form, both on the original set and on its unmodifiable wrapper.
        /// </summary>
        public virtual void TestObjectContains()
        {
            CharArraySet charSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            int? one = Convert.ToInt32(1);
            charSet.Add(one);

            // Lookups against the modifiable set.
            assertTrue(charSet.Contains(one));
            assertTrue(charSet.Contains(new int?(1))); // another integer
            assertTrue(charSet.Contains("1"));
            assertTrue(charSet.Contains(new char[] { '1' }));

            // The same lookups must succeed against the unmodifiable wrapper.
            charSet = CharArraySet.UnmodifiableSet(charSet);
            assertTrue(charSet.Contains(one));
            assertTrue(charSet.Contains(new int?(1))); // another integer
            assertTrue(charSet.Contains("1"));
            assertTrue(charSet.Contains(new char[] { '1' }));
        }
예제 #9
0
        /// <summary>
        /// Checks that a <c>J2N.Numerics.Int32</c> added to a <c>CharArraySet</c> is found through the same
        /// instance, a freshly obtained instance for the same value, its string form and its char[] form,
        /// both on the original set and on its unmodifiable wrapper.
        /// </summary>
        public virtual void TestObjectContains()
        {
            CharArraySet charSet = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            J2N.Numerics.Int32 one = J2N.Numerics.Int32.GetInstance(1);
            charSet.Add(one);

            // Lookups against the modifiable set.
            assertTrue(charSet.Contains(one));
            assertTrue(charSet.Contains(J2N.Numerics.Int32.GetInstance(1))); // another integer
            assertTrue(charSet.Contains("1"));
            assertTrue(charSet.Contains(new char[] { '1' }));

            // The same lookups must succeed against the unmodifiable wrapper.
            charSet = CharArraySet.UnmodifiableSet(charSet);
            assertTrue(charSet.Contains(one));
            assertTrue(charSet.Contains(J2N.Numerics.Int32.GetInstance(1))); // another integer
            assertTrue(charSet.Contains("1"));
            assertTrue(charSet.Contains(new char[] { '1' }));
        }
예제 #10
0
 /// <summary>
 /// Fills a <see cref="CharArraySet"/> with the non-comment lines of a <see cref="TextReader"/>, one entry
 /// per line, with leading and trailing whitespace removed. Each line is expected to hold a single word.
 /// Words should be lowercase when they are used with an <see cref="Analyzer"/> that applies
 /// <see cref="Core.LowerCaseFilter"/> (such as <see cref="Standard.StandardAnalyzer"/>).
 /// <para>
 /// A line counts as a comment when the raw, untrimmed line starts with <paramref name="comment"/>
 /// (ordinal comparison). The reader is passed to <c>IOUtils.Close</c> in a <c>finally</c> block,
 /// whether or not reading succeeds.
 /// </para>
 /// </summary>
 /// <param name="reader"> <see cref="TextReader"/> supplying the word list </param>
 /// <param name="comment"> the string that marks a line as a comment </param>
 /// <param name="result"> the <see cref="CharArraySet"/> that receives the words </param>
 /// <returns> the same <see cref="CharArraySet"/> instance that was passed in as <paramref name="result"/> </returns>
 public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
 {
     try
     {
         for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
         {
             if (line.StartsWith(comment, StringComparison.Ordinal))
             {
                 // Comment line: nothing to add.
                 continue;
             }

             result.Add(line.Trim());
         }
     }
     finally
     {
         IOUtils.Close(reader);
     }

     return result;
 }
예제 #11
0
        /// <summary>
        /// Exercises a <c>CharArrayMap</c> of nullable ints: bulk insertion with <c>PutAll</c>, the key set
        /// exposed by <c>Keys</c>, value updates through the entry enumerator, and clearing through the
        /// key set, the map itself and the entry set.
        /// </summary>
        public virtual void TestMethods()
        {
            CharArrayMap<int?> map = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 2, false);
            Dictionary<string, int?> expected = new Dictionary<string, int?>
            {
                { "foo", 1 },
                { "bar", 2 }
            };

            map.PutAll(expected);
            assertEquals(expected.Count, map.Count);

            // Putting the same dictionary again with one extra key must only add that key.
            expected["baz"] = 3;
            map.PutAll(expected);
            assertEquals(expected.Count, map.Count);

            // LUCENENET: Need to cast here - no implicit conversion.
            CharArraySet keys = map.Keys as CharArraySet;
            int visited = 0;

            foreach (string keyText in keys)
            {
                assertTrue(map.ContainsKey(keyText));
                char[] keyChars = keyText.ToCharArray();
                assertTrue(map.ContainsKey(keyChars, 0, keyChars.Length));
                visited++;
            }

            assertEquals(expected.Count, visited);
            assertEquals(expected.Count, keys.Count);
            assertEquals(map.Count, keys.Count);

            // Clearing the key set must empty the map as well.
            keys.Clear();
            assertEquals(0, keys.Count);
            assertEquals(0, map.Count);

            try
            {
                keys.Add("test");
                fail("keySet() allows adding new keys");
            }
            catch (System.NotSupportedException)
            {
                // pass
            }

            map.PutAll(expected);
            assertEquals(expected.Count, keys.Count);
            assertEquals(map.Count, keys.Count);

            // LUCENENET: Need to cast here - no implicit conversion
            IEnumerator<KeyValuePair<string, int?>> entries = (IEnumerator<KeyValuePair<string, int?>>)map.EntrySet().GetEnumerator();

            visited = 0;
            while (entries.MoveNext())
            {
                KeyValuePair<string, int?> entry = entries.Current;
                object entryKey = entry.Key;
                int? entryValue = entry.Value;
                assertEquals(map.Get(entryKey), entryValue);

                // LUCENENET: Need a cast to get to this method because it is not part of the IEnumerator<T> interface
                ((CharArrayMap<int?>.EntryIterator)entries).SetValue(entryValue * 100);
                assertEquals(entryValue * 100, (int)map.Get(entryKey));
                visited++;
            }

            assertEquals(expected.Count, visited);

            // Reset the map to the expected values before the second enumeration pass.
            map.Clear();
            map.PutAll(expected);
            assertEquals(map.size(), visited);

            CharArrayMap<int?>.EntryIterator typedEntries = map.EntrySet().GetEnumerator() as CharArrayMap<int?>.EntryIterator;
            visited = 0;
            while (typedEntries.MoveNext())
            {
                var currentKey = typedEntries.Current.Key;
                int? currentValue = typedEntries.Current.Value;
                assertEquals(expected[currentKey], currentValue);
                typedEntries.SetValue(currentValue * 100);
                assertEquals(currentValue * 100, (int)map.Get(currentKey));
                visited++;
            }

            assertEquals(expected.Count, visited);

            // Clearing the entry set must empty the map.
            map.EntrySet().Clear();
            assertEquals(0, map.size());
            assertEquals(0, map.EntrySet().size());
            assertTrue(map.Count == 0);
        }
예제 #12
0
        /// <summary>
        /// Verifies that the set returned by <c>CharArraySet.UnmodifiableSet</c> rejects every mutation
        /// attempted here (adding a char[], string or StringBuilder, clearing, removing, union, retain-all
        /// and add-all) and that its size and contents are unchanged after each rejected attempt.
        /// </summary>
        public virtual void TestModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.UnionWith(TEST_STOP_WORDS);
            // Size of the populated set before wrapping; every later size check compares against it.
            int size = set.size();

            set = CharArraySet.UnmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            // A word that must never end up in the set.
            string NOT_IN_SET = "SirGallahad";

            assertFalse("Test String already exists in set", set.Contains(NOT_IN_SET));

            // Attempt: add the word as a char[].
            // NOTE(review): IsUnsupportedOperationException() is defined outside this block; presumably it
            // matches the .NET counterpart of Java's UnsupportedOperationException - confirm.
            try
            {
                set.Add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Attempt: add the word as a string through the lower-case add(...) helper.
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Attempt: add the word wrapped in a StringBuilder.
            try
            {
                set.Add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Attempt: clear the set.
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            // Attempt: add the string again, this time after the rejected clear.
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
            // but an extension method for the test fixture (which apparently has a bug), this test is non-critical
            //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
            //// remove() on the iterator
            //try
            //{
            //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
            //    fail("Modified unmodifiable set");
            //}
            //catch (Exception e) when (e.IsUnsupportedOperationException())
            //{
            //    // expected
            //    assertEquals("Size of unmodifiable set has changed", size, set.size());
            //}

            #region LUCENENET Added for better .NET support
            // This test was added for .NET to check the Remove method, since the extension method
            // above fails to execute.
            try
            {
                // Warnings 612 and 618 are the obsolete-member warnings; they are suppressed only around this call.
#pragma warning disable 612, 618
                set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // LUCENENET Specific - added to test .NETified UnionWith method
            try
            {
                set.UnionWith(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            #endregion LUCENENET Added for better .NET support

            // Attempt: retain only a word the set does not contain.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new [] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Attempt: bulk-add the word through the lower-case addAll(...) helper.
            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (Exception e) when(e.IsUnsupportedOperationException())
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // After all rejected mutations, every original stop word must still be present.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }
예제 #13
0
        /// <summary>
        /// Exercises a <c>CharArrayMap</c> of nullable ints: bulk insertion with <c>PutAll</c>, the key set
        /// exposed by <c>Keys</c>, enumeration of the entries, and clearing through the map itself and the
        /// entry set. Several checks from the original test are disabled; see the TODO notes below.
        /// </summary>
        public virtual void TestMethods()
        {
            CharArrayMap<int?> map = new CharArrayMap<int?>(TEST_VERSION_CURRENT, 2, false);

            // TODO: In .NET, we cannot implicitly convert from string to object using generics,
            // so the reference dictionary is keyed by object instead of string:
            //Dictionary<string, int?> hm = new Dictionary<string, int?>();
            Dictionary<object, int?> expected = new Dictionary<object, int?>
            {
                { "foo", 1 },
                { "bar", 2 }
            };

            map.PutAll(expected);
            assertEquals(expected.Count, map.Count);

            // Putting the same dictionary again with one extra key must only add that key.
            expected["baz"] = 3;
            map.PutAll(expected);
            assertEquals(expected.Count, map.Count);

            // TODO: In .NET we cannot make this conversion implicitly.
            CharArraySet keys = map.Keys as CharArraySet;
            int visited = 0;

            foreach (object rawKey in keys)
            {
                assertTrue(map.ContainsKey(rawKey));
                char[] keyChars = (char[])rawKey;
                assertTrue(map.ContainsKey(keyChars, 0, keyChars.Length));
                visited++;
            }

            assertEquals(expected.Count, visited);
            assertEquals(expected.Count, keys.Count);
            assertEquals(map.Count, keys.Count);

            // TODO: This directly contradicts the TestModifyOnUnmodifiable test,
            // where clear is not allowed from the Keys property.
            //cs.Clear();
            //assertEquals(0, cs.Count);
            //assertEquals(0, cm.Count);
            try
            {
                keys.Add("test");
                fail("keySet() allows adding new keys");
            }
            catch (System.NotSupportedException)
            {
                // pass
            }

            map.PutAll(expected);
            assertEquals(expected.Count, keys.Count);
            assertEquals(map.Count, keys.Count);

            IEnumerator<KeyValuePair<object, int?>> entries = IDictionaryExtensions.EntrySet(map).GetEnumerator();

            visited = 0;
            while (entries.MoveNext())
            {
                KeyValuePair<object, int?> entry = entries.Current;
                object entryKey = entry.Key;
                int? entryValue = entry.Value;
                assertEquals(map.Get(entryKey), entryValue);

                // TODO: In .NET the Value property of KeyValuePair is read-only. Do we need a solution?
                //entry.Value = val * 100;
                //assertEquals(val * 100, (int)cm.Get(key));
                visited++;
            }

            assertEquals(expected.Count, visited);

            // Reset the map to the expected values before the second enumeration pass.
            map.Clear();
            map.PutAll(expected);
            assertEquals(map.size(), visited);

            CharArrayMap<int?>.EntryIterator typedEntries = map.EntrySet().GetEnumerator() as CharArrayMap<int?>.EntryIterator;
            visited = 0;
            while (typedEntries.MoveNext())
            {
                char[] currentKeyChars = (char[])typedEntries.Current.Key;
                int? currentValue = typedEntries.Current.Value;
                assertEquals(expected[new string(currentKeyChars)], currentValue);

                // TODO: In .NET the Value property of KeyValuePair is read-only. Do we need a solution?
                //iter2.Value = val * 100;
                //assertEquals(val * 100, (int)cm.Get(keyc));
                visited++;
            }

            assertEquals(expected.Count, visited);

            // TODO: In .NET, the EntrySet extension method makes a copy of the data
            // so clearing it won't work like this.
            map.EntrySet().clear();
            assertEquals(0, map.size());
            assertEquals(0, map.EntrySet().size());
            assertTrue(map.Count == 0);
        }