Exemplo n.º 1
0
        public virtual void TestToString()
        {
            CharArraySet set = CharArraySet.Copy(TEST_VERSION_CURRENT, Collections.Singleton("test"));

            assertEquals("[test]", set.ToString());
            set.add("test2");
            assertTrue(set.ToString().Contains(", "));

            set = CharArraySet.Copy(Version.LUCENE_30, Collections.Singleton("test"));
            assertEquals("[test]", set.ToString());
            set.add("test2");
            assertTrue(set.ToString().Contains(", "));
        }
Exemplo n.º 2
0
        public virtual void TestSingleHighSurrogate()
        {
            string missing  = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";

            string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };

            string[]     lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };
            CharArraySet set      = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);

            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
            }
            set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertFalse(string.Format(falsePos, upperArr[i]), set.contains(lowerArr[i]));
            }
        }
Exemplo n.º 3
0
        public virtual void TestSupplementaryCharsBWCompat()
        {
            string missing  = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";

            // for reference see
            // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
            string[]     upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
            string[]     lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };
            CharArraySet set      = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);

            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
            set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads stopwords from a stopword list in Snowball format.
        /// <para>
        /// The snowball format is the following:
        /// <ul>
        /// <li>Lines may contain multiple words separated by whitespace.
        /// <li>The comment character is the vertical line (&#124;).
        /// <li>Lines may contain trailing comments.
        /// </ul>
        /// </para>
        /// </summary>
        /// <param name="reader"> Reader containing a Snowball stopword list </param>
        /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
        /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public static CharArraySet getSnowballWordSet(java.io.Reader reader, CharArraySet result) throws java.io.IOException
        public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
        {
            BufferedReader br = null;

            try
            {
                br = getBufferedReader(reader);
                string line = null;
                while ((line = br.readLine()) != null)
                {
                    int comment = line.IndexOf('|');
                    if (comment >= 0)
                    {
                        line = line.Substring(0, comment);
                    }
                    string[] words = line.Split("\\s+", true);
                    for (int i = 0; i < words.Length; i++)
                    {
                        if (words[i].Length > 0)
                        {
                            result.add(words[i]);
                        }
                    }
                }
            }
            finally
            {
                IOUtils.close(br);
            }
            return(result);
        }
Exemplo n.º 5
0
        public virtual void TestToString()
        {
            CharArraySet set = CharArraySet.Copy(TEST_VERSION_CURRENT, new JCG.List <string> {
                "test"
            });

            assertEquals("[test]", set.ToString());
            set.add("test2");
            assertTrue(set.ToString().Contains(", "));

#pragma warning disable 612, 618
            set = CharArraySet.Copy(Version.LUCENE_30, new JCG.List <string> {
                "test"
            });
#pragma warning restore 612, 618
            assertEquals("[test]", set.ToString());
            set.add("test2");
            assertTrue(set.ToString().Contains(", "));
        }
Exemplo n.º 6
0
        public virtual void TestCopyCharArraySet()
        {
            CharArraySet setIngoreCase    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper());
            }
            setIngoreCase.addAll(TEST_STOP_WORDS);
            setIngoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy         = CharArraySet.Copy(TEST_VERSION_CURRENT, setIngoreCase);
            CharArraySet copyCaseSens = CharArraySet.Copy(TEST_VERSION_CURRENT, setCaseSensitive);

            assertEquals(setIngoreCase.size(), copy.size());
            assertEquals(setCaseSensitive.size(), copy.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(setIngoreCase.contains(@string));
                assertFalse(setCaseSensitive.contains(@string));
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
        /// leading and trailing whitespace). Every line of the Reader should contain only
        /// one word. The words need to be in lowercase if you make use of an
        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
        /// </summary>
        /// <param name="reader"> Reader containing the wordlist </param>
        /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
        /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
        public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
        {
            BufferedReader br = null;

            try
            {
                br = getBufferedReader(reader);
                string word = null;
                while ((word = br.readLine()) != null)
                {
                    result.add(word.Trim());
                }
            }
            finally
            {
                IOUtils.close(br);
            }
            return(result);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
        /// leading and trailing whitespace). Every line of the Reader should contain only
        /// one word. The words need to be in lowercase if you make use of an
        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
        /// </summary>
        /// <param name="reader"> Reader containing the wordlist </param>
        /// <param name="comment"> The string representing a comment. </param>
        /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
        /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
        public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
        {
            BufferedReader br = null;

            try
            {
                br = getBufferedReader(reader);
                string word = null;
                while ((word = br.ReadLine()) != null)
                {
                    if (word.StartsWith(comment, StringComparison.Ordinal) == false)
                    {
                        result.add(word.Trim());
                    }
                }
            }
            finally
            {
                IOUtils.Close(br);
            }
            return(result);
        }
Exemplo n.º 9
0
        public virtual void TestModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.AddAll(TEST_STOP_WORDS);
            int size = set.size();

            set = CharArraySet.UnmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";

            assertFalse("Test String already exists in set", set.Contains(NOT_IN_SET));

            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
            // but an extension method for the test fixture (which apparently has a bug), this test is non-critical
            //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
            //// remove() on the iterator
            //try
            //{
            //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
            //    fail("Modified unmodifiable set");
            //}
            //catch (System.NotSupportedException)
            //{
            //    // expected
            //    assertEquals("Size of unmodifiable set has changed", size, set.size());
            //}

            #region Added for better .NET support
            // This test was added for .NET to check the Remove method, since the extension method
            // above fails to execute.
            try
            {
                set.Remove(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            #endregion

            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new [] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }