예제 #1
0
        public virtual void TestNonZeroOffset()
        {
            // Looks up a word through a sub-range of a larger char buffer (offset 1, length 4)
            // and through the equivalent string, on the set and on its unmodifiable wrapper.
            const int start = 1;
            const int count = 4;
            char[] padded = "xthisy".ToCharArray();
            string[] entries = { "Hello", "World", "this", "is", "a", "test" };

            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.AddAll(entries);
            assertTrue(set.Contains(padded, start, count));
            assertTrue(set.Contains(new string(padded, start, count)));

            // The same lookups must succeed through the unmodifiable wrapper.
            CharArraySet readOnly = CharArraySet.UnmodifiableSet(set);
            assertTrue(readOnly.Contains(padded, start, count));
            assertTrue(readOnly.Contains(new string(padded, start, count)));
        }
예제 #2
0
 public virtual void TestRehash()
 {
     // The set is created with an initial size argument of 0; presumably this forces it to
     // grow (rehash) while the stop words are added -- confirm against CharArraySet.
     CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
     foreach (var stopWord in TEST_STOP_WORDS)
     {
         cas.Add(stopWord);
     }

     // Every word must have been stored and must still be found.
     assertEquals(TEST_STOP_WORDS.Length, cas.size());
     foreach (var stopWord in TEST_STOP_WORDS)
     {
         assertTrue(cas.Contains(stopWord));
     }
 }
예제 #3
0
 public virtual void TestObjectContains()
 {
     // A numeric value added to the set must be found again by the same value, by an equal
     // value, and by its text form "1" given as a string or as a char[].
     int? number = Convert.ToInt32(1);
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
     set.Add(number);

     assertTrue(set.Contains(number));
     assertTrue(set.Contains(new int?(1))); // a separate but equal value
     assertTrue(set.Contains("1"));
     assertTrue(set.Contains(new[] { '1' }));

     // The unmodifiable wrapper must answer the same lookups identically.
     CharArraySet readOnly = CharArraySet.UnmodifiableSet(set);
     assertTrue(readOnly.Contains(number));
     assertTrue(readOnly.Contains(new int?(1))); // a separate but equal value
     assertTrue(readOnly.Contains("1"));
     assertTrue(readOnly.Contains(new[] { '1' }));
 }
        /// <summary>
        ///   Collects the stems of the provided word, never returning the same stem text twice.
        /// </summary>
        /// <param name="word">Word whose stems are wanted.</param>
        /// <returns>
        ///   The word itself first when the dictionary lookup finds it, followed by every result
        ///   of <c>Stem</c> whose stem text has not been collected yet.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
        public IEnumerable<HunspellStem> UniqueStems(String word) {
            if (word == null) {
                throw new ArgumentNullException("word");
            }

            var unique = new List<HunspellStem>();
            // Stem texts already emitted; created with ignoreCase = false.
            var seen = new CharArraySet(8, false);

            var foundInDictionary = _dictionary.LookupWord(word) != null;
            if (foundInDictionary) {
                unique.Add(new HunspellStem(word));
                seen.Add(word);
            }

            foreach (var candidate in Stem(word, null, 0)) {
                if (seen.Contains(candidate.Stem)) {
                    continue;
                }

                unique.Add(candidate);
                seen.Add(candidate.Stem);
            }

            return unique;
        }
예제 #5
0
 /// <summary>
 /// Tells whether <paramref name="text"/> is contained in the stop word collection.
 /// </summary>
 /// <param name="text">Text to look up.</param>
 /// <returns>
 /// <c>false</c> when no stop word collection is set; otherwise the result of its
 /// <c>Contains</c> call.
 /// </returns>
 private bool isStopWord(string text)
 {
     if (stopWords == null)
     {
         return false;
     }

     return stopWords.Contains(text);
 }
예제 #6
0
 /// <summary>
 /// Checks whether the current term is listed in the keyword set.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the keyword set contains the first <c>termAtt.Length</c> characters of
 /// the term buffer; otherwise <c>false</c>.
 /// </returns>
 protected override bool IsKeyword()
 {
     var termChars = termAtt.Buffer;
     var termLength = termAtt.Length;
     return keywordSet.Contains(termChars, 0, termLength);
 }
예제 #7
0
 /// <summary>
 /// Accepts the current token only when its term is not a stop word.
 /// </summary>
 /// <returns>
 /// <c>false</c> when the stop word set contains the first <c>termAtt.Length</c> characters
 /// of the term buffer; otherwise <c>true</c>.
 /// </returns>
 protected internal override bool Accept()
 {
     var termChars = termAtt.Buffer();
     var termLength = termAtt.Length;
     bool listed = stopWords.Contains(termChars, 0, termLength);
     return !listed;
 }
예제 #8
0
        /// <summary>
        /// Advances to the next token that should be emitted, skipping stop words except for a
        /// trailing stop word that is not followed by any further input offset.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a token is available in the attributes; <c>false</c> when the input
        /// is exhausted (including every call made after the end state has been captured).
        /// </returns>
        public override bool IncrementToken()
        {
            // endState is only assigned below, once the input ran out right after a stop word;
            // from then on there is nothing more to produce.
            if (endState != null)
            {
                return(false);
            }

            if (!m_input.IncrementToken())
            {
                return(false);
            }

            // Sum of the position increments of the stop words skipped so far; it is added to
            // the position increment of the token that is eventually returned.
            int skippedPositions = 0;

            while (true)
            {
                if (stopWords.Contains(termAtt.Buffer, 0, termAtt.Length))
                {
                    // Remember this token's increment and end offset before advancing the input.
                    int posInc    = posIncAtt.PositionIncrement;
                    int endOffset = offsetAtt.EndOffset;
                    // This token may be a stopword, if it's not end:
                    State sav = CaptureState();
                    if (m_input.IncrementToken())
                    {
                        // It was a stopword; skip it
                        skippedPositions += posInc;
                    }
                    else
                    {
                        // Input is exhausted: run End() on the input and keep the resulting
                        // state so that later calls return false immediately.
                        ClearAttributes();
                        m_input.End();
                        endState = CaptureState();
                        int finalEndOffset = offsetAtt.EndOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(finalEndOffset >= endOffset);
                        }
                        if (finalEndOffset > endOffset)
                        {
                            // OK there was a token separator after the
                            // stopword, so it was a stopword
                            return(false);
                        }
                        else
                        {
                            // No token separator after final token that
                            // looked like a stop-word; don't filter it:
                            RestoreState(sav);
                            posIncAtt.PositionIncrement = skippedPositions + posIncAtt.PositionIncrement;
                            // NOTE(review): presumably flagged as a keyword so that later
                            // filters leave this token unchanged -- confirm.
                            keywordAtt.IsKeyword        = true;
                            return(true);
                        }
                    }
                }
                else
                {
                    // Not a stopword; return the current token:
                    posIncAtt.PositionIncrement = skippedPositions + posIncAtt.PositionIncrement;
                    return(true);
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Stems the provided word and drops repeated stems from the result
        /// </summary>
        /// <param name="word"> Characters of the word to stem </param>
        /// <param name="length"> Length value handed to <c>Stem</c> together with <paramref name="word"/> </param>
        /// <returns>
        /// The stems in their original order with later repeats removed; the list produced by
        /// <c>Stem</c> is returned unchanged when it holds fewer than two entries
        /// </returns>
        public IList<CharsRef> UniqueStems(char[] word, int length)
        {
            IList<CharsRef> candidates = Stem(word, length);
            if (candidates.Count >= 2)
            {
                // Stems already emitted; case handling follows the dictionary's ignoreCase flag.
#pragma warning disable 612, 618
                CharArraySet seen = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 8, dictionary.ignoreCase);
#pragma warning restore 612, 618
                IList<CharsRef> unique = new List<CharsRef>();
                foreach (CharsRef candidate in candidates)
                {
                    if (seen.Contains(candidate))
                    {
                        continue;
                    }

                    unique.Add(candidate);
                    seen.Add(candidate);
                }
                return unique;
            }

            // Zero or one stem: nothing to de-duplicate.
            return candidates;
        }
예제 #10
0
        public virtual void TestUnmodifiableSet()
        {
            // Wrapping a populated set must keep its size and contents, and wrapping null
            // must be rejected with an ArgumentNullException.
            var source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.AddAll(TEST_STOP_WORDS);
            source.Add(Convert.ToInt32(1));
            int sizeBeforeWrap = source.size();

            CharArraySet readOnly = CharArraySet.UnmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", sizeBeforeWrap, readOnly.size());
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(readOnly.Contains(TEST_STOP_WORDS[i]));
            }
            assertTrue(readOnly.Contains(Convert.ToInt32(1)));
            assertTrue(readOnly.Contains("1"));
            assertTrue(readOnly.Contains(new char[] { '1' }));

            try
            {
                CharArraySet.UnmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.ArgumentNullException) // NOTE: In .NET we throw an ArgumentNullException, not a NullReferenceException
            {
                // expected: a null set cannot be wrapped
            }
        }
예제 #11
0
 public virtual void TestClear()
 {
     // Fill the set, clear it, verify it is empty, then refill it and verify it is usable again.
     var set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
     set.AddAll(TEST_STOP_WORDS);
     assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());

     set.Clear();
     assertEquals("not empty", 0, set.size());
     foreach (var stopWord in TEST_STOP_WORDS)
     {
         assertFalse(set.Contains(stopWord));
     }

     // A cleared set must accept the same words again.
     set.AddAll(TEST_STOP_WORDS);
     assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());
     foreach (var stopWord in TEST_STOP_WORDS)
     {
         assertTrue("Set doesn't contain " + stopWord, set.Contains(stopWord));
     }
 }
예제 #12
0
        public virtual void TestUnionWithObject()
        {
            // Exercises UnionWith with object-typed input: values that all exist already, a mix
            // of existing and new values, and a collection of mixed non-string types.
            string[] seed = { "sally", "sells", "seashells", "by", "the", "sea", "shore" };
            List<object> alreadyPresent = new List<object> { "seashells", "sea", "shore" };
            List<object> partlyPresent = new List<object> { "true", "set", "of", "unique", "values", "except", "sells" };
            object[] mixedTypes =
            {
                true, (byte)55, (short)44, 33, (sbyte)22, (long)11, '\n', "hurray",
                (uint)99, (ulong)89, (ushort)79, new char[] { 't', 'w', 'o' },
                new StringCharSequenceWrapper("testing")
            };
            string[] expectedAfterPartial =
            {
                "sally", "sells", "seashells", "by", "the", "sea", "shore",
                "true", "set", "of", "unique", "values", "except"
            };

            CharArraySet target = new CharArraySet(TEST_VERSION_CURRENT, seed, false);

            // Only values that are already in the set: nothing changes.
            assertFalse(target.UnionWith(alreadyPresent));
            assertEquals(7, target.Count);
            CollectionAssert.AreEquivalent(seed, target);

            // Six new values plus one existing value ("sells").
            assertTrue(target.UnionWith(partlyPresent));
            assertEquals(13, target.Count);
            CollectionAssert.AreEquivalent(expectedAfterPartial, target);

            // Start over from an empty set and re-add the seed values as objects.
            target.Clear();
            assertEquals(0, target.Count);
            assertTrue(target.UnionWith(seed.Cast<object>())); // Need to cast here because the .NET return type is void for UnionWith.
            CollectionAssert.AreEquivalent(seed, target);

            // Thirteen values of assorted types on top of the seven seed values.
            assertTrue(target.UnionWith(mixedTypes));
            assertEquals(20, target.Count);
            assertTrue(target.Contains(true));
            assertTrue(target.Contains((byte)55));
            assertTrue(target.Contains((short)44));
            assertTrue(target.Contains(33));
            assertTrue(target.Contains((sbyte)22));
            assertTrue(target.Contains((long)11));
            assertTrue(target.Contains('\n'));
            assertTrue(target.Contains("hurray"));
            assertTrue(target.Contains((uint)99));
            assertTrue(target.Contains((ulong)89));
            assertTrue(target.Contains((ushort)79));
            assertTrue(target.Contains(new char[] { 't', 'w', 'o' }));
            assertTrue(target.Contains(new StringCharSequenceWrapper("testing")));
        }
예제 #13
0
 public virtual void TestContainsWithNull()
 {
     // Each Contains overload must reject a null argument. In .NET this surfaces as an
     // ArgumentException (or a subclass) rather than a NullReferenceException.
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

     // char[] buffer with offset and length
     try
     {
         set.Contains((char[])null, 0, 10);
         fail("null value must raise NPE");
     }
     catch (System.ArgumentException)
     {
         // expected
     }

     // ICharSequence
     try
     {
         set.Contains((ICharSequence)null);
         fail("null value must raise NPE");
     }
     catch (System.ArgumentException)
     {
         // expected
     }

     // LUCENENET Specific test for string (since it does not implement ICharSequence)
     try
     {
         set.Contains((string)null);
         fail("null value must raise NPE");
     }
     catch (System.ArgumentException)
     {
         // expected
     }

     // object
     try
     {
         set.Contains((object)null);
         fail("null value must raise NPE");
     }
     catch (System.ArgumentException)
     {
         // expected
     }
 }
예제 #14
0
 /// <summary>
 /// Matches when the base implementation matches and the exception set does not contain
 /// the first <paramref name="len"/> characters of <paramref name="s"/>.
 /// </summary>
 /// <param name="s">Character buffer to test.</param>
 /// <param name="len">Number of characters of <paramref name="s"/> to consider.</param>
 /// <returns><c>true</c> on a match that is not listed as an exception; otherwise <c>false</c>.</returns>
 public override bool Matches(char[] s, int len)
 {
     if (!base.Matches(s, len))
     {
         return false;
     }

     return !m_exceptions.Contains(s, 0, len);
 }
예제 #15
0
 /// <summary>
 /// Accepts the current token only when its term is listed in the word set.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the word set contains the first <c>termAtt.Length</c> characters of
 /// the term buffer; otherwise <c>false</c>.
 /// </returns>
 protected override bool Accept()
 {
     var termChars = termAtt.Buffer;
     var termLength = termAtt.Length;
     return words.Contains(termChars, 0, termLength);
 }
예제 #16
0
        /// <summary>
        /// Capitalizes, in place, the word that occupies <paramref name="length"/> characters of
        /// <paramref name="buffer"/> starting at <paramref name="offset"/>.
        /// </summary>
        /// <remarks>
        /// Every case conversion uses <see cref="CultureInfo.InvariantCulture"/>, so the result
        /// does not depend on the culture of the executing thread (for example, "I" is never
        /// lowercased to a dotless "ı" under a Turkish culture).
        /// <para>
        /// The word is left unchanged when it is shorter than <c>minWordLength</c> or starts
        /// with one of the <c>okPrefix</c> entries. A word found in <c>keep</c> is left
        /// unchanged apart from an optional upper-cased first letter.
        /// </para>
        /// </remarks>
        /// <param name="buffer">Character buffer holding the word; modified in place.</param>
        /// <param name="offset">Index of the word's first character in <paramref name="buffer"/>.</param>
        /// <param name="length">Number of characters in the word; values below 1 are ignored.</param>
        /// <param name="wordCount">Word counter; zero is treated as the first word.</param>
        private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
        {
            if (length < 1)
            {
                return;
            }

            CultureInfo invariant = CultureInfo.InvariantCulture;

            if (onlyFirstWord && wordCount > 0)
            {
                // Only the first word gets capitalized; later words are lowercased entirely.
                for (int i = 0; i < length; i++)
                {
                    buffer[offset + i] = char.ToLower(buffer[offset + i], invariant);
                }
                return;
            }

            if (keep != null && keep.Contains(buffer, offset, length))
            {
                // Words in the keep set retain their casing, except that the very first
                // word may be forced to start with an upper-case letter.
                if (wordCount == 0 && forceFirstLetter)
                {
                    buffer[offset] = char.ToUpper(buffer[offset], invariant);
                }
                return;
            }

            if (length < minWordLength)
            {
                return;
            }

            if (okPrefix != null)
            {
                foreach (char[] prefix in okPrefix)
                {
                    if (length < prefix.Length)
                    {
                        // The word is shorter than this prefix, so it cannot start with it.
                        continue;
                    }

                    bool match = true;
                    for (int i = 0; i < prefix.Length; i++)
                    {
                        if (prefix[i] != buffer[offset + i])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // Words starting with an accepted prefix are left untouched.
                        return;
                    }
                }
            }

            // length >= 1 is guaranteed here: upper-case the first character and
            // lower-case the remainder.
            buffer[offset] = char.ToUpper(buffer[offset], invariant);

            for (int i = 1; i < length; i++)
            {
                buffer[offset + i] = char.ToLower(buffer[offset + i], invariant);
            }
        }
예제 #17
0
        public virtual void TestModifyOnUnmodifiable()
        {
            // Verifies that every mutating operation on an unmodifiable CharArraySet throws
            // NotSupportedException and leaves both the size and the contents untouched.
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.AddAll(TEST_STOP_WORDS);
            int size = set.size();
            set = CharArraySet.UnmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";
            assertFalse("Test String already exists in set", set.Contains(NOT_IN_SET));

            // Add as char[]
            try
            {
                set.Add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Add as string
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Add as StringBuilder
            try
            {
                set.Add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Clear
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            // NOTE(review): this block repeats the set.add(NOT_IN_SET) check made earlier in this
            // method verbatim; presumably a different overload (e.g. an object-typed add) was
            // meant to be exercised here -- confirm.
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
            // but an extension method for the test fixture (which apparently has a bug), this test is non-critical
            //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
            //// remove() on the iterator
            //try
            //{
            //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
            //    fail("Modified unmodifiable set");
            //}
            //catch (System.NotSupportedException)
            //{
            //    // expected
            //    assertEquals("Size of unmodifiable set has changed", size, set.size());
            //}

            #region Added for better .NET support
            // This test was added for .NET to check the Remove method, since the extension method
            // above fails to execute.
            try
            {
#pragma warning disable 612, 618
                set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            #endregion

            // Retain only elements of another set (would remove every current element)
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new [] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // Bulk add through the Java-style addAll helper
            try
            {
                set.addAll(new[] { NOT_IN_SET});
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // LUCENENET Specific - added to test .NETified UnionWith method
            try
            {
                set.UnionWith(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // After all rejected modifications the original stop words must still be present.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }