/// <summary>
/// Checks that a lookup restricted to an (offset, length) window of a char buffer
/// finds the word, both on a regular set and on its unmodifiable wrapper.
/// </summary>
public virtual void TestNonZeroOffset()
{
    string[] vocabulary = { "Hello", "World", "this", "is", "a", "test" };
    // "this" sits at offset 1, length 4, surrounded by padding characters.
    char[] padded = "xthisy".ToCharArray();

    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.AddAll(vocabulary);

    assertTrue(set.Contains(padded, 1, 4));
    assertTrue(set.Contains(new string(padded, 1, 4)));

    // Repeat the same lookups against the unmodifiable view.
    set = CharArraySet.UnmodifiableSet(set);

    assertTrue(set.Contains(padded, 1, 4));
    assertTrue(set.Contains(new string(padded, 1, 4)));
}
/// <summary>
/// Starts from a set created with an initial size of 0, adds every test stop word,
/// and verifies that the size matches and every word can still be found.
/// </summary>
public virtual void TestRehash()
{
    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

    foreach (var stopWord in TEST_STOP_WORDS)
    {
        cas.Add(stopWord);
    }

    assertEquals(TEST_STOP_WORDS.Length, cas.size());

    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue(cas.Contains(stopWord));
    }
}
/// <summary>
/// Adds an integer value to the set and verifies it can be found again through
/// an equal integer, the string "1" and the char array { '1' } - on the
/// original set and on its unmodifiable wrapper.
/// </summary>
public virtual void TestObjectContains()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    int? one = Convert.ToInt32(1);
    set.Add(one);

    assertTrue(set.Contains(one));
    assertTrue(set.Contains(new int?(1))); // a different integer instance
    assertTrue(set.Contains("1"));
    assertTrue(set.Contains(new char[] { '1' }));

    // Same expectations for the unmodifiable view.
    set = CharArraySet.UnmodifiableSet(set);

    assertTrue(set.Contains(one));
    assertTrue(set.Contains(new int?(1))); // a different integer instance
    assertTrue(set.Contains("1"));
    assertTrue(set.Contains(new char[] { '1' }));
}
/// <summary>
/// Collects the stems of <paramref name="word"/> without repeating any stem text.
/// The word itself comes first when the dictionary lookup finds it, followed by
/// the results of <c>Stem</c> in the order they are produced.
/// </summary>
/// <param name="word">Word to find the stems for.</param>
/// <returns>The stems of the word, each stem text appearing once.</returns>
/// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
public IEnumerable<HunspellStem> UniqueStems(String word)
{
    if (word == null)
    {
        throw new ArgumentNullException("word");
    }

    var unique = new List<HunspellStem>();
    // Tracks the stem texts already emitted.
    var seen = new CharArraySet(8, false);

    if (_dictionary.LookupWord(word) != null)
    {
        unique.Add(new HunspellStem(word));
        seen.Add(word);
    }

    foreach (var candidate in Stem(word, null, 0))
    {
        if (seen.Contains(candidate.Stem))
        {
            continue;
        }

        unique.Add(candidate);
        seen.Add(candidate.Stem);
    }

    return unique;
}
/// <summary>
/// Tells whether <paramref name="text"/> is in the stop word set.
/// Returns <c>false</c> when no stop word set is present.
/// </summary>
private bool isStopWord(string text)
{
    if (stopWords == null)
    {
        return false;
    }

    return stopWords.Contains(text);
}
/// <summary>
/// Reports whether the current term (the first <c>termAtt.Length</c> chars of the
/// term buffer) is contained in the keyword set.
/// </summary>
protected override bool IsKeyword()
{
    var termBuffer = termAtt.Buffer;
    int termLength = termAtt.Length;
    return keywordSet.Contains(termBuffer, 0, termLength);
}
/// <summary>
/// Accepts the current token only when its term is not a stop word.
/// </summary>
protected internal override bool Accept()
{
    bool isStopWord = stopWords.Contains(termAtt.Buffer(), 0, termAtt.Length);
    return !isStopWord;
}
/// <summary>
/// Advances to the next token that should be emitted, skipping stop words.
/// A stop-word-looking token at the very end of the input is kept (and flagged
/// as a keyword) when the final end offset shows no separator after it.
/// </summary>
/// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
public override bool IncrementToken()
{
    // Once the end state has been captured, or the input has nothing more, we are done.
    if (endState != null || !m_input.IncrementToken())
    {
        return false;
    }

    int skippedPositions = 0;
    for (;;)
    {
        bool looksLikeStopWord = stopWords.Contains(termAtt.Buffer, 0, termAtt.Length);
        if (!looksLikeStopWord)
        {
            // Not a stopword; return the current token, accounting for skipped positions.
            posIncAtt.PositionIncrement = skippedPositions + posIncAtt.PositionIncrement;
            return true;
        }

        // This token may be a stopword, if it's not the end of the stream.
        int candidatePosInc = posIncAtt.PositionIncrement;
        int candidateEndOffset = offsetAtt.EndOffset;
        State saved = CaptureState();

        if (m_input.IncrementToken())
        {
            // Another token followed, so it was a stopword; skip it.
            skippedPositions += candidatePosInc;
            continue;
        }

        // Input is exhausted: record the end state and inspect the final offset.
        ClearAttributes();
        m_input.End();
        endState = CaptureState();
        int finalEndOffset = offsetAtt.EndOffset;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(finalEndOffset >= candidateEndOffset);
        }

        if (finalEndOffset > candidateEndOffset)
        {
            // There was a token separator after the stopword, so it was a stopword.
            return false;
        }

        // No token separator after the final token that looked like a
        // stop-word; don't filter it.
        RestoreState(saved);
        posIncAtt.PositionIncrement = skippedPositions + posIncAtt.PositionIncrement;
        keywordAtt.IsKeyword = true;
        return true;
    }
}
/// <summary>
/// Finds the stems of the provided word with duplicates removed.
/// </summary>
/// <param name="word"> Word to find the stems for </param>
/// <param name="length"> Passed through to <c>Stem</c> together with <paramref name="word"/> </param>
/// <returns> List of stems for the word, first occurrence of each stem kept in order </returns>
public IList<CharsRef> UniqueStems(char[] word, int length)
{
    IList<CharsRef> allStems = Stem(word, length);

    // Zero or one stem cannot contain duplicates; hand the list back untouched.
    if (allStems.Count < 2)
    {
        return allStems;
    }

#pragma warning disable 612, 618
    CharArraySet seen = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 8, dictionary.ignoreCase);
#pragma warning restore 612, 618

    IList<CharsRef> unique = new List<CharsRef>();
    foreach (CharsRef stem in allStems)
    {
        if (seen.Contains(stem))
        {
            continue;
        }

        unique.Add(stem);
        seen.Add(stem);
    }

    return unique;
}
/// <summary>
/// Wraps a populated set with <c>CharArraySet.UnmodifiableSet</c> and verifies that
/// size and membership are unchanged, and that wrapping <c>null</c> throws
/// <see cref="System.ArgumentNullException"/>.
/// </summary>
public virtual void TestUnmodifiableSet()
{
    var set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.AddAll(TEST_STOP_WORDS);
    set.Add(Convert.ToInt32(1));
    int sizeBeforeWrap = set.size();

    set = CharArraySet.UnmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", sizeBeforeWrap, set.size());

    for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
    {
        assertTrue(set.Contains(TEST_STOP_WORDS[i]));
    }

    // The integer entry is reachable as an int, a string and a char array.
    assertTrue(set.Contains(Convert.ToInt32(1)));
    assertTrue(set.Contains("1"));
    assertTrue(set.Contains(new[] { '1' }));

    try
    {
        CharArraySet.UnmodifiableSet(null);
        fail("can not make null unmodifiable");
    }
    catch (System.ArgumentNullException)
    {
        // Expected. NOTE: in .NET an ArgumentNullException is thrown here
        // rather than a NullReferenceException.
    }
}
/// <summary>
/// Verifies that <c>Clear</c> empties the set and that the set can be
/// repopulated afterwards with the same words.
/// </summary>
public virtual void TestClear()
{
    var set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.AddAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());

    set.Clear();
    assertEquals("not empty", 0, set.size());
    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertFalse(set.Contains(stopWord));
    }

    // Refill and make sure every word is found again.
    set.AddAll(TEST_STOP_WORDS);
    assertEquals("Not all words added", TEST_STOP_WORDS.Length, set.size());
    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue("Set doesn't contain " + stopWord, set.Contains(stopWord));
    }
}
/// <summary>
/// Exercises <c>UnionWith</c> with object-typed inputs: values already present,
/// a mix of present and new values, the original values after a clear, and a
/// mix of primitive, char-array and char-sequence values.
/// </summary>
public virtual void TestUnionWithObject()
{
    var originalValues = new string[] { "sally", "sells", "seashells", "by", "the", "sea", "shore" };
    CharArraySet target = new CharArraySet(TEST_VERSION_CURRENT, originalValues, false);

    var alreadyPresent = new List<object> { "seashells", "sea", "shore" };
    var partlyPresent = new List<object> { "true", "set", "of", "unique", "values", "except", "sells" };
    var mixedTypes = new object[]
    {
        true, (byte)55, (short)44, (int)33, (sbyte)22, (long)11, (char)'\n', "hurray",
        (uint)99, (ulong)89, (ushort)79, new char[] { 't', 'w', 'o' },
        new StringCharSequenceWrapper("testing")
    };

    // Existing values only: nothing changes.
    assertFalse(target.UnionWith(alreadyPresent));
    assertEquals(7, target.Count);
    CollectionAssert.AreEquivalent(originalValues, target);

    // Mixed existing/non-existing values: six new entries ("sells" is already there).
    assertTrue(target.UnionWith(partlyPresent));
    assertEquals(13, target.Count);
    CollectionAssert.AreEquivalent(
        new string[]
        {
            "sally", "sells", "seashells", "by", "the", "sea", "shore",
            "true", "set", "of", "unique", "values", "except"
        },
        target);

    target.Clear();
    assertEquals(0, target.Count);

    // Need to cast here because the .NET return type is void for UnionWith.
    assertTrue(target.UnionWith(originalValues.Cast<object>()));
    CollectionAssert.AreEquivalent(originalValues, target);

    // Mixed types passed as object: 7 originals + 13 new entries.
    assertTrue(target.UnionWith(mixedTypes));
    assertEquals(20, target.Count);

    assertTrue(target.Contains(true));
    assertTrue(target.Contains((byte)55));
    assertTrue(target.Contains((short)44));
    assertTrue(target.Contains((int)33));
    assertTrue(target.Contains((sbyte)22));
    assertTrue(target.Contains((long)11));
    assertTrue(target.Contains((char)'\n'));
    assertTrue(target.Contains("hurray"));
    assertTrue(target.Contains((uint)99));
    assertTrue(target.Contains((ulong)89));
    assertTrue(target.Contains((ushort)79));
    assertTrue(target.Contains(new char[] { 't', 'w', 'o' }));
    assertTrue(target.Contains(new StringCharSequenceWrapper("testing")));
}
/// <summary>
/// Verifies that every <c>Contains</c> overload exercised here rejects a
/// <c>null</c> argument with an <see cref="System.ArgumentException"/>.
/// NOTE: the .NET port expects an ArgumentException where the original
/// test expected a NullPointerException.
/// </summary>
public virtual void TestContainsWithNull()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

    // char[] with offset/length
    try
    {
        set.Contains((char[])null, 0, 10);
        fail("null value must raise NPE");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    // ICharSequence
    try
    {
        set.Contains((ICharSequence)null);
        fail("null value must raise NPE");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    // LUCENENET Specific test for string (since it does not implement ICharSequence)
    try
    {
        set.Contains((string)null);
        fail("null value must raise NPE");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    // object
    try
    {
        set.Contains((object)null);
        fail("null value must raise NPE");
    }
    catch (System.ArgumentException)
    {
        // expected
    }
}
/// <summary>
/// Matches when the base implementation matches and the first
/// <paramref name="len"/> chars of <paramref name="s"/> are not listed as an exception.
/// </summary>
public override bool Matches(char[] s, int len)
{
    if (!base.Matches(s, len))
    {
        return false;
    }

    bool isException = m_exceptions.Contains(s, 0, len);
    return !isException;
}
/// <summary>
/// Accepts the current token only when its term is contained in <c>words</c>.
/// </summary>
protected override bool Accept()
{
    var termBuffer = termAtt.Buffer;
    int termLength = termAtt.Length;
    return words.Contains(termBuffer, 0, termLength);
}
/// <summary>
/// Applies the capitalization rules in place to a single word stored in
/// <paramref name="buffer"/>. All case conversions use
/// <see cref="CultureInfo.InvariantCulture"/> so the result does not depend on
/// the culture of the executing thread.
/// </summary>
/// <param name="buffer">Character buffer holding the word; modified in place.</param>
/// <param name="offset">Index of the word's first character in <paramref name="buffer"/>.</param>
/// <param name="length">Number of characters in the word.</param>
/// <param name="wordCount">Zero-based index of this word within the token.</param>
private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
{
    if (length < 1)
    {
        return;
    }

    // Only the first word is capitalized: lowercase every later word entirely.
    if (onlyFirstWord && wordCount > 0)
    {
        for (int i = 0; i < length; i++)
        {
            buffer[offset + i] = char.ToLower(buffer[offset + i], CultureInfo.InvariantCulture);
        }
        return;
    }

    // Words in the keep set are left as they are, except that the very first
    // word may have its first letter forced to upper case.
    if (keep != null && keep.Contains(buffer, offset, length))
    {
        if (wordCount == 0 && forceFirstLetter)
        {
            buffer[offset] = char.ToUpper(buffer[offset], CultureInfo.InvariantCulture);
        }
        return;
    }

    if (length < minWordLength)
    {
        return;
    }

    // Words starting with one of the accepted prefixes are left untouched.
    if (okPrefix != null)
    {
        foreach (char[] prefix in okPrefix)
        {
            // Don't bother checking if the word is shorter than the prefix.
            if (length >= prefix.Length)
            {
                bool match = true;
                for (int i = 0; i < prefix.Length; i++)
                {
                    if (prefix[i] != buffer[offset + i])
                    {
                        match = false;
                        break;
                    }
                }

                if (match)
                {
                    return;
                }
            }
        }
    }

    // Default rule: upper-case the first character (length >= 1 is guaranteed
    // by the guard at the top) and lower-case the remainder.
    buffer[offset] = char.ToUpper(buffer[offset], CultureInfo.InvariantCulture);
    for (int i = 1; i < length; i++)
    {
        buffer[offset + i] = char.ToLower(buffer[offset + i], CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Verifies that every mutating operation attempted on an unmodifiable set throws
/// <see cref="System.NotSupportedException"/> and leaves the set's size and
/// contents unchanged.
/// </summary>
public virtual void TestModifyOnUnmodifiable()
{
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.AddAll(TEST_STOP_WORDS);
    int expectedSize = set.size();

    set = CharArraySet.UnmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

    string absentWord = "SirGallahad";
    assertFalse("Test String already exists in set", set.Contains(absentWord));

    // Add(char[])
    try
    {
        set.Add(absentWord.ToCharArray());
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(string)
    try
    {
        set.add(absentWord);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // Add(StringBuilder)
    try
    {
        set.Add(new StringBuilder(absentWord));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // clear()
    try
    {
        set.clear();
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Changed unmodifiable set", set.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // add(string) once more, after the failed clear()
    try
    {
        set.add(absentWord);
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // NOTE: This results in a StackOverflow exception. Since this is not a public member of CharArraySet,
    // but an extension method for the test fixture (which apparently has a bug), this test is non-critical
    //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    //// remove() on the iterator
    //try
    //{
    //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
    //    fail("Modified unmodifiable set");
    //}
    //catch (System.NotSupportedException)
    //{
    //    // expected
    //    assertEquals("Size of unmodifiable set has changed", size, set.size());
    //}

    // Added for better .NET support: checks the Remove method, since the
    // extension method above fails to execute.
    try
    {
#pragma warning disable 612, 618
        set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // retainAll(...)
    try
    {
        set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { absentWord }, true));
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
    }

    // addAll(...)
    try
    {
        set.addAll(new[] { absentWord });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
    }

    // LUCENENET Specific - added to test .NETified UnionWith method
    try
    {
        set.UnionWith(new[] { absentWord });
        fail("Modified unmodifiable set");
    }
    catch (System.NotSupportedException)
    {
        // expected
        assertFalse("Test String has been added to unmodifiable set", set.contains(absentWord));
    }

    // After all the rejected modifications every original word must still be present.
    foreach (var stopWord in TEST_STOP_WORDS)
    {
        assertTrue(set.contains(stopWord));
    }
}