Ejemplo n.º 1
0
        /// <summary>
        /// Verifies that the three <c>contains</c> overloads exercised here
        /// (char[] with offset/length, CharSequence, object) reject a null argument.
        /// Each lookup must throw <see cref="System.NullReferenceException"/>;
        /// the test fails as soon as one of them returns normally.
        /// </summary>
        public virtual void testContainsWithNull()
        {
            CharArraySet charSet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

            // One deferred call per overload; they run in declaration order below.
            System.Action[] nullLookups =
            {
                () => charSet.contains((char[])null, 0, 10),
                () => charSet.contains((CharSequence)null),
                () => charSet.contains((object)null)
            };

            foreach (System.Action lookup in nullLookups)
            {
                try
                {
                    lookup();
                    fail("null value must raise NPE");
                }
                catch (System.NullReferenceException)
                {
                    // expected: a null argument must not be accepted
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Checks that the char[], CharSequence and object forms of <c>contains</c>
 /// each throw a NullReferenceException when handed null.
 /// </summary>
 public virtual void testContainsWithNull()
 {
     CharArraySet emptySet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);

     // char[] overload (with offset and length)
     try
     {
       emptySet.contains((char[]) null, 0, 10);
       fail("null value must raise NPE");
     }
     catch (System.NullReferenceException)
     {
       // expected
     }

     // CharSequence overload
     try
     {
       emptySet.contains((CharSequence) null);
       fail("null value must raise NPE");
     }
     catch (System.NullReferenceException)
     {
       // expected
     }

     // object overload
     try
     {
       emptySet.contains((object) null);
       fail("null value must raise NPE");
     }
     catch (System.NullReferenceException)
     {
       // expected
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks lookups of terms that contain an unpaired high surrogate (U+D800),
        /// first in an ignore-case set and then in a case-sensitive one.
        /// </summary>
        /// <remarks>
        /// In the ignore-case set both the added upper-case terms and their
        /// lower-case counterparts must be found. In the case-sensitive set only
        /// the added terms may be found; the lower-case counterparts must be absent.
        /// </remarks>
        public virtual void testSingleHighSurrogate()
        {
            string missing  = "Term %s is missing in the set";
            string falsePos = "Term %s is in the set but shouldn't";

            string[] upperArr = new string[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };

            string[]     lowerArr = new string[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };
            CharArraySet set      = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);

            // Pass 1: ignoreCase = true
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
                assertTrue(string.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
            }

            // Pass 2: ignoreCase = false
            set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
                // The failure message must name the term that was actually looked up
                // (the lower-case one), not the upper-case term that is legitimately present.
                assertFalse(string.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Exercises a <c>Version.LUCENE_30</c> set with terms that contain
        /// surrogate-pair characters.
        /// </summary>
        /// <remarks>
        /// For both the ignore-case and the case-sensitive set, every added term
        /// must be found while its counterpart from the second array must not.
        /// </remarks>
        public virtual void testSupplementaryCharsBWCompat()
        {
            string missingMsg = "Term %s is missing in the set";
            string presentMsg = "Term %s is in the set but shouldn't";

            // for reference see
            // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
            string[] addedTerms   = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
            string[] counterparts = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

            // ignoreCase = true
            CharArraySet legacySet = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
            for (int k = 0; k < addedTerms.Length; k++)
            {
                legacySet.add(addedTerms[k]);
            }
            for (int k = 0; k < addedTerms.Length; k++)
            {
                string added = addedTerms[k];
                string other = counterparts[k];

                assertTrue(string.format(Locale.ROOT, missingMsg, added), legacySet.contains(added));
                assertFalse(string.format(Locale.ROOT, presentMsg, other), legacySet.contains(other));
            }

            // ignoreCase = false
            legacySet = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
            for (int k = 0; k < addedTerms.Length; k++)
            {
                legacySet.add(addedTerms[k]);
            }
            for (int k = 0; k < addedTerms.Length; k++)
            {
                string added = addedTerms[k];
                string other = counterparts[k];

                assertTrue(string.format(Locale.ROOT, missingMsg, added), legacySet.contains(added));
                assertFalse(string.format(Locale.ROOT, presentMsg, other), legacySet.contains(other));
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Wraps a populated set with <c>CharArraySet.unmodifiableSet</c> and checks
        /// that its size and contents are unchanged, then checks that wrapping
        /// null throws a NullReferenceException.
        /// </summary>
        public virtual void testUnmodifiableSet()
        {
            CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            words.addAll(TEST_STOP_WORDS);
            words.add(Convert.ToInt32(1));

            // Remember the size before wrapping so it can be compared afterwards.
            int sizeBeforeWrap = words.size();
            CharArraySet readOnly = CharArraySet.unmodifiableSet(words);

            assertEquals("Set size changed due to unmodifiableSet call", sizeBeforeWrap, readOnly.size());
            foreach (string stopword in TEST_STOP_WORDS)
            {
                assertTrue(readOnly.contains(stopword));
            }

            // The integer entry must be reachable as int, string and char[].
            assertTrue(readOnly.contains(Convert.ToInt32(1)));
            assertTrue(readOnly.contains("1"));
            assertTrue(readOnly.contains(new char[] { '1' }));

            try
            {
                CharArraySet.unmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
                // expected
            }
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Asserts that <paramref name="wordset"/> has three entries, contains
 /// "ONE", "two" and "three", and does not contain "four".
 /// </summary>
 /// <param name="wordset">Word set to verify.</param>
 private void checkSet(CharArraySet wordset)
 {
     assertEquals(3, wordset.size());

     // "ONE": case is not modified; "two": surrounding whitespace is removed
     foreach (string expected in new string[] { "ONE", "two", "three" })
     {
         assertTrue(wordset.contains(expected));
     }
     assertFalse(wordset.contains("four"));
 }
 /// <summary>
 /// Verifies the expected content of a loaded word set: size 3, with
 /// "ONE", "two" and "three" present and "four" absent.
 /// </summary>
 /// <param name="wordset">Word set to verify.</param>
 private void CheckSet(CharArraySet wordset)
 {
     string[] mustContain = new string[]
     {
         "ONE",   // case is not modified
         "two",   // surrounding whitespace is removed
         "three"
     };

     assertEquals(3, wordset.size());
     for (int idx = 0; idx < mustContain.Length; idx++)
     {
         assertTrue(wordset.contains(mustContain[idx]));
     }
     assertFalse(wordset.contains("four"));
 }
Ejemplo n.º 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testComments() throws Exception
        /// <summary>
        /// Loads a word list with "#" as the comment marker and checks that the
        /// commented line contributes no entry, with or without the marker.
        /// </summary>
        public virtual void testComments()
        {
            StringReader input  = new StringReader("ONE\n  two \nthree\n#comment");
            CharArraySet loaded = WordlistLoader.getWordSet(input, "#", TEST_VERSION_CURRENT);

            checkSet(loaded);

            // Neither the raw comment line nor its text may show up as a word.
            assertFalse(loaded.contains("#comment"));
            assertFalse(loaded.contains("comment"));
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds "Stop" filter factories from one file, two files and a
        /// snowball-format file, checking the loaded stop words and the
        /// <c>IgnoreCase</c> flag each time, and finally checks the defaults.
        /// </summary>
        public virtual void TestInform()
        {
            IResourceLoader loader = new ClasspathResourceLoader(this.GetType());
            assertTrue("loader is null and it shouldn't be", loader != null);

            // Single word file.
            StopFilterFactory stopFactory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
            CharArraySet      stopSet     = stopFactory.StopWords;
            assertTrue("words is null and it shouldn't be", stopSet != null);
            assertTrue("words Size: " + stopSet.size() + " is not: " + 2, stopSet.size() == 2);
            assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

            // Two comma-separated word files.
            stopFactory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
            stopSet     = stopFactory.StopWords;
            assertTrue("words is null and it shouldn't be", stopSet != null);
            assertTrue("words Size: " + stopSet.size() + " is not: " + 4, stopSet.size() == 4);
            assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

            // Snowball-format word file.
            stopFactory = (StopFilterFactory)TokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
            stopSet     = stopFactory.StopWords;
            assertEquals(8, stopSet.size());
            foreach (string pronoun in new string[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" })
            {
                assertTrue(stopSet.contains(pronoun));
            }

            // defaults
            stopFactory = (StopFilterFactory)TokenFilterFactory("Stop");
            assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.StopWords);
            assertEquals(false, stopFactory.IgnoreCase);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds "CommonGramsQuery" filter factories from one file, two files and
        /// a snowball-format file, checking the loaded common words and the
        /// <c>IgnoreCase</c> flag.
        /// </summary>
        public virtual void TestInform()
        {
            // LUCENENET: Need to set to a type that is in the same path as the files
            // (the original used typeof(TestStopFilter) with no second argument).
            IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers), "Lucene.Net");
            assertTrue("loader is null and it shouldn't be", loader != null);

            // Single word file.
            CommonGramsQueryFilterFactory gramsFactory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt", "ignoreCase", "true");
            CharArraySet commonWords = gramsFactory.CommonWords;
            assertTrue("words is null and it shouldn't be", commonWords != null);
            assertTrue("words Size: " + commonWords.size() + " is not: " + 2, commonWords.size() == 2);
            assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

            // Two comma-separated word files.
            gramsFactory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
            commonWords  = gramsFactory.CommonWords;
            assertTrue("words is null and it shouldn't be", commonWords != null);
            assertTrue("words Size: " + commonWords.size() + " is not: " + 4, commonWords.size() == 4);
            assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

            // Snowball-format word file.
            gramsFactory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, loader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
            commonWords  = gramsFactory.CommonWords;
            assertEquals(8, commonWords.size());
            foreach (string pronoun in new string[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" })
            {
                assertTrue(commonWords.contains(pronoun));
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Checks that a lookup on a slice of a char buffer (offset 1, length 4 of
        /// "xthisy") finds the word "this", both directly and through a string
        /// built from the same slice, for a plain and an unmodifiable set.
        /// </summary>
        public virtual void testNonZeroOffset()
        {
            const int sliceStart  = 1;
            const int sliceLength = 4;

            string[]     vocabulary = new string[] { "Hello", "World", "this", "is", "a", "test" };
            char[]       buffer     = "xthisy".ToCharArray();
            CharArraySet wordSet    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            wordSet.addAll(vocabulary);
            assertTrue(wordSet.contains(buffer, sliceStart, sliceLength));
            assertTrue(wordSet.contains(new string(buffer, sliceStart, sliceLength)));

            // test unmodifiable
            wordSet = CharArraySet.unmodifiableSet(wordSet);
            assertTrue(wordSet.contains(buffer, sliceStart, sliceLength));
            assertTrue(wordSet.contains(new string(buffer, sliceStart, sliceLength)));
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Runs a <c>StopAnalyzer</c> with a custom stop set over a sentence and
        /// checks that no stop word is emitted and that each surviving token
        /// carries the expected position increment.
        /// </summary>
        /// <remarks>
        /// The sentence has twelve words, three of which ("good", "test",
        /// "analyzer") are in the stop set, so <c>expectedIncr</c> lists nine
        /// increments; values above 1 account for the removed words.
        /// </remarks>
        public virtual void TestStopListPositions()
        {
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "good", "test", "analyzer" }, false);
            StopAnalyzer newStop      = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
            string       s            = "This is a good test of the english stop analyzer with positions";

            int[]       expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };
            TokenStream stream       = newStop.TokenStream("test", s);

            try
            {
                assertNotNull(stream);
                int i = 0;
                ICharTermAttribute          termAtt    = stream.GetAttribute <ICharTermAttribute>();
                IPositionIncrementAttribute posIncrAtt = stream.AddAttribute <IPositionIncrementAttribute>();

                stream.Reset();
                while (stream.IncrementToken())
                {
                    string text = termAtt.ToString();
                    assertFalse(stopWordsSet.contains(text));
                    assertEquals(expectedIncr[i++], posIncrAtt.PositionIncrement);
                }
                stream.End();

                // Without this check a stream that yields too few tokens (or none
                // at all) would pass, because the loop body would simply run less.
                assertEquals(expectedIncr.Length, i);
            }
            finally
            {
                // Always release the stream, even when an assertion above fails.
                IOUtils.CloseWhileHandlingException(stream);
            }
        }
Ejemplo n.º 13
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the wrapped input until a token passes this filter.
        /// </summary>
        /// <remarks>
        /// Tokens found in <c>stopTable</c> are skipped. The remaining tokens are
        /// classified by the type of their first character: lower- or upper-case
        /// letter tokens are accepted only when longer than one character,
        /// OTHER_LETTER tokens are always accepted, and anything else is dropped.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when an accepted token is available; <c>false</c> once the
        /// input has no more tokens.
        /// </returns>
        public override bool incrementToken()
        {
            while (input.incrementToken())
            {
                char[] buffer = termAtt.buffer();
                int    length = termAtt.length();

                // why not key off token type here assuming ChineseTokenizer comes first?
                if (stopTable.contains(buffer, 0, length))
                {
                    continue;
                }

                switch (char.getType(buffer[0]))
                {
                    case char.LOWERCASE_LETTER:
                    case char.UPPERCASE_LETTER:
                        // English word/token should larger than 1 character.
                        if (length > 1)
                        {
                            return true;
                        }
                        break;

                    case char.OTHER_LETTER:
                        // One Chinese character as one Chinese word.
                        // Chinese word extraction to be added later here.
                        return true;
                }
            }
            return false;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Fills a set with the test stop words, clears it, and fills it again,
 /// checking size and membership after each step.
 /// </summary>
 public virtual void testClear()
 {
     CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

     words.addAll(TEST_STOP_WORDS);
     assertEquals("Not all words added", TEST_STOP_WORDS.Length, words.size());

     // After clear() the set must be empty and none of the words may remain.
     words.clear();
     assertEquals("not empty", 0, words.size());
     foreach (string removed in TEST_STOP_WORDS)
     {
       assertFalse(words.contains(removed));
     }

     // The cleared set must accept the same words again.
     words.addAll(TEST_STOP_WORDS);
     assertEquals("Not all words added", TEST_STOP_WORDS.Length, words.size());
     foreach (string restored in TEST_STOP_WORDS)
     {
       assertTrue(words.contains(restored));
     }
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Checks <c>clear()</c>: a populated set becomes empty, no longer contains
        /// any of the previously added words, and can be repopulated afterwards.
        /// </summary>
        public virtual void testClear()
        {
            int          expectedCount = TEST_STOP_WORDS.Length;
            CharArraySet stopSet       = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            // populate
            stopSet.addAll(TEST_STOP_WORDS);
            assertEquals("Not all words added", expectedCount, stopSet.size());

            // clear
            stopSet.clear();
            assertEquals("not empty", 0, stopSet.size());
            foreach (string word in TEST_STOP_WORDS)
            {
                assertFalse(stopSet.contains(word));
            }

            // repopulate
            stopSet.addAll(TEST_STOP_WORDS);
            assertEquals("Not all words added", expectedCount, stopSet.size());
            foreach (string word in TEST_STOP_WORDS)
            {
                assertTrue(stopSet.contains(word));
            }
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void assertClasspathDelegation(ResourceLoader rl) throws Exception
        /// <summary>
        /// Checks that <paramref name="rl"/> can open a bundled stop-word resource,
        /// instantiate a class by name, and open a class-file resource.
        /// </summary>
        /// <param name="rl">Resource loader under test.</param>
        private void assertClasspathDelegation(ResourceLoader rl)
        {
            // try a stopwords file from classpath
            System.IO.StreamReader stopwordReader = new System.IO.StreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), Encoding.UTF8);
            CharArraySet           stopwords      = WordlistLoader.getSnowballWordSet(stopwordReader, TEST_VERSION_CURRENT);
            assertTrue(stopwords.contains("you"));

            // try to load a class; we use string comparison because classloader may be different...
            // NOTE: Type.FullName will not always yield results identical to Java's Class.getName.
            string bufferClassName = "org.apache.lucene.analysis.util.RollingCharBuffer";
            assertEquals(bufferClassName, rl.newInstance(bufferClassName, typeof(object)).GetType().FullName);

            // theoretically classes should also be loadable:
            IOUtils.closeWhileHandlingException(rl.openResource("java/lang/String.class"));
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Creates a "CommonGramsQuery" factory without arguments and checks that
        /// its word set is present and contains "the", then checks the tokens the
        /// resulting filter produces for "testing the factory".
        /// </summary>
        public virtual void TestDefaults()
        {
            CommonGramsQueryFilterFactory defaultFactory = (CommonGramsQueryFilterFactory)TokenFilterFactory("CommonGramsQuery");
            CharArraySet commonWords = defaultFactory.CommonWords;

            assertTrue("words is null and it shouldn't be", commonWords != null);
            assertTrue(commonWords.contains("the"));

            // Whitespace-tokenized input; the filter is expected to emit the two bigrams.
            Tokenizer   source   = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
            TokenStream filtered = defaultFactory.Create(source);

            AssertTokenStreamContents(filtered, new string[] { "testing_the", "the_factory" });
        }
Ejemplo n.º 18
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRehash() throws Exception
        /// <summary>
        /// Adds every test stop word to a set created with an initial size of 0
        /// and checks that all of them are counted and can be found afterwards.
        /// </summary>
        public virtual void testRehash()
        {
            CharArraySet growingSet = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

            foreach (string word in TEST_STOP_WORDS)
            {
                growingSet.add(word);
            }

            assertEquals(TEST_STOP_WORDS.Length, growingSet.size());
            foreach (string word in TEST_STOP_WORDS)
            {
                assertTrue(growingSet.contains(word));
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Test the static #copy() function with a CharArraySet as a source
        /// </summary>
        /// <remarks>
        /// Copies an ignore-case set and a case-sensitive set, checks that each
        /// copy has the size and case behaviour of its source, and checks that
        /// terms added to a copy do not appear in either source set.
        /// </remarks>
        public virtual void testCopyCharArraySet()
        {
            CharArraySet setIngoreCase    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            setIngoreCase.addAll(TEST_STOP_WORDS);
            setIngoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy         = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
            CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

            // Each copy is compared with its own source; previously the second
            // assertion re-checked `copy`, leaving copyCaseSens' size unverified.
            assertEquals(setIngoreCase.size(), copy.size());
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(setIngoreCase.contains(@string));
                assertFalse(setCaseSensitive.contains(@string));
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Adds an integer to the set and checks that it can be found through the
        /// same value, an equal integer, the string "1" and the char array
        /// { '1' }, for both the plain set and its unmodifiable wrapper.
        /// </summary>
        public virtual void testObjectContains()
        {
            CharArraySet numbers = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            int?         one     = Convert.ToInt32(1);

            numbers.add(one);

            // modifiable set
            assertTrue(numbers.contains(one));
            assertTrue(numbers.contains(new int?(1)));     // another integer
            assertTrue(numbers.contains("1"));
            assertTrue(numbers.contains(new char[] { '1' }));

            // test unmodifiable
            numbers = CharArraySet.unmodifiableSet(numbers);
            assertTrue(numbers.contains(one));
            assertTrue(numbers.contains(new int?(1)));     // another integer
            assertTrue(numbers.contains("1"));
            assertTrue(numbers.contains(new char[] { '1' }));
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Loads a snowball-format stop-word list that mixes comments, blank
        /// lines and single- and multi-word lines, and checks that exactly the
        /// seven stop words are extracted.
        /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSnowballListLoading() throws java.io.IOException
        public virtual void testSnowballListLoading()
        {
            string snowballText =
                "|comment\n" +                 // commented line
                " |comment\n" +                // commented line with leading whitespace
                "\n" +                         // blank line
                "  \t\n" +                     // line with only whitespace
                " |comment | comment\n" +      // commented line with comment
                "ONE\n" +                      // stopword, in uppercase
                "   two   \n" +                // stopword with leading/trailing space
                " three   four five \n" +      // multiple stopwords
                "six seven | comment\n";       // multiple stopwords + comment
            CharArraySet loaded = WordlistLoader.getSnowballWordSet(new StringReader(snowballText), TEST_VERSION_CURRENT);

            assertEquals(7, loaded.size());
            foreach (string expected in new string[] { "ONE", "two", "three", "four", "five", "six", "seven" })
            {
                assertTrue(loaded.contains(expected));
            }
        }
Ejemplo n.º 22
0
	  /// <summary>
	  /// Computes the stems of the provided word and removes duplicates,
	  /// keeping the first occurrence of each stem in its original order.
	  /// </summary>
	  /// <param name="word"> Word to find the stems for </param>
	  /// <param name="length"> Length value passed on to <c>stem</c> together with the word </param>
	  /// <returns> List of distinct stems; when fewer than two stems were found,
	  /// the list returned by <c>stem</c> is handed back unchanged </returns>
	  public IList<CharsRef> uniqueStems(char[] word, int length)
	  {
		IList<CharsRef> candidates = stem(word, length);
		if (candidates.Count < 2)
		{
		  // Zero or one stem cannot contain a duplicate.
		  return candidates;
		}

		// Tracks the stems already emitted; honours the dictionary's case setting.
		CharArraySet seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
		IList<CharsRef> unique = new List<CharsRef>();
		foreach (CharsRef candidate in candidates)
		{
		  if (seen.contains(candidate))
		  {
			continue;
		  }
		  unique.Add(candidate);
		  seen.add(candidate);
		}
		return unique;
	  }
Ejemplo n.º 23
0
        /// <summary>
        /// Test the static #copy() function with a plain <c>ISet&lt;string&gt;</c>
        /// (the counterpart of a JDK Set) as a source
        /// </summary>
        /// <remarks>
        /// Checks that the copy has the source's size, contains the original
        /// words but not their upper-case forms, and that terms added to the copy
        /// do not appear in the source set.
        /// </remarks>
        public virtual void testCopyJDKSet()
        {
            ISet <string> set = new HashSet <string>();

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            set.addAll(TEST_STOP_WORDS);

            CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

            // A single size check suffices; the identical assertion used to be
            // repeated on the next line.
            assertEquals(set.Count, copy.size());

            assertTrue(copy.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copy.contains(@string));
            }

            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(set.Contains(@string));
            }
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Analyzes a sentence with a <c>StopAnalyzer</c> built from a custom stop
        /// set and checks that none of the emitted tokens is in that set.
        /// </summary>
        public virtual void TestStopList()
        {
            CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "good", "test", "analyzer" }, false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
            TokenStream tokens = analyzer.TokenStream("test", "This is a good test of the english stop analyzer");
            try
            {
                assertNotNull(tokens);
                ICharTermAttribute term = tokens.GetAttribute<ICharTermAttribute>();

                tokens.Reset();
                while (tokens.IncrementToken())
                {
                    // Every surviving token must be absent from the stop set.
                    assertFalse(customStops.contains(term.ToString()));
                }
                tokens.End();
            }
            finally
            {
                // Release the stream even if an assertion failed.
                IOUtils.CloseWhileHandlingException(tokens);
            }
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Checks that a <c>StopAnalyzer</c> configured with the stop words
        /// "good", "test" and "analyzer" never emits one of them for the sample
        /// sentence.
        /// </summary>
        public virtual void TestStopList()
        {
            string[]     stopTerms = new string[] { "good", "test", "analyzer" };
            CharArraySet stopSet   = new CharArraySet(TEST_VERSION_CURRENT, stopTerms, false);
            StopAnalyzer analyzer  = new StopAnalyzer(TEST_VERSION_CURRENT, stopSet);
            TokenStream  ts        = analyzer.TokenStream("test", "This is a good test of the english stop analyzer");

            try
            {
                assertNotNull(ts);
                ICharTermAttribute termAttribute = ts.GetAttribute <ICharTermAttribute>();

                ts.Reset();
                while (ts.IncrementToken())
                {
                    string emitted = termAttribute.ToString();
                    assertFalse(stopSet.contains(emitted));
                }
                ts.End();
            }
            finally
            {
                // Close the stream regardless of the test outcome.
                IOUtils.CloseWhileHandlingException(ts);
            }
        }
Ejemplo n.º 26
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopList() throws java.io.IOException
        /// <summary>
        /// Analyzes a sentence with a <c>StopAnalyzer</c> whose stop set is built
        /// from "good", "test" and "analyzer", and checks that no emitted token
        /// is contained in that set.
        /// </summary>
        public virtual void testStopList()
        {
            CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer analyzer    = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
            TokenStream  tokens      = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");

            try
            {
                assertNotNull(tokens);
                CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

                tokens.reset();
                while (tokens.incrementToken())
                {
                    // Every surviving token must be absent from the stop set.
                    assertFalse(customStops.contains(term.ToString()));
                }
                tokens.end();
            }
            finally
            {
                // Release the stream even if an assertion failed.
                IOUtils.closeWhileHandlingException(tokens);
            }
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Test the static #copy() function with a CharArraySet as a source
        /// </summary>
        /// <remarks>
        /// Copies an ignore-case set and a case-sensitive set, checks that each
        /// copy has the size and case behaviour of its source, and checks that
        /// terms added to a copy do not appear in either source set.
        /// </remarks>
        public virtual void testCopyCharArraySet()
        {
            CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList<string> stopwords = TEST_STOP_WORDS;
            IList<string> stopwordsUpper = new List<string>();
            foreach (string @string in stopwords)
            {
              stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            setIngoreCase.addAll(TEST_STOP_WORDS);
            setIngoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
            CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

            // Each copy is compared with its own source; previously the second
            // assertion re-checked `copy`, leaving copyCaseSens' size unverified.
            assertEquals(setIngoreCase.size(), copy.size());
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
              assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList<string> newWords = new List<string>();
            foreach (string @string in stopwords)
            {
              newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
              assertFalse(setIngoreCase.contains(@string));
              assertFalse(setCaseSensitive.contains(@string));
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Checks that <c>CharArraySet.unmodifiableSet</c> preserves the size and
        /// contents of the wrapped set and throws a NullReferenceException when
        /// given null.
        /// </summary>
        public virtual void testUnmodifiableSet()
        {
            CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.addAll(TEST_STOP_WORDS);
            source.add(Convert.ToInt32(1));

            // Size is captured before wrapping so the two can be compared.
            int originalSize = source.size();
            CharArraySet wrapped = CharArraySet.unmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", originalSize, wrapped.size());

            foreach (string stopword in TEST_STOP_WORDS)
            {
              assertTrue(wrapped.contains(stopword));
            }

            // The integer entry must be reachable as int, string and char[].
            assertTrue(wrapped.contains(Convert.ToInt32(1)));
            assertTrue(wrapped.contains("1"));
            assertTrue(wrapped.contains(new char[] { '1' }));

            try
            {
              CharArraySet.unmodifiableSet(null);
              fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
              // expected
            }
        }
Ejemplo n.º 29
0
 /// <summary>
 /// Checks a <c>Version.LUCENE_30</c> set against terms containing
 /// surrogate-pair characters: with and without ignoreCase, each added term
 /// must be found and its counterpart from the second array must not.
 /// </summary>
 public virtual void testSupplementaryCharsBWCompat()
 {
     string notFoundMsg = "Term %s is missing in the set";
     string unexpectedMsg = "Term %s is in the set but shouldn't";
     // for reference see
     // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
     string[] inserted = new string[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
     string[] variants = new string[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};

     // ignoreCase = true
     CharArraySet compatSet = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
     for (int n = 0; n < inserted.Length; n++)
     {
       compatSet.add(inserted[n]);
     }
     for (int n = 0; n < inserted.Length; n++)
     {
       assertTrue(string.format(Locale.ROOT, notFoundMsg, inserted[n]), compatSet.contains(inserted[n]));
       assertFalse(string.format(Locale.ROOT, unexpectedMsg, variants[n]), compatSet.contains(variants[n]));
     }

     // ignoreCase = false
     compatSet = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
     for (int n = 0; n < inserted.Length; n++)
     {
       compatSet.add(inserted[n]);
     }
     for (int n = 0; n < inserted.Length; n++)
     {
       assertTrue(string.format(Locale.ROOT, notFoundMsg, inserted[n]), compatSet.contains(inserted[n]));
       assertFalse(string.format(Locale.ROOT, unexpectedMsg, variants[n]), compatSet.contains(variants[n]));
     }
 }
Ejemplo n.º 30
0
        /// <summary>
        /// Adds terms containing a lone high surrogate (<c>\uD800</c>) to sets built with
        /// <c>Version.LUCENE_30</c> and checks membership of the added terms and of their
        /// lower-case counterparts, once for each value of the third constructor argument.
        /// </summary>
        public virtual void testSingleHighSurrogateBWComapt()
        {
            // .NET composite-format placeholders; a Java-style "%s" would be emitted verbatim by string.Format.
            string missing = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";
            string[] upperArr = new string[] {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};

            string[] lowerArr = new string[] {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};

            // First pass: third constructor argument is true.
            CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                if (i == lowerArr.Length - 1)
                {
                    // The last pair is the only one that also contains a full surrogate pair;
                    // its lower-case form is expected to be absent.
                    assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
                }
                else
                {
                    assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
                }
            }

            // Second pass: third constructor argument is false; no lower-case form may match.
            set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
Ejemplo n.º 31
0
 /// <summary>
 /// Builds a set whose second constructor argument is 0, adds every entry of
 /// <c>TEST_STOP_WORDS</c>, then checks the reported size and that each entry is found.
 /// </summary>
 public virtual void testRehash()
 {
     CharArraySet charSet = new CharArraySet(TEST_VERSION_CURRENT, 0, true);

     foreach (string word in TEST_STOP_WORDS)
     {
         charSet.add(word);
     }

     assertEquals(TEST_STOP_WORDS.Length, charSet.size());

     foreach (string word in TEST_STOP_WORDS)
     {
         assertTrue(charSet.contains(word));
     }
 }
Ejemplo n.º 32
0
        /// <summary>
        /// Adds terms containing a lone high surrogate (<c>\uD800</c>) to sets built with
        /// <c>TEST_VERSION_CURRENT</c>. With the third constructor argument set to true both the
        /// added terms and their lower-case counterparts must be found; with false only the
        /// added terms may be found.
        /// </summary>
        public virtual void TestSingleHighSurrogate()
        {
            string missing = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";
            string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };

            string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
            }
            set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                // Report the term that is actually being probed (the lower-case one).
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
Ejemplo n.º 33
0
 /// <summary>
 /// Reports whether <paramref name="text"/> is contained in <c>stopWords</c>.
 /// </summary>
 /// <param name="text">The term to look up.</param>
 /// <returns>
 /// <c>false</c> when <c>stopWords</c> is null; otherwise the result of
 /// <c>stopWords.contains(text)</c>.
 /// </returns>
 internal bool isStopWord(string text)
 {
     if (stopWords == null)
     {
         return false;
     }

     return stopWords.contains(text);
 }
Ejemplo n.º 34
0
        /// <summary>
        /// Wraps a populated set with <c>CharArraySet.unmodifiableSet</c> and checks that every
        /// mutating call attempted below raises <see cref="System.NotSupportedException"/>
        /// while the set's size and contents stay as they were.
        /// </summary>
        public virtual void testModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.addAll(TEST_STOP_WORDS);
            int size = set.size();

            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";

            assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

            // add(char[])
            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(string)
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(StringBuilder)
            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // clear()
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(object)
            try
            {
                set.add((object)NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
            // remove() on the iterator
            try
            {
                set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // retainAll with a set holding only the absent term. The term is wrapped in an
            // array so it is passed as one word rather than as a bare string.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // addAll with a one-element collection holding the absent term.
            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // The original entries must still all be present.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }
Ejemplo n.º 35
0
        /// <summary>
        /// Wraps a populated set with <c>CharArraySet.unmodifiableSet</c> and checks that every
        /// mutating call attempted below raises <see cref="System.NotSupportedException"/>
        /// while the set's size and contents stay as they were.
        /// </summary>
        public virtual void testModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.addAll(TEST_STOP_WORDS);
            int size = set.size();
            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";
            assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

            // add(char[])
            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(string)
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(StringBuilder)
            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // clear()
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(object)
            try
            {
                set.add((object) NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
            // remove() on the iterator
            try
            {
                set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // retainAll with a set holding only the absent term. The term is wrapped in an
            // array so it is passed as one word rather than as a bare string.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // addAll with a one-element collection holding the absent term.
            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // The original entries must still all be present.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }
Ejemplo n.º 36
0
 /// <summary>
 /// Accepts the first <paramref name="len"/> chars of <paramref name="s"/> only when the
 /// base implementation accepts them and <c>exceptions</c> does not contain them.
 /// </summary>
 /// <param name="s">Character buffer holding the candidate.</param>
 /// <param name="len">Number of chars of <paramref name="s"/> to consider, starting at index 0.</param>
 /// <returns><c>true</c> if the base match succeeds and the candidate is not in <c>exceptions</c>.</returns>
 public override bool matches(char[] s, int len)
 {
     // The exceptions lookup is only performed once the base check has passed.
     if (!base.matches(s, len))
     {
         return false;
     }

     bool isException = exceptions.contains(s, 0, len);
     return !isException;
 }
Ejemplo n.º 37
0
 /// <summary>
 /// Decides whether the current term is kept: it is accepted only when the first
 /// <c>termAtt.length()</c> chars of <c>termAtt.buffer()</c> are not contained in <c>stopWords</c>.
 /// </summary>
 /// <returns><c>true</c> if the current term is not a stop word; otherwise <c>false</c>.</returns>
 protected internal override bool Accept()
 {
     bool isStopWord = stopWords.contains(termAtt.buffer(), 0, termAtt.length());
     return !isStopWord;
 }
Ejemplo n.º 38
0
        /// <summary>
        /// Looks up a word through a (buffer, offset, length) window that starts at a
        /// non-zero offset, and through the equivalent string, on both a regular set and
        /// its <c>CharArraySet.unmodifiableSet</c> wrapper.
        /// </summary>
        public virtual void testNonZeroOffset()
        {
            string[] vocabulary = { "Hello", "World", "this", "is", "a", "test" };
            // Chars 1..4 of the buffer spell "this"; the surrounding 'x' and 'y' must be ignored.
            char[] buffer = "xthisy".ToCharArray();

            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.addAll(vocabulary);

            assertTrue(set.contains(buffer, 1, 4));
            assertTrue(set.contains(new string(buffer, 1, 4)));

            // Repeat the same lookups against the unmodifiable wrapper.
            set = CharArraySet.unmodifiableSet(set);

            assertTrue(set.contains(buffer, 1, 4));
            assertTrue(set.contains(new string(buffer, 1, 4)));
        }
Ejemplo n.º 39
0
 /// <summary>
 /// Adds the integer 1 to a set and checks that it is found when looked up as the same
 /// value, as another integer 1, as the string "1" and as the char array {'1'}, on
 /// both the regular set and its <c>CharArraySet.unmodifiableSet</c> wrapper.
 /// </summary>
 public virtual void testObjectContains()
 {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
     int? number = Convert.ToInt32(1);
     set.add(number);

     // Lookups on the modifiable set.
     assertTrue(set.contains(number));
     assertTrue(set.contains(new int?(1))); // a separately created integer with the same value
     assertTrue(set.contains("1"));
     assertTrue(set.contains(new char[] { '1' }));

     // Same lookups on the unmodifiable wrapper.
     set = CharArraySet.unmodifiableSet(set);

     assertTrue(set.contains(number));
     assertTrue(set.contains(new int?(1))); // a separately created integer with the same value
     assertTrue(set.contains("1"));
     assertTrue(set.contains(new char[] { '1' }));
 }
Ejemplo n.º 40
0
        /// <summary>
        /// Runs a sentence through a <c>StopAnalyzer</c> configured with a custom stop set and
        /// checks that no emitted term is in that set and that each token's position
        /// increment equals the corresponding entry of the expected table.
        /// </summary>
        public virtual void TestStopListPositions()
        {
            string[] stopTerms = { "good", "test", "analyzer" };
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, stopTerms, false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

            string sentence = "This is a good test of the english stop analyzer with positions";
            int[] expectedIncr = { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

            TokenStream stream = analyzer.TokenStream("test", sentence);
            try
            {
                assertNotNull(stream);

                ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
                IPositionIncrementAttribute posIncrAtt = stream.AddAttribute<IPositionIncrementAttribute>();
                int tokenIndex = 0;

                stream.Reset();
                while (stream.IncrementToken())
                {
                    // Every emitted term must have survived the stop filter.
                    assertFalse(stopWordsSet.contains(termAtt.ToString()));

                    assertEquals(expectedIncr[tokenIndex], posIncrAtt.PositionIncrement);
                    tokenIndex++;
                }
                stream.End();
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(stream);
            }
        }
Ejemplo n.º 41
0
        /// <summary>
        /// Wraps a populated set with <c>CharArraySet.UnmodifiableSet</c> and checks that every
        /// mutating call attempted below raises <see cref="System.NotSupportedException"/>
        /// while the set's size and contents stay as they were.
        /// </summary>
        public virtual void TestModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.AddAll(TEST_STOP_WORDS);
            int expectedSize = set.size();

            set = CharArraySet.UnmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, set.size());

            string absentTerm = "SirGallahad";
            assertFalse("Test String already exists in set", set.Contains(absentTerm));

            // Add with a char[] argument.
            try
            {
                set.Add(absentTerm.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // add with a string argument.
            try
            {
                set.add(absentTerm);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // Add with a StringBuilder argument.
            try
            {
                set.Add(new StringBuilder(absentTerm));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // clear()
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(absentTerm));
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // add with a string argument, attempted a second time after the failed clear().
            try
            {
                set.add(absentTerm);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // NOTE: The removeAll check is disabled because it results in a StackOverflow exception.
            // removeAll is not a public member of CharArraySet but an extension method for the test
            // fixture (which apparently has a bug), so this check is non-critical. The disabled call was:
            //     set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
            // and it was expected to throw System.NotSupportedException and leave the size unchanged.
            // (That check was changed in 3.1, as a contains() call on the given Collection using the
            // "correct" iterator's current key (now a char[]) on a Set<String> would not hit any element
            // of the CAS and therefore never call remove() on the iterator.)

            #region Added for better .NET support
            // Added for .NET to cover the Remove method, since the removeAll extension method
            // mentioned above fails to execute.
            try
            {
#pragma warning disable 612, 618
                set.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }
            #endregion

            // retainAll with a set holding only the absent term.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { absentTerm }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", expectedSize, set.size());
            }

            // addAll with a one-element array.
            try
            {
                set.addAll(new[] { absentTerm });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
            }

            // LUCENENET Specific - covers the .NETified UnionWith method
            try
            {
                set.UnionWith(new[] { absentTerm });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(absentTerm));
            }

            // The original entries must still all be present.
            foreach (string stopWord in TEST_STOP_WORDS)
            {
                assertTrue(set.contains(stopWord));
            }
        }