示例#1
0
        /// <summary>
        /// Runs <c>CheckAnalysisConsistency</c> on the fixed input "wmgddzunizdomqyj" using an
        /// analyzer whose reader is wrapped in a MockCharFilter and a MappingCharFilter and
        /// whose token chain is a MockTokenizer followed by a CommonGramsFilter.
        /// </summary>
        public virtual void Test()
        {
            // Word list handed to the CommonGramsFilter.
            CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            commonWords.add("jjp");
            commonWords.add("wlmwoknt");
            commonWords.add("tcgyreo");

            // Replacement rules handed to the MappingCharFilter.
            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.Add("mtqlpi", "");
            mapBuilder.Add("mwoknt", "jjp");
            mapBuilder.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mapBuilder.Build();

            Analyzer analyzer = Analyzer.NewAnonymous(
                createComponents: (fieldName, reader) =>
                {
                    // NOTE(review): the trailing -65 argument is reproduced verbatim — confirm its meaning against MockTokenizer.
                    Tokenizer tokenizer = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
                    TokenFilter commonGrams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
                    return new TokenStreamComponents(tokenizer, commonGrams);
                },
                initReader: (fieldName, reader) =>
                {
                    // MockCharFilter is applied first, MappingCharFilter wraps it.
                    reader = new MockCharFilter(reader, 0);
                    return new MappingCharFilter(charMap, reader);
                });

            CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
        }
示例#2
0
        /// <summary>
        /// Checks <c>ToString()</c> on a CharArraySet copied from a singleton collection: the
        /// text is "[test]" with one entry and contains ", " once a second entry is added.
        /// The same checks are repeated for a copy made with <c>Version.LUCENE_30</c>.
        /// </summary>
        public virtual void testToString()
        {
            CharArraySet current = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test"));
            assertEquals("[test]", current.ToString());
            current.add("test2");
            assertTrue(current.ToString().Contains(", "));

            CharArraySet legacy = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test"));
            assertEquals("[test]", legacy.ToString());
            legacy.add("test2");
            assertTrue(legacy.ToString().Contains(", "));
        }
示例#3
0
 /// <summary>
 /// Expects a CzechAnalyzer built with a set containing "hole" as its third constructor
 /// argument to analyze "hole desek" into "hole" and "desk".
 /// </summary>
 public virtual void TestWithStemExclusionSet()
 {
     CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     exclusions.add("hole");

     CzechAnalyzer analyzer = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
     string[] expected = new string[] { "hole", "desk" };
     AssertAnalyzesTo(analyzer, "hole desek", expected);
 }
        /// <summary>
        /// Runs <c>CheckAnalysisConsistency</c> on the fixed input "wmgddzunizdomqyj" with an
        /// <c>AnalyzerAnonymousInnerClassHelper</c> built from a three-word CharArraySet and a
        /// three-rule NormalizeCharMap.
        /// </summary>
        public virtual void Test()
        {
            CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            commonWords.add("jjp");
            commonWords.add("wlmwoknt");
            commonWords.add("tcgyreo");

            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.Add("mtqlpi", "");
            mapBuilder.Add("mwoknt", "jjp");
            mapBuilder.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mapBuilder.Build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
            CheckAnalysisConsistency(Random(), analyzer, false, "wmgddzunizdomqyj");
        }
 /// <summary>
 /// Expects a BulgarianAnalyzer built with a set containing "строеве" as its third
 /// constructor argument to analyze "строевете строеве" into "строй" and "строеве".
 /// </summary>
 public virtual void TestWithStemExclusionSet()
 {
     CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     exclusions.add("строеве");

     Analyzer analyzer = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
     string[] expected = new string[] { "строй", "строеве" };
     AssertAnalyzesTo(analyzer, "строевете строеве", expected);
 }
 /// <summary>
 /// Expects a RussianAnalyzer built with its default stop set and a set containing
 /// "представление" to leave that word unchanged in the analyzed output of a sample sentence.
 /// </summary>
 public virtual void TestWithStemExclusionSet()
 {
     CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     exclusions.add("представление");

     Analyzer analyzer = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.DefaultStopSet, exclusions);
     string[] expected = new string[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" };
     AssertAnalyzesTo(analyzer, "Вместе с тем о силе электромагнитной энергии имели представление еще", expected);
 }
 /// <summary>
 /// Expects a GermanStemFilter over a SetKeywordMarkerFilter (keyword set: "fischen") and a
 /// LowerCaseTokenizer reading "Fischen Trinken" to emit "fischen" and "trink".
 /// </summary>
 public virtual void TestWithKeywordAttribute()
 {
     CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     keywords.add("fischen");

     // Build the chain inside-out: reader -> tokenizer -> keyword marker -> stemmer.
     StringReader input = new StringReader("Fischen Trinken");
     LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, input);
     SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(tokenizer, keywords);
     GermanStemFilter stemmed = new GermanStemFilter(marked);

     AssertTokenStreamContents(stemmed, new string[] { "fischen", "trink" });
 }
 /// <summary>
 /// Expects a SoraniAnalyzer built with a set containing "پیاوە" as its third constructor
 /// argument to return that word unchanged.
 /// </summary>
 public virtual void TestWithStemExclusionSet()
 {
     CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     exclusions.add("پیاوە");

     Analyzer analyzer = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
     string[] expected = new string[] { "پیاوە" };
     AssertAnalyzesTo(analyzer, "پیاوە", expected);
 }
示例#9
0
        /// <summary>
        /// For a CharArraySet created with <c>Version.LUCENE_30</c>, adds terms containing
        /// supplementary characters and asserts that each added term is contained while its
        /// lower-case counterpart is not. The check is run with the third constructor
        /// argument set to <c>true</c> and then to <c>false</c>.
        /// </summary>
        public virtual void testSupplementaryCharsBWCompat()
        {
            string missing  = "Term %s is missing in the set";
            string falsePos = "Term %s is in the set but shouldn't";

            // for reference see
            // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
            string[] upperArr = new string[] { "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB" };
            string[] lowerArr = new string[] { "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b" };

            // Same expectations for both flag values, so run the identical check twice.
            foreach (bool flag in new bool[] { true, false })
            {
                CharArraySet terms = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, flag);
                foreach (string upper in upperArr)
                {
                    terms.add(upper);
                }

                for (int idx = 0; idx < upperArr.Length; idx++)
                {
                    assertTrue(string.format(Locale.ROOT, missing, upperArr[idx]), terms.contains(upperArr[idx]));
                    assertFalse(string.format(Locale.ROOT, falsePos, lowerArr[idx]), terms.contains(lowerArr[idx]));
                }
            }
        }
 /// <summary>
 /// Expects "hole desek" to analyze to "hole" and "desk" when "hole" is in the set passed
 /// as the third CzechAnalyzer constructor argument.
 /// </summary>
 public virtual void TestWithStemExclusionSet()
 {
     CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     protectedWords.add("hole");

     CzechAnalyzer czech = new CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
     string[] expectedTokens = new string[] { "hole", "desk" };
     AssertAnalyzesTo(czech, "hole desek", expectedTokens);
 }
示例#11
0
        /// <summary>
        /// Checks CharArraySet membership for terms that contain an unpaired high surrogate
        /// (<c>\uD800</c>). With the third constructor argument <c>true</c>, both the added
        /// terms and their lower-case counterparts must be contained; with <c>false</c>, only
        /// the added terms must be contained.
        /// </summary>
        public virtual void testSingleHighSurrogate()
        {
            string missing  = "Term %s is missing in the set";
            string falsePos = "Term %s is in the set but shouldn't";

            string[] upperArr = new string[] { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };
            string[] lowerArr = new string[] { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };

            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
                assertTrue(string.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
            }

            set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
                // The failure message must name the term actually being checked (the
                // lower-case one); it previously reported the upper-case term.
                assertFalse(string.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
示例#12
0
        /// <summary>
        /// Checks that <c>CharArraySet.unmodifiableSet</c> returns a set with the same size
        /// and the same entries (stop words plus the integer 1, also found as "1" and as a
        /// char array), and that passing <c>null</c> raises the exception caught below.
        /// </summary>
        public virtual void testUnmodifiableSet()
        {
            CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.addAll(TEST_STOP_WORDS);
            source.add(Convert.ToInt32(1));
            int expectedSize = source.size();

            CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());
            foreach (string word in TEST_STOP_WORDS)
            {
                assertTrue(readOnly.contains(word));
            }

            // The integer entry must be found by value, by its string form and by its chars.
            assertTrue(readOnly.contains(Convert.ToInt32(1)));
            assertTrue(readOnly.contains("1"));
            assertTrue(readOnly.contains(new char[] { '1' }));

            try
            {
                CharArraySet.unmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
                // expected
                // NOTE(review): confirm unmodifiableSet really throws NullReferenceException
                // here rather than ArgumentNullException.
            }
        }
 /// <summary>
 /// Expects a PorterStemFilter over a SetKeywordMarkerFilter (keyword set: "yourselves")
 /// reading "yourselves yours" to emit "yourselves" and "your".
 /// </summary>
 public virtual void TestWithKeywordAttribute()
 {
     CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     keywords.add("yourselves");

     StringReader input = new StringReader("yourselves yours");
     Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
     SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, keywords);
     TokenStream stemmed = new PorterStemFilter(marked);

     AssertTokenStreamContents(stemmed, new string[] { "yourselves", "your" });
 }
示例#14
0
        /// <summary>
        /// Expects a CzechStemFilter over a SetKeywordMarkerFilter (keyword set: "hole")
        /// reading "hole desek" to emit "hole" and "desk".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add("hole");

            StringReader input = new StringReader("hole desek");
            MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
            SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, keywords);
            CzechStemFilter stemmed = new CzechStemFilter(marked);

            AssertTokenStreamContents(stemmed, new string[] { "hole", "desk" });
        }
示例#15
0
        /// <summary>
        /// Expects "پیاوە" to come back unchanged from a SoraniAnalyzer whose third
        /// constructor argument is a set containing that word.
        /// </summary>
        public virtual void TestWithStemExclusionSet()
        {
            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            protectedWords.add("پیاوە");

            Analyzer sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            string[] expectedTokens = new string[] { "پیاوە" };
            AssertAnalyzesTo(sorani, "پیاوە", expectedTokens);
        }
        /// <summary>
        /// Expects "Fischen Trinken", lower-cased by a LowerCaseTokenizer and marked with the
        /// keyword set { "fischen" }, to leave a GermanStemFilter as "fischen" and "trink".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywordSet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywordSet.add("fischen");

            StringReader text = new StringReader("Fischen Trinken");
            LowerCaseTokenizer lowerCased = new LowerCaseTokenizer(TEST_VERSION_CURRENT, text);
            SetKeywordMarkerFilter keywordMarker = new SetKeywordMarkerFilter(lowerCased, keywordSet);
            GermanStemFilter stemmer = new GermanStemFilter(keywordMarker);

            string[] expectedTokens = new string[] { "fischen", "trink" };
            AssertTokenStreamContents(stemmer, expectedTokens);
        }
示例#17
0
        /// <summary>
        /// Builds a three-word CharArraySet and a three-rule NormalizeCharMap, wraps them in an
        /// <c>AnalyzerAnonymousInnerClassHelper</c>, and runs <c>CheckAnalysisConsistency</c>
        /// on the input "wmgddzunizdomqyj".
        /// </summary>
        public virtual void Test()
        {
            CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            words.add("jjp");
            words.add("wlmwoknt");
            words.add("tcgyreo");

            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.Add("mtqlpi", "");
            rules.Add("mwoknt", "jjp");
            rules.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap normalizeMap = rules.Build();

            Analyzer underTest = new AnalyzerAnonymousInnerClassHelper(this, words, normalizeMap);
            CheckAnalysisConsistency(Random(), underTest, false, "wmgddzunizdomqyj");
        }
示例#18
0
        /// <summary>
        /// Expects "представление" to appear unchanged in the tokens a RussianAnalyzer
        /// produces for a sample sentence when that word is in the set passed as the
        /// analyzer's third constructor argument.
        /// </summary>
        public virtual void TestWithStemExclusionSet()
        {
            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            protectedWords.add("представление");

            Analyzer russian = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.DefaultStopSet, protectedWords);
            string[] expectedTokens = new string[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" };
            AssertAnalyzesTo(russian, "Вместе с тем о силе электромагнитной энергии имели представление еще", expectedTokens);
        }
        /// <summary>
        /// Expects a BrazilianStemFilter over a SetKeywordMarkerFilter (keyword set:
        /// "Brasília") and a LowerCaseTokenizer reading "Brasília Brasilia" to emit
        /// "brasília" and "brasil".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add("Brasília");

            StringReader input = new StringReader("Brasília Brasilia");
            LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, input);
            SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(tokenizer, keywords);
            BrazilianStemFilter stemmed = new BrazilianStemFilter(marked);

            AssertTokenStreamContents(stemmed, new string[] { "brasília", "brasil" });
        }
示例#20
0
        /// <summary>
        /// Expects "строевете строеве" to analyze to "строй" and "строеве" when "строеве" is
        /// in the set passed as the third BulgarianAnalyzer constructor argument.
        /// </summary>
        public virtual void TestWithStemExclusionSet()
        {
            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            protectedWords.add("строеве");

            Analyzer bulgarian = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            string[] expectedTokens = new string[] { "строй", "строеве" };
            AssertAnalyzesTo(bulgarian, "строевете строеве", expectedTokens);
        }
示例#21
0
        /// <summary>
        /// Expects "yourselves yours", marked with the keyword set { "yourselves" }, to leave
        /// a PorterStemFilter as "yourselves" and "your".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywordSet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywordSet.add("yourselves");

            Tokenizer whitespace = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
            SetKeywordMarkerFilter keywordMarker = new SetKeywordMarkerFilter(whitespace, keywordSet);
            TokenStream stemmer = new PorterStemFilter(keywordMarker);

            string[] expectedTokens = new string[] { "yourselves", "your" };
            AssertTokenStreamContents(stemmer, expectedTokens);
        }
示例#22
0
        /// <summary>
        /// Expects "ساهدهات", marked with a keyword set containing the same word, to leave an
        /// ArabicStemFilter unchanged.
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add("ساهدهات");

            StringReader input = new StringReader("ساهدهات");
            ArabicLetterTokenizer letters = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, input);
            SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(letters, keywords);
            ArabicStemFilter stemmed = new ArabicStemFilter(marked);

            AssertTokenStreamContents(stemmed, new string[] { "ساهدهات" });
        }
 /// <summary>
 /// Feeds "The quIck browN LuceneFox Jumps" through SetKeywordMarkerFilter and
 /// LowerCaseFilterMock three times (a set containing "lucenefox", a set built from
 /// "LuceneFox", and a second reference to the first set) and expects "LuceneFox" to keep
 /// its casing while the other tokens are lower-cased.
 /// </summary>
 public virtual void TestSetFilterIncrementToken()
 {
     const string input = "The quIck browN LuceneFox Jumps";
     CharArraySet lowerCaseSet = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
     lowerCaseSet.add("lucenefox");
     string[] expected = new string[] { "the", "quick", "brown", "LuceneFox", "jumps" };

     MockTokenizer firstSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     AssertTokenStreamContents(new LowerCaseFilterMock(new SetKeywordMarkerFilter(firstSource, lowerCaseSet)), expected);

     CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, AsSet("LuceneFox"), false);
     MockTokenizer secondSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     AssertTokenStreamContents(new LowerCaseFilterMock(new SetKeywordMarkerFilter(secondSource, mixedCaseSet)), expected);

     // Same instance as lowerCaseSet, reached through a second reference.
     CharArraySet alias = lowerCaseSet;
     MockTokenizer thirdSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     AssertTokenStreamContents(new LowerCaseFilterMock(new SetKeywordMarkerFilter(thirdSource, alias)), expected);
 }
示例#24
0
        /// <summary>
        /// Expects a BulgarianStemFilter over a SetKeywordMarkerFilter (keyword set:
        /// "строеве") reading "строевете строеве" to emit "строй" and "строеве".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add("строеве");

            StringReader input = new StringReader("строевете строеве");
            MockTokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
            SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, keywords);
            BulgarianStemFilter stemmed = new BulgarianStemFilter(marked);

            AssertTokenStreamContents(stemmed, new string[] { "строй", "строеве" });
        }
示例#25
0
        /// <summary>
        /// Expects two DutchAnalyzer instances, each built with the same set containing
        /// "lichamelijk", to analyze "lichamelijk lichamelijke" into "lichamelijk" and "licham".
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
#pragma warning disable 612, 618
            CharArraySet exclusions = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
            exclusions.add("lichamelijk");

            DutchAnalyzer first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            AssertAnalyzesTo(first, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });

            // A second analyzer over the same set must give the same result.
            DutchAnalyzer second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            AssertAnalyzesTo(second, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
        }
示例#26
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testExclusionTableViaCtor() throws java.io.IOException
        /// <summary>
        /// Expects two DutchAnalyzer instances, each built with the same set containing
        /// "lichamelijk", to analyze "lichamelijk lichamelijke" into "lichamelijk" and "licham".
        /// </summary>
        public virtual void testExclusionTableViaCtor()
        {
            CharArraySet exclusions = new CharArraySet(Version.LUCENE_30, 1, true);
            exclusions.add("lichamelijk");

            DutchAnalyzer first = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            assertAnalyzesTo(first, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });

            // A second analyzer over the same set must give the same result.
            DutchAnalyzer second = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            assertAnalyzesTo(second, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
        }
示例#27
0
        /// <summary>
        /// Expects two FrenchAnalyzer instances, each built with the same set containing
        /// "habitable", to analyze "habitable chiste" into "habitable" and "chist".
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
            CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            exclusions.add("habitable");

            FrenchAnalyzer first = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            AssertAnalyzesTo(first, "habitable chiste", new string[] { "habitable", "chist" });

            // A second analyzer over the same set must give the same result.
            FrenchAnalyzer second = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            AssertAnalyzesTo(second, "habitable chiste", new string[] { "habitable", "chist" });
        }
示例#28
0
            /// <summary>
            /// Creates a CharArraySet sized by <c>random.nextInt(10)</c>, with its third
            /// constructor argument drawn from <c>random.nextBoolean()</c>, and fills it with
            /// that many strings from <c>TestUtil.RandomSimpleString</c>.
            /// </summary>
            /// <param name="random"> source of all random choices </param>
            /// <returns> the populated set </returns>
            public object Create(Random random)
            {
                int count = random.nextInt(10);
                CharArraySet result = new CharArraySet(TEST_VERSION_CURRENT, count, random.nextBoolean());

                for (int remaining = count; remaining > 0; remaining--)
                {
                    // TODO: make nastier
                    result.add(TestUtil.RandomSimpleString(random));
                }

                return result;
            }
            /// <summary>
            /// Builds the chain StandardTokenizer -> SetKeywordMarkerFilter (keyword set:
            /// "liście") -> MorfologikFilter.
            /// </summary>
            /// <param name="fieldName"> field name; not used by this implementation </param>
            /// <param name="reader"> input handed to the StandardTokenizer </param>
            /// <returns> components holding the tokenizer and the final filter </returns>
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                CharArraySet keywordSet = new CharArraySet(TEST_VERSION_CURRENT, 1, false);
                keywordSet.add("liście");

                Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
                TokenStream marked = new SetKeywordMarkerFilter(tokenizer, keywordSet);
                TokenStream chain = new MorfologikFilter(marked);

                return new TokenStreamComponents(tokenizer, chain);
            }
示例#30
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRehash() throws Exception
        /// <summary>
        /// Adds every entry of <c>TEST_STOP_WORDS</c> to a CharArraySet created with a start
        /// size of 0, then checks the resulting size and that every entry is contained.
        /// </summary>
        public virtual void testRehash()
        {
            CharArraySet terms = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
            foreach (string word in TEST_STOP_WORDS)
            {
                terms.add(word);
            }

            assertEquals(TEST_STOP_WORDS.Length, terms.size());

            foreach (string word in TEST_STOP_WORDS)
            {
                assertTrue(terms.contains(word));
            }
        }
示例#31
0
        /// <summary>
        /// Feeds "The quIck browN LuceneFox Jumps" through SetKeywordMarkerFilter and
        /// LowerCaseFilterMock three times (a set containing "lucenefox", a set built from
        /// "LuceneFox", and a second reference to the first set) and expects "LuceneFox" to
        /// keep its casing while the other tokens are lower-cased.
        /// </summary>
        public virtual void TestSetFilterIncrementToken()
        {
            const string sentence = "The quIck browN LuceneFox Jumps";
            string[] expectedTokens = new string[] { "the", "quick", "brown", "LuceneFox", "jumps" };

            CharArraySet ignoreCaseSet = new CharArraySet(TEST_VERSION_CURRENT, 5, true);
            ignoreCaseSet.add("lucenefox");
            SetKeywordMarkerFilter firstMarker = new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false), ignoreCaseSet);
            AssertTokenStreamContents(new LowerCaseFilterMock(firstMarker), expectedTokens);

            CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, AsSet("LuceneFox"), false);
            SetKeywordMarkerFilter secondMarker = new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false), mixedCaseSet);
            AssertTokenStreamContents(new LowerCaseFilterMock(secondMarker), expectedTokens);

            // Same instance as ignoreCaseSet, reached through a second reference.
            CharArraySet sameSet = ignoreCaseSet;
            SetKeywordMarkerFilter thirdMarker = new SetKeywordMarkerFilter(new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false), sameSet);
            AssertTokenStreamContents(new LowerCaseFilterMock(thirdMarker), expectedTokens);
        }
示例#32
0
        /// <summary>
        /// Test the static #copy() function with a CharArraySet as a source.
        /// Copies one set built with the third constructor argument <c>true</c> and one built
        /// with <c>false</c>, then checks the size and contents of each copy and that terms
        /// added to a copy do not appear in either source set.
        /// </summary>
        public virtual void testCopyCharArraySet()
        {
            CharArraySet setIgnoreCase    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList <string> stopwords      = TEST_STOP_WORDS;
            IList <string> stopwordsUpper = new List <string>();

            foreach (string @string in stopwords)
            {
                stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            setIgnoreCase.addAll(TEST_STOP_WORDS);
            setIgnoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy         = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
            CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

            assertEquals(setIgnoreCase.size(), copy.size());
            // Each copy is compared with its own source; the case-sensitive copy's size
            // was previously never checked because this line re-checked `copy`.
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
                assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList <string> newWords = new List <string>();

            foreach (string @string in stopwords)
            {
                newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
                assertFalse(setIgnoreCase.contains(@string));
                assertFalse(setCaseSensitive.contains(@string));
            }
        }
示例#33
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws Exception
        /// <summary>
        /// Builds a three-word CharArraySet and a three-rule NormalizeCharMap, wraps them in an
        /// <c>AnalyzerAnonymousInnerClassHelper</c>, and runs <c>checkAnalysisConsistency</c>
        /// on the input "wmgddzunizdomqyj".
        /// </summary>
        public virtual void test()
        {
            CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            words.add("jjp");
            words.add("wlmwoknt");
            words.add("tcgyreo");

            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.add("mtqlpi", "");
            rules.add("mwoknt", "jjp");
            rules.add("tcgyreo", "zpfpajyws");
            NormalizeCharMap normalizeMap = rules.build();

            Analyzer underTest = new AnalyzerAnonymousInnerClassHelper(this, words, normalizeMap);
            checkAnalysisConsistency(random(), underTest, false, "wmgddzunizdomqyj");
        }
示例#34
0
        /// <summary>
        /// Adds the integer 1 to a CharArraySet and checks that it is found through the same
        /// value, another integer 1, the string "1" and the char array { '1' }; the checks
        /// are repeated on the result of <c>CharArraySet.unmodifiableSet</c>.
        /// </summary>
        public virtual void testObjectContains()
        {
            int? one = Convert.ToInt32(1);
            CharArraySet terms = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            terms.add(one);

            assertTrue(terms.contains(one));
            assertTrue(terms.contains(new int?(1)));     // another integer
            assertTrue(terms.contains("1"));
            assertTrue(terms.contains(new char[] { '1' }));

            // test unmodifiable
            CharArraySet readOnly = CharArraySet.unmodifiableSet(terms);
            assertTrue(readOnly.contains(one));
            assertTrue(readOnly.contains(new int?(1)));  // another integer
            assertTrue(readOnly.contains("1"));
            assertTrue(readOnly.contains(new char[] { '1' }));
        }
示例#35
0
	  /// <summary>
	  /// Reads the given reader line by line and adds each line, with leading and trailing
	  /// whitespace removed, to the supplied CharArraySet. Each line is expected to hold a
	  /// single word. The words need to be in lowercase if you make use of an
	  /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
	  /// </summary>
	  /// <param name="reader"> Reader containing the wordlist </param>
	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
	  public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
	  {
		BufferedReader lines = null;
		try
		{
		  lines = getBufferedReader(reader);
		  for (string line = lines.readLine(); line != null; line = lines.readLine())
		  {
			result.add(line.Trim());
		  }
		}
		finally
		{
		  // Runs even when reading fails; `lines` is still null if getBufferedReader threw.
		  IOUtils.close(lines);
		}
		return result;
	  }
示例#36
0
	  /// <summary>
	  /// Find the unique stem(s) of the provided word. The stems returned by <c>stem</c> are
	  /// de-duplicated in their original order using a CharArraySet configured with
	  /// <c>dictionary.ignoreCase</c>; results with fewer than two stems are returned as-is.
	  /// </summary>
	  /// <param name="word"> Word to find the stems for </param>
	  /// <param name="length"> passed through to <c>stem</c> together with <paramref name="word"/> </param>
	  /// <returns> List of stems for the word </returns>
	  public IList<CharsRef> uniqueStems(char[] word, int length)
	  {
		IList<CharsRef> candidates = stem(word, length);
		if (candidates.Count >= 2)
		{
		  CharArraySet seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
		  IList<CharsRef> unique = new List<CharsRef>();
		  foreach (CharsRef candidate in candidates)
		  {
			if (seen.contains(candidate))
			{
			  continue;
			}
			unique.Add(candidate);
			seen.add(candidate);
		  }
		  return unique;
		}
		// Zero or one stem: nothing to de-duplicate.
		return candidates;
	  }
示例#37
0
        /// <summary>
        /// Test the static #copy() function with a CharArraySet as a source.
        /// Copies one set built with the third constructor argument <c>true</c> and one built
        /// with <c>false</c>, then checks the size and contents of each copy and that terms
        /// added to a copy do not appear in either source set.
        /// </summary>
        public virtual void testCopyCharArraySet()
        {
            CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

            IList<string> stopwords = TEST_STOP_WORDS;
            IList<string> stopwordsUpper = new List<string>();
            foreach (string @string in stopwords)
            {
              stopwordsUpper.Add(@string.ToUpper(Locale.ROOT));
            }
            setIgnoreCase.addAll(TEST_STOP_WORDS);
            setIgnoreCase.add(Convert.ToInt32(1));
            setCaseSensitive.addAll(TEST_STOP_WORDS);
            setCaseSensitive.add(Convert.ToInt32(1));

            CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
            CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

            assertEquals(setIgnoreCase.size(), copy.size());
            // Each copy is compared with its own source; the case-sensitive copy's size
            // was previously never checked because this line re-checked `copy`.
            assertEquals(setCaseSensitive.size(), copyCaseSens.size());

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copyCaseSens.containsAll(stopwords));
            foreach (string @string in stopwordsUpper)
            {
              assertFalse(copyCaseSens.contains(@string));
            }
            // test adding terms to the copy
            IList<string> newWords = new List<string>();
            foreach (string @string in stopwords)
            {
              newWords.Add(@string + "_1");
            }
            copy.addAll(newWords);

            assertTrue(copy.containsAll(stopwords));
            assertTrue(copy.containsAll(stopwordsUpper));
            assertTrue(copy.containsAll(newWords));
            // new added terms are not in the source set
            foreach (string @string in newWords)
            {
              assertFalse(setIgnoreCase.contains(@string));
              assertFalse(setCaseSensitive.contains(@string));
            }
        }
示例#38
0
	  /// <summary>
	  /// Reads stopwords from a stopword list in Snowball format.
	  /// <para>
	  /// The snowball format is the following:
	  /// <list type="bullet">
	  /// <item><description>Lines may contain multiple words separated by whitespace.</description></item>
	  /// <item><description>The comment character is the vertical line (&#124;).</description></item>
	  /// <item><description>Lines may contain trailing comments.</description></item>
	  /// </list>
	  /// </para>
	  /// </summary>
	  /// <param name="reader"> Reader containing a Snowball stopword list </param>
	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
	  public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
	  {
		BufferedReader input = null;
		try
		{
		  input = getBufferedReader(reader);
		  for (string line = input.readLine(); line != null; line = input.readLine())
		  {
			// Drop everything from the first '|' onwards (trailing comment).
			int commentStart = line.IndexOf('|');
			string content = commentStart >= 0 ? line.Substring(0, commentStart) : line;

			// NOTE(review): this Split(string, bool) overload is not visible here;
			// confirm it treats "\\s+" as a whitespace pattern rather than literal text.
			foreach (string token in content.Split("\\s+", true))
			{
			  if (token.Length > 0)
			  {
				  result.add(token);
			  }
			}
		  }
		}
		finally
		{
		  IOUtils.close(input);
		}
		return result;
	  }
示例#39
0
	  /// <summary>
	  /// Reads the given reader line by line and adds every line that does not start with
	  /// <paramref name="comment"/> to the supplied CharArraySet, with leading and trailing
	  /// whitespace removed. The comment check is made on the raw line, before trimming.
	  /// Each line is expected to hold a single word. The words need to be in lowercase if
	  /// you make use of an Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
	  /// </summary>
	  /// <param name="reader"> Reader containing the wordlist </param>
	  /// <param name="comment"> The string representing a comment. </param>
	  /// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the reader's words </param>
	  /// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
	  public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
	  {
		BufferedReader lines = null;
		try
		{
		  lines = getBufferedReader(reader);
		  for (string line = lines.ReadLine(); line != null; line = lines.ReadLine())
		  {
			if (line.StartsWith(comment, StringComparison.Ordinal))
			{
			  // Comment line: skip it.
			  continue;
			}
			result.add(line.Trim());
		  }
		}
		finally
		{
		  IOUtils.Close(lines);
		}
		return result;
	  }
        /// <summary>
        /// Expects "habitable chiste" to analyze to "habitable" and "chist" for each of two
        /// FrenchAnalyzer instances constructed with the same set containing "habitable".
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
            CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            protectedWords.add("habitable");

            FrenchAnalyzer firstAnalyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            AssertAnalyzesTo(firstAnalyzer, "habitable chiste", new string[] { "habitable", "chist" });

            FrenchAnalyzer secondAnalyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            AssertAnalyzesTo(secondAnalyzer, "habitable chiste", new string[] { "habitable", "chist" });
        }
        /// <summary>
        /// Expects "строевете строеве", marked with the keyword set { "строеве" }, to leave a
        /// BulgarianStemFilter as "строй" and "строеве".
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywordSet = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywordSet.add("строеве");

            MockTokenizer whitespace = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
            SetKeywordMarkerFilter keywordMarker = new SetKeywordMarkerFilter(whitespace, keywordSet);
            BulgarianStemFilter stemmer = new BulgarianStemFilter(keywordMarker);

            string[] expectedTokens = new string[] { "строй", "строеве" };
            AssertTokenStreamContents(stemmer, expectedTokens);
        }
        /// <summary>
        /// Expects "lichamelijk lichamelijke" to analyze to "lichamelijk" and "licham" for
        /// each of two DutchAnalyzer instances constructed with the same set containing
        /// "lichamelijk".
        /// </summary>
        public virtual void TestExclusionTableViaCtor()
        {
#pragma warning disable 612, 618
            CharArraySet protectedWords = new CharArraySet(LuceneVersion.LUCENE_30, 1, true);
#pragma warning restore 612, 618
            protectedWords.add("lichamelijk");

            DutchAnalyzer firstAnalyzer = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            AssertAnalyzesTo(firstAnalyzer, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });

            DutchAnalyzer secondAnalyzer = new DutchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, protectedWords);
            AssertAnalyzesTo(secondAnalyzer, "lichamelijk lichamelijke", new string[] { "lichamelijk", "licham" });
        }
示例#43
0
        /// <summary>
        /// Verifies that the set returned by <c>CharArraySet.unmodifiableSet</c> answers every
        /// mutating call tried here with <see cref="System.NotSupportedException"/>, and that its
        /// size and contents are unchanged after each rejected call.
        /// </summary>
        public virtual void testModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            set.addAll(TEST_STOP_WORDS);
            int size = set.size();
            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";
            assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

            // add(char[])
            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(string)
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(StringBuilder)
            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // clear()
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(object)
            try
            {
                set.add((object)NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
            // remove() on the iterator
            try
            {
                set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // The word is wrapped in a one-element array: the collection-taking members need a
            // collection that contains the word, whereas a bare string is only a sequence of chars.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // Every original stop word must still be present after all the rejected calls.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }
        /// <summary>
        /// Checks that <c>ArabicStemFilter</c> emits a token unchanged when that token is listed in
        /// the set given to <c>SetKeywordMarkerFilter</c>.
        /// </summary>
        public virtual void TestWithKeywordAttribute()
        {
            CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            keywords.add("ساهدهات");

            StringReader input = new StringReader("ساهدهات");
            // Warnings 612/618 are the obsolete-member warnings; they are silenced only for this tokenizer.
#pragma warning disable 612, 618
            ArabicLetterTokenizer tokenizer = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, input);
#pragma warning restore 612, 618

            SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(tokenizer, keywords);
            ArabicStemFilter stemmer = new ArabicStemFilter(marker);
            AssertTokenStreamContents(stemmer, new[] { "ساهدهات" });
        }
示例#45
0
 /// <summary>
 /// Adds a nullable integer with value 1 to a <c>CharArraySet</c> and checks that it is found
 /// through the same value, through another nullable 1, through the string "1" and through the
 /// char array {'1'}. The four lookups are then repeated on the unmodifiable wrapper of the set.
 /// </summary>
 public virtual void testObjectContains()
 {
     CharArraySet mutable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
     int? one = Convert.ToInt32(1);
     mutable.add(one);

     assertTrue(mutable.contains(one));
     assertTrue(mutable.contains((int?)1)); // a separately created nullable with the same value
     assertTrue(mutable.contains("1"));
     assertTrue(mutable.contains(new[] { '1' }));

     // Same lookups against the unmodifiable view.
     CharArraySet readOnly = CharArraySet.unmodifiableSet(mutable);
     assertTrue(readOnly.contains(one));
     assertTrue(readOnly.contains((int?)1)); // a separately created nullable with the same value
     assertTrue(readOnly.contains("1"));
     assertTrue(readOnly.contains(new[] { '1' }));
 }
        /// <summary>
        /// Adds terms containing an unpaired high surrogate (<c>\uD800</c>) to a <c>CharArraySet</c>
        /// and checks lookups twice: with the third constructor argument <c>true</c>, where both the
        /// upper-case terms and their lower-case counterparts must be found, and with it
        /// <c>false</c>, where only the terms that were actually added must be found.
        /// </summary>
        public virtual void TestSingleHighSurrogate()
        {
            string missing = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";
            string[] upperArr = { "ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB" };

            string[] lowerArr = { "abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b" };
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
            }

            set = new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                // The failure message names the term that was actually looked up (the lower-case form).
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
示例#47
0
        /// <summary>
        /// Exercises basic <c>CharArrayMap</c> operations against a reference <c>Dictionary</c>:
        /// bulk insertion with <c>putAll</c>, the key-set view obtained from <c>Keys</c>, two ways of
        /// iterating the entry set while overwriting values, and clearing through the key set, the
        /// map itself and the entry set.
        /// </summary>
        public virtual void testMethods()
        {
            CharArrayMap <int?>       cm = new CharArrayMap <int?>(TEST_VERSION_CURRENT, 2, false);
            Dictionary <string, int?> hm = new Dictionary <string, int?>();

            // hm is the reference dictionary; after each putAll the map must report the same size.
            hm["foo"] = 1;
            hm["bar"] = 2;
            cm.putAll(hm);
            assertEquals(hm.Count, cm.size());
            hm["baz"] = 3;
            cm.putAll(hm);
            assertEquals(hm.Count, cm.size());

            // Key-set view: every key it yields must be found in the map, both as an object and as a char[] slice.
            CharArraySet cs = cm.Keys;
            int          n  = 0;

            foreach (object o in cs)
            {
                assertTrue(cm.containsKey(o));
                char[] co = (char[])o;
                assertTrue(cm.containsKey(co, 0, co.Length));
                n++;
            }
            assertEquals(hm.Count, n);
            assertEquals(hm.Count, cs.size());
            assertEquals(cm.size(), cs.size());
            // Clearing the key set is expected to empty the map as well.
            cs.clear();
            assertEquals(0, cs.size());
            assertEquals(0, cm.size());
            // Adding through the key set is expected to be rejected.
            try
            {
                cs.add("test");
                fail("keySet() allows adding new keys");
            }
            catch (System.NotSupportedException)
            {
                // pass
            }
            // Refill the map; the previously obtained key set must reflect the new contents.
            cm.putAll(hm);
            assertEquals(hm.Count, cs.size());
            assertEquals(cm.size(), cs.size());

            // First iteration style: generic enumerator over key/value pairs.
            IEnumerator <KeyValuePair <object, int?> > iter1 = cm.entrySet().GetEnumerator();

            n = 0;
            while (iter1.MoveNext())
            {
                KeyValuePair <object, int?> entry = iter1.Current;
                object key = entry.Key;
                int?   val = entry.Value;
                assertEquals(cm.get(key), val);
                // NOTE(review): System.Collections.Generic.KeyValuePair<TKey,TValue>.Value has no setter;
                // this looks like an unported Java Map.Entry.setValue call - confirm how it is meant to compile.
                entry.Value = val * 100;
                assertEquals(val * 100, (int)cm.get(key));
                n++;
            }
            assertEquals(hm.Count, n);
            // Reset the values that were multiplied by 100 above.
            cm.clear();
            cm.putAll(hm);
            assertEquals(cm.size(), n);

            // Second iteration style: the map's own EntryIterator (hasNext / nextKey / currentValue).
            CharArrayMap <int?> .EntryIterator iter2 = cm.entrySet().GetEnumerator();
            n = 0;
            while (iter2.hasNext())
            {
                char[] keyc = iter2.nextKey();
                int?   val  = iter2.currentValue();
                assertEquals(hm[new string(keyc)], val);
                iter2.Value = val * 100;
                assertEquals(val * 100, (int)cm.get(keyc));
                n++;
            }
            assertEquals(hm.Count, n);

            // Clearing through the entry set must leave the map empty.
            cm.entrySet().clear();
            assertEquals(0, cm.size());
            assertEquals(0, cm.entrySet().size());
            assertTrue(cm.Empty);
        }
 /// <summary>
 /// Checks that <c>BrazilianStemFilter</c> leaves "brasília" unstemmed when "Brasília" is listed
 /// in the set given to <c>SetKeywordMarkerFilter</c>, while "Brasilia" is stemmed to "brasil".
 /// </summary>
 public virtual void TestWithKeywordAttribute()
 {
     CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     keywords.add("Brasília");

     // Build the chain step by step: reader -> lower-casing tokenizer -> keyword marker -> stemmer.
     StringReader input = new StringReader("Brasília Brasilia");
     LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, input);
     SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(tokenizer, keywords);
     BrazilianStemFilter stemmer = new BrazilianStemFilter(marker);

     AssertTokenStreamContents(stemmer, new[] { "brasília", "brasil" });
 }
        /// <summary>
        /// Confirms that a <c>CharArraySet</c> wrapped by <c>CharArraySet.UnmodifiableSet</c> throws
        /// <see cref="System.NotSupportedException"/> from each mutating member tried here, and that
        /// the size and contents of the set are untouched afterwards.
        /// </summary>
        public virtual void TestModifyOnUnmodifiable()
        {
            CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            source.AddAll(TEST_STOP_WORDS);
            int expectedSize = source.size();

            CharArraySet readOnly = CharArraySet.UnmodifiableSet(source);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());

            string absentWord = "SirGallahad";
            assertFalse("Test String already exists in set", readOnly.Contains(absentWord));

            // Add(char[])
            try
            {
                readOnly.Add(absentWord.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required; nothing may have changed
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // add(string)
            try
            {
                readOnly.add(absentWord);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required; nothing may have changed
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // Add(StringBuilder)
            try
            {
                readOnly.Add(new StringBuilder(absentWord));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required; nothing may have changed
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // clear()
            try
            {
                readOnly.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required; nothing may have changed
                assertFalse("Changed unmodifiable set", readOnly.contains(absentWord));
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // add(string) once more
            try
            {
                readOnly.add(absentWord);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required; nothing may have changed
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // NOTE: The removeAll check below is disabled because it results in a StackOverflow exception.
            // removeAll is not a public member of CharArraySet but an extension method for the test fixture
            // (which apparently has a bug), so this check is non-critical.
            //// This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            //// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
            //// remove() on the iterator
            //try
            //{
            //    set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
            //    fail("Modified unmodifiable set");
            //}
            //catch (System.NotSupportedException)
            //{
            //    // expected
            //    assertEquals("Size of unmodifiable set has changed", size, set.size());
            //}

            #region Added for better .NET support
            // Added for .NET: covers the Remove method, because the removeAll extension method
            // above cannot be executed.
            try
            {
#pragma warning disable 612, 618
                readOnly.Remove(TEST_STOP_WORDS[0]);
#pragma warning restore 612, 618
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }
            #endregion

            // retainAll(...)
            try
            {
                readOnly.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new string[] { absentWord }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required
                assertEquals("Size of unmodifiable set has changed", expectedSize, readOnly.size());
            }

            // addAll(...)
            try
            {
                readOnly.addAll(new string[] { absentWord });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
            }

            // LUCENENET Specific - covers the .NETified UnionWith method
            try
            {
                readOnly.UnionWith(new string[] { absentWord });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // rejected as required
                assertFalse("Test String has been added to unmodifiable set", readOnly.contains(absentWord));
            }

            // Every original stop word must still be present after all the rejected calls.
            foreach (string stopWord in TEST_STOP_WORDS)
            {
                assertTrue(readOnly.contains(stopWord));
            }
        }
示例#50
0
 /// <summary>
 /// Backwards-compatibility check for supplementary characters on a <c>CharArraySet</c> created
 /// with <c>Version.LUCENE_30</c>: the upper-case terms that were added must be found, and their
 /// lower-case counterparts must not, with the third constructor argument both <c>true</c> and
 /// <c>false</c>.
 /// </summary>
 public virtual void testSupplementaryCharsBWCompat()
 {
     // .NET composite-format placeholders ({0}); Java's "%s" is not substituted by string.Format.
     string missing = "Term {0} is missing in the set";
     string falsePos = "Term {0} is in the set but shouldn't";
     // for reference see
     // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
     string[] upperArr = new string[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
     string[] lowerArr = new string[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
     CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
     foreach (string upper in upperArr)
     {
         set.add(upper);
     }
     for (int i = 0; i < upperArr.Length; i++)
     {
         assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
         assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
     }

     set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
     foreach (string upper in upperArr)
     {
         set.add(upper);
     }
     for (int i = 0; i < upperArr.Length; i++)
     {
         assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
         assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
     }
 }
示例#51
0
        /// <summary>
        /// Checks that <c>CharArraySet.unmodifiableSet</c> returns a set with the same size and
        /// contents as the set it was given (all stop words plus the integer 1, which must also be
        /// found as "1" and as the char array {'1'}), and that passing <c>null</c> raises
        /// <see cref="System.NullReferenceException"/>.
        /// </summary>
        public virtual void testUnmodifiableSet()
        {
            CharArraySet mutable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            mutable.addAll(TEST_STOP_WORDS);
            mutable.add(Convert.ToInt32(1));
            int expectedSize = mutable.size();

            CharArraySet readOnly = CharArraySet.unmodifiableSet(mutable);
            assertEquals("Set size changed due to unmodifiableSet call", expectedSize, readOnly.size());

            foreach (string word in TEST_STOP_WORDS)
            {
                assertTrue(readOnly.contains(word));
            }

            // The integer must be reachable in all three representations.
            assertTrue(readOnly.contains(Convert.ToInt32(1)));
            assertTrue(readOnly.contains("1"));
            assertTrue(readOnly.contains(new[] { '1' }));

            try
            {
                CharArraySet.unmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
                // wrapping null is required to fail this way
            }
        }
示例#52
0
 /// <summary>
 /// Creates a <c>CharArraySet</c> with an initial size argument of 0, adds every stop word to it,
 /// and checks that the reported size and the lookups are still correct afterwards.
 /// </summary>
 public virtual void testRehash()
 {
     CharArraySet words = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
     foreach (string stopWord in TEST_STOP_WORDS)
     {
         words.add(stopWord);
     }

     assertEquals(TEST_STOP_WORDS.Length, words.size());

     foreach (string stopWord in TEST_STOP_WORDS)
     {
         assertTrue(words.contains(stopWord));
     }
 }
示例#53
0
        /// <summary>
        /// Verifies that the set returned by <c>CharArraySet.unmodifiableSet</c> answers every
        /// mutating call tried here with <see cref="System.NotSupportedException"/>, and that its
        /// size and contents are unchanged after each rejected call.
        /// </summary>
        public virtual void testModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.addAll(TEST_STOP_WORDS);
            int size = set.size();

            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";

            assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

            // add(char[])
            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(string)
            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(StringBuilder)
            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // clear()
            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // add(object)
            try
            {
                set.add((object)NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
            // remove() on the iterator
            try
            {
                set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // The word is wrapped in a one-element array: the collection-taking members need a
            // collection that contains the word, whereas a bare string is only a sequence of chars.
            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, new[] { NOT_IN_SET }, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.addAll(new[] { NOT_IN_SET });
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            // Every original stop word must still be present after all the rejected calls.
            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }
示例#54
0
        /// <summary>
        /// Backwards-compatibility check for terms containing an unpaired high surrogate
        /// (<c>\uD800</c>) on a <c>CharArraySet</c> created with <c>Version.LUCENE_30</c>. With the
        /// third constructor argument <c>true</c>, the lower-case counterparts must be found except
        /// for the last term; with it <c>false</c>, none of the lower-case counterparts may be found.
        /// </summary>
        public virtual void testSingleHighSurrogateBWComapt()
        {
            // .NET composite-format placeholders ({0}); Java's "%s" is not substituted by string.Format.
            string missing = "Term {0} is missing in the set";
            string falsePos = "Term {0} is in the set but shouldn't";
            string[] upperArr = new string[] {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};

            string[] lowerArr = new string[] {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};
            CharArraySet set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, true);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                // The last pair contains a surrogate pair after the lone surrogate; its lower-case
                // form is the one entry this test expects NOT to match.
                if (i == lowerArr.Length - 1)
                {
                    assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
                }
                else
                {
                    assertTrue(string.Format(missing, lowerArr[i]), set.contains(lowerArr[i]));
                }
            }

            set = new CharArraySet(Version.LUCENE_30, TEST_STOP_WORDS, false);
            foreach (string upper in upperArr)
            {
                set.add(upper);
            }
            for (int i = 0; i < upperArr.Length; i++)
            {
                assertTrue(string.Format(missing, upperArr[i]), set.contains(upperArr[i]));
                assertFalse(string.Format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
            }
        }
 /// <summary>
 /// Checks that <c>CzechStemFilter</c> leaves "hole" untouched when it is listed in the set given
 /// to <c>SetKeywordMarkerFilter</c>, while "desek" is stemmed to "desk".
 /// </summary>
 public virtual void TestWithKeywordAttribute()
 {
     CharArraySet keywords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     keywords.add("hole");

     // Build the chain step by step: reader -> whitespace tokenizer -> keyword marker -> stemmer.
     StringReader input = new StringReader("hole desek");
     MockTokenizer tokenizer = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
     SetKeywordMarkerFilter marker = new SetKeywordMarkerFilter(tokenizer, keywords);
     CzechStemFilter stemmer = new CzechStemFilter(marker);

     AssertTokenStreamContents(stemmer, new[] { "hole", "desk" });
 }