Пример #1
0
        static StopAnalyzer()
        {
            IList <string> stopWords = Arrays.AsList("a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with");
            var            stopSet   = new CharArraySet(Version.LUCENE_CURRENT, stopWords, false);

            ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
        }
Пример #2
0
 public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap <string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;
     this.stoptable    = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.excltable    = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
     if (stemOverrideDict.Empty || !matchVersion.onOrAfter(Version.LUCENE_31))
     {
         this.stemdict     = null;
         this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
     }
     else
     {
         this.origStemdict = null;
         // we don't need to ignore case here since we lowercase in this analyzer anyway
         StemmerOverrideFilter.Builder        builder = new StemmerOverrideFilter.Builder(false);
         CharArrayMap <string> .EntryIterator iter    = stemOverrideDict.entrySet().GetEnumerator();
         CharsRef spare = new CharsRef();
         while (iter.hasNext())
         {
             char[] nextKey = iter.nextKey();
             spare.copyChars(nextKey, 0, nextKey.Length);
             builder.add(spare, iter.currentValue());
         }
         try
         {
             this.stemdict = builder.build();
         }
         catch (IOException ex)
         {
             throw new Exception("can not build stem dict", ex);
         }
     }
 }
Пример #3
0
        public virtual void testUnmodifiableSet()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.addAll(TEST_STOP_WORDS);
            set.add(Convert.ToInt32(1));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int size = set.size();
            int size = set.size();

            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            foreach (string stopword in TEST_STOP_WORDS)
            {
                assertTrue(set.contains(stopword));
            }
            assertTrue(set.contains(Convert.ToInt32(1)));
            assertTrue(set.contains("1"));
            assertTrue(set.contains(new char[] { '1' }));

            try
            {
                CharArraySet.unmodifiableSet(null);
                fail("can not make null unmodifiable");
            }
            catch (System.NullReferenceException)
            {
                // expected
            }
        }
Пример #4
0
        public virtual void testNonZeroOffset()
        {
            string[]     words  = new string[] { "Hello", "World", "this", "is", "a", "test" };
            char[]       findme = "xthisy".ToCharArray();
            CharArraySet set    = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.addAll(words);
            assertTrue(set.contains(findme, 1, 4));
            assertTrue(set.contains(new string(findme, 1, 4)));

            // test unmodifiable
            set = CharArraySet.unmodifiableSet(set);
            assertTrue(set.contains(findme, 1, 4));
            assertTrue(set.contains(new string(findme, 1, 4)));
        }
Пример #5
0
        public virtual void testObjectContains()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
            int?         val = Convert.ToInt32(1);

            set.add(val);
            assertTrue(set.contains(val));
            assertTrue(set.contains(new int?(1)));     // another integer
            assertTrue(set.contains("1"));
            assertTrue(set.contains(new char[] { '1' }));
            // test unmodifiable
            set = CharArraySet.unmodifiableSet(set);
            assertTrue(set.contains(val));
            assertTrue(set.contains(new int?(1)));     // another integer
            assertTrue(set.contains("1"));
            assertTrue(set.contains(new char[] { '1' }));
        }
Пример #6
0
 /// <summary>
 /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
 /// stemming.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
 public IrishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
 {
     this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
 }
Пример #7
0
 /// <summary>
 /// Builds the named analyzer with the given stop words. </summary>
 public SnowballAnalyzer(Version matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name)
 {
     stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopWords));
 }
Пример #8
0
 /// <summary>
 /// Builds an analyzer with the given stop words
 /// </summary>
 /// <param name="matchVersion">
 ///          lucene compatibility version </param>
 /// <param name="stopwords">
 ///          a stopword set </param>
 /// <param name="stemExclutionSet">
 ///          a stemming exclusion set </param>
 public FrenchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet) : base(matchVersion, stopwords)
 {
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclutionSet));
 }
Пример #9
0
 /// <summary>
 /// Builds an analyzer with the given stop words and a set of work to be
 /// excluded from the <seealso cref="CzechStemFilter"/>.
 /// </summary>
 /// <param name="matchVersion"> Lucene version to match See
 ///          <seealso cref="<a href="#version">above</a>"/> </param>
 /// <param name="stopwords"> a stopword set </param>
 /// <param name="stemExclusionTable"> a stemming exclusion set </param>
 public CzechAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable) : base(matchVersion, stopwords)
 {
     this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
 }
Пример #10
0
 /// <summary>
 /// Builds an analyzer with the given stop words and stemming exclusion words
 /// </summary>
 /// <param name="matchVersion">
 ///          lucene compatibility version </param>
 /// <param name="stopwords">
 ///          a stopword set </param>
 public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : this(matchVersion, stopwords)
 {
     excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
 }
Пример #11
0
        public virtual void testModifyOnUnmodifiable()
        {
            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);

            set.addAll(TEST_STOP_WORDS);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int size = set.size();
            int size = set.size();

            set = CharArraySet.unmodifiableSet(set);
            assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
            string NOT_IN_SET = "SirGallahad";

            assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));

            try
            {
                set.add(NOT_IN_SET.ToCharArray());
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.add(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.add(new StringBuilder(NOT_IN_SET));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.clear();
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }
            try
            {
                set.add((object)NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
            // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
            // remove() on the iterator
            try
            {
                set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, TEST_STOP_WORDS, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, NOT_IN_SET, true));
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertEquals("Size of unmodifiable set has changed", size, set.size());
            }

            try
            {
                set.addAll(NOT_IN_SET);
                fail("Modified unmodifiable set");
            }
            catch (System.NotSupportedException)
            {
                // expected
                assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
            }

            for (int i = 0; i < TEST_STOP_WORDS.Length; i++)
            {
                assertTrue(set.contains(TEST_STOP_WORDS[i]));
            }
        }