コード例 #1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testInform() throws Exception
        /// <summary>
        /// Creates "Stop" filter factories from a single word file, from a
        /// comma-separated pair of word files, from a snowball-format file and
        /// without any arguments, checking the resulting stop set and the
        /// <c>IgnoreCase</c> flag after each construction.
        /// </summary>
        public virtual void testInform()
        {
            ResourceLoader resourceLoader = new ClasspathResourceLoader(this.GetType());
            assertTrue("loader is null and it shouldn't be", resourceLoader != null);

            // One word file.
            StopFilterFactory stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
            CharArraySet stopSet = stopFactory.StopWords;
            assertTrue("words is null and it shouldn't be", stopSet != null);
            assertTrue("words Size: " + stopSet.size() + " is not: " + 2, stopSet.size() == 2);
            assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

            // Two word files given as one comma-separated argument.
            stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
            stopSet = stopFactory.StopWords;
            assertTrue("words is null and it shouldn't be", stopSet != null);
            assertTrue("words Size: " + stopSet.size() + " is not: " + 4, stopSet.size() == 4);
            assertTrue(stopFactory.IgnoreCase + " does not equal: " + true, stopFactory.IgnoreCase == true);

            // Snowball-format word file.
            stopFactory = (StopFilterFactory)tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
            stopSet = stopFactory.StopWords;
            assertEquals(8, stopSet.size());
            string[] expectedSnowballWords = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
            foreach (string expectedWord in expectedSnowballWords)
            {
                assertTrue(stopSet.contains(expectedWord));
            }

            // No arguments at all: the factory is compared against the English defaults.
            stopFactory = (StopFilterFactory)tokenFilterFactory("Stop");
            assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.StopWords);
            assertEquals(false, stopFactory.IgnoreCase);
        }
コード例 #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testInform() throws Exception
        /// <summary>
        /// Creates "CommonGramsQuery" filter factories from a single word file,
        /// from a comma-separated pair of word files and from a snowball-format
        /// file, checking the resulting common-word set and the
        /// <c>IgnoreCase</c> flag after each construction.
        /// </summary>
        public virtual void testInform()
        {
            ResourceLoader resourceLoader = new ClasspathResourceLoader(typeof(TestStopFilter));
            assertTrue("loader is null and it shouldn't be", resourceLoader != null);

            // One word file.
            CommonGramsQueryFilterFactory gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, resourceLoader, "words", "stop-1.txt", "ignoreCase", "true");
            CharArraySet commonSet = gramsFactory.CommonWords;
            assertTrue("words is null and it shouldn't be", commonSet != null);
            assertTrue("words Size: " + commonSet.size() + " is not: " + 2, commonSet.size() == 2);
            assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

            // Two word files given as one comma-separated argument.
            gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, resourceLoader, "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
            commonSet = gramsFactory.CommonWords;
            assertTrue("words is null and it shouldn't be", commonSet != null);
            assertTrue("words Size: " + commonSet.size() + " is not: " + 4, commonSet.size() == 4);
            assertTrue(gramsFactory.IgnoreCase + " does not equal: " + true, gramsFactory.IgnoreCase == true);

            // Snowball-format word file.
            gramsFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery", TEST_VERSION_CURRENT, resourceLoader, "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
            commonSet = gramsFactory.CommonWords;
            assertEquals(8, commonSet.size());
            string[] expectedSnowballWords = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
            foreach (string expectedWord in expectedSnowballWords)
            {
                assertTrue(commonSet.contains(expectedWord));
            }
        }
コード例 #3
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ηθηκεσ/-ηθηκα/-ηθηκε and then a trailing
        /// -ηκεσ/-ηκα/-ηκε from the word in <paramref name="s"/>. After the
        /// second removal, -ηκ is kept when the stem is listed in <c>exc13</c>
        /// or ends with one of a fixed set of suffixes.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule13(char[] s, int len)
        {
            // First pass: the longer endings are dropped without any exception check.
            if (len > 6 && endsWith(s, len, "ηθηκεσ"))
            {
                len -= 6;
            }
            else if (len > 5 && (endsWith(s, len, "ηθηκα") || endsWith(s, len, "ηθηκε")))
            {
                len -= 5;
            }

            // Second pass: find out how many characters the shorter ending occupies.
            int stripped;
            if (len > 4 && endsWith(s, len, "ηκεσ"))
            {
                stripped = 4;
            }
            else if (len > 3 && (endsWith(s, len, "ηκα") || endsWith(s, len, "ηκε")))
            {
                stripped = 3;
            }
            else
            {
                // No short ending matched, so there is nothing to restore either.
                return len;
            }

            len -= stripped;

            bool keepEtaKappa = exc13.contains(s, 0, len)
                || endsWith(s, len, "σκωλ")
                || endsWith(s, len, "σκουλ")
                || endsWith(s, len, "ναρθ")
                || endsWith(s, len, "σφ")
                || endsWith(s, len, "οθ")
                || endsWith(s, len, "πιθ");

            // Two extra characters put the -ηκ back.
            return keepEtaKappa ? len + 2 : len;
        }
コード例 #4
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Handles the -αμε family of endings. A five-character word that is
        /// exactly αγαμε only loses its last character. Otherwise a trailing
        /// -ηθηκαμε, -ουσαμε, -αγαμε, -ησαμε or -ηκαμε is removed first, and then a
        /// trailing -αμε, keeping -αμ when the stem is listed in <c>exc7</c>.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule7(char[] s, int len)
        {
            bool isExactlyAgame = len == 5 && endsWith(s, len, "αγαμε");
            if (isExactlyAgame)
            {
                return len - 1;
            }

            // Length of the long ending to drop, if any (first match wins).
            int longEnding = 0;
            if (len > 7 && endsWith(s, len, "ηθηκαμε"))
            {
                longEnding = 7;
            }
            else if (len > 6 && endsWith(s, len, "ουσαμε"))
            {
                longEnding = 6;
            }
            else if (len > 5 && (endsWith(s, len, "αγαμε") || endsWith(s, len, "ησαμε") || endsWith(s, len, "ηκαμε")))
            {
                longEnding = 5;
            }
            len -= longEnding;

            if (len <= 3 || !endsWith(s, len, "αμε"))
            {
                return len;
            }

            int stemLength = len - 3;

            // Two extra characters put the -αμ back for listed exceptions.
            return exc7.contains(s, 0, stemLength) ? stemLength + 2 : stemLength;
        }
コード例 #5
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -αγεσ, -αγα or -αγε from the word in
        /// <paramref name="s"/>. The -αγ is kept when the stem matches the first
        /// exception group (<c>exc15a</c> or a fixed suffix list) and does not match
        /// the second one (<c>exc15b</c> or the suffix κολλ).
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule15(char[] s, int len)
        {
            int stripped;
            if (len > 4 && endsWith(s, len, "αγεσ"))
            {
                stripped = 4;
            }
            else if (len > 3 && (endsWith(s, len, "αγα") || endsWith(s, len, "αγε")))
            {
                stripped = 3;
            }
            else
            {
                return len;
            }

            len -= stripped;

            // Both groups are always evaluated, in this order, before they are combined.
            bool inRestoreGroup = exc15a.contains(s, 0, len)
                || endsWith(s, len, "οφ")
                || endsWith(s, len, "πελ")
                || endsWith(s, len, "χορτ")
                || endsWith(s, len, "λλ")
                || endsWith(s, len, "σφ")
                || endsWith(s, len, "ρπ")
                || endsWith(s, len, "φρ")
                || endsWith(s, len, "πρ")
                || endsWith(s, len, "λοχ")
                || endsWith(s, len, "σμην");

            bool inVetoGroup = exc15b.contains(s, 0, len) || endsWith(s, len, "κολλ");

            if (inRestoreGroup && !inVetoGroup)
            {
                // Two extra characters put the -αγ back.
                return len + 2;
            }

            return len;
        }
コード例 #6
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
 /// <summary>
 /// Removes a trailing -εωσ or -εων from the word in <paramref name="s"/>,
 /// keeping the leading -ε when the stem is listed in <c>exc4</c>.
 /// </summary>
 /// <param name="s"> buffer containing the word </param>
 /// <param name="len"> current length of the word in <paramref name="s"/> </param>
 /// <returns> the length of the word after this rule </returns>
 private int rule4(char[] s, int len)
 {
     bool hasEnding = len > 3 && (endsWith(s, len, "εωσ") || endsWith(s, len, "εων"));
     if (!hasEnding)
     {
         return len;
     }

     int stemLength = len - 3;

     // One extra character puts the -ε back for listed exceptions.
     return exc4.contains(s, 0, stemLength) ? stemLength + 1 : stemLength;
 }
コード例 #7
0
        /// <summary>
        /// A "CommonGramsQuery" factory created without a word list must still
        /// expose a non-null word set containing "the", and must turn
        /// "testing the factory" into the tokens "testing_the" and "the_factory".
        /// </summary>
        public virtual void testDefaults()
        {
            CommonGramsQueryFilterFactory defaultFactory = (CommonGramsQueryFilterFactory)tokenFilterFactory("CommonGramsQuery");
            CharArraySet commonSet = defaultFactory.CommonWords;
            assertTrue("words is null and it shouldn't be", commonSet != null);
            assertTrue(commonSet.contains("the"));

            // Whitespace-tokenized input is run through a filter made by the factory.
            StringReader input = new StringReader("testing the factory");
            Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
            TokenStream filtered = defaultFactory.create(source);

            string[] expectedTokens = new string[] { "testing_the", "the_factory" };
            assertTokenStreamContents(filtered, expectedTokens);
        }
コード例 #8
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ηστε from the word in <paramref name="s"/>,
        /// keeping -ηστ when the stem is listed in <c>exc17</c>.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule17(char[] s, int len)
        {
            if (len <= 4 || !endsWith(s, len, "ηστε"))
            {
                return len;
            }

            int stemLength = len - 4;

            // Three extra characters put the -ηστ back for listed exceptions.
            return exc17.contains(s, 0, stemLength) ? stemLength + 3 : stemLength;
        }
コード例 #9
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ιουντανε, -ιοντανε, -ουντανε, -ηθηκανε, -ιοτανε,
        /// -οντανε, -ουσανε, -αγανε, -ησανε, -οτανε or -ηκανε from the word in
        /// <paramref name="s"/>, and afterwards a trailing -ανε.
        /// </summary>
        /// <remarks>
        /// If one of the long endings was removed and the stem is listed in
        /// <c>exc8a</c>, the characters -αγαν are written into the buffer right
        /// after the stem. If -ανε was removed and either
        /// <c>endsWithVowelNoY</c> holds for the stem or the stem is listed in
        /// <c>exc8b</c>, -αν is kept.
        /// </remarks>
        /// <param name="s"> buffer containing the word; may be modified in place </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule8(char[] s, int len)
        {
            bool removed = false;

            // The alternatives of each group are parenthesised so that the length
            // guard applies to all of them. "&&" binds tighter than "||", so an
            // unparenthesised "len > N && a || b || c" guards only "a" and could
            // strip a word that is no longer than the suffix itself.
            if (len > 8 && endsWith(s, len, "ιουντανε"))
            {
                len    -= 8;
                removed = true;
            }
            else if (len > 7 && (endsWith(s, len, "ιοντανε") || endsWith(s, len, "ουντανε") || endsWith(s, len, "ηθηκανε")))
            {
                len    -= 7;
                removed = true;
            }
            else if (len > 6 && (endsWith(s, len, "ιοτανε") || endsWith(s, len, "οντανε") || endsWith(s, len, "ουσανε")))
            {
                len    -= 6;
                removed = true;
            }
            else if (len > 5 && (endsWith(s, len, "αγανε") || endsWith(s, len, "ησανε") || endsWith(s, len, "οτανε") || endsWith(s, len, "ηκανε")))
            {
                len    -= 5;
                removed = true;
            }

            if (removed && exc8a.contains(s, 0, len))
            {
                // Append -αγαν. Every branch above removed at least five characters,
                // so the four written positions lie inside the original word.
                len       += 4;
                s[len - 4] = 'α';
                s[len - 3] = 'γ';
                s[len - 2] = 'α';
                s[len - 1] = 'ν';
            }

            if (len > 3 && endsWith(s, len, "ανε"))
            {
                len -= 3;
                if (endsWithVowelNoY(s, len) || exc8b.contains(s, 0, len))
                {
                    len += 2;     // add back -αν
                }
            }

            return(len);
        }
コード例 #10
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ησετε and then a trailing -ετε from the word in
        /// <paramref name="s"/>. After -ετε is removed, -ετ is kept when the stem
        /// is listed in <c>exc9</c>, when <c>endsWithVowelNoY</c> holds for it, or
        /// when it ends with one of a fixed set of suffixes.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule9(char[] s, int len)
        {
            if (len > 5 && endsWith(s, len, "ησετε"))
            {
                len -= 5;
            }

            if (len <= 3 || !endsWith(s, len, "ετε"))
            {
                return len;
            }

            len -= 3;

            bool keepEpsilonTau = exc9.contains(s, 0, len)
                || endsWithVowelNoY(s, len)
                || endsWith(s, len, "οδ")
                || endsWith(s, len, "αιρ")
                || endsWith(s, len, "φορ")
                || endsWith(s, len, "ταθ")
                || endsWith(s, len, "διαθ")
                || endsWith(s, len, "σχ")
                || endsWith(s, len, "ενδ")
                || endsWith(s, len, "ευρ")
                || endsWith(s, len, "τιθ")
                || endsWith(s, len, "υπερθ")
                || endsWith(s, len, "ραθ")
                || endsWith(s, len, "ενθ")
                || endsWith(s, len, "ροθ")
                || endsWith(s, len, "σθ")
                || endsWith(s, len, "πυρ")
                || endsWith(s, len, "αιν")
                || endsWith(s, len, "συνδ")
                || endsWith(s, len, "συν")
                || endsWith(s, len, "συνθ")
                || endsWith(s, len, "χωρ")
                || endsWith(s, len, "πον")
                || endsWith(s, len, "βρ")
                || endsWith(s, len, "καθ")
                || endsWith(s, len, "ευθ")
                || endsWith(s, len, "εκθ")
                || endsWith(s, len, "νετ")
                || endsWith(s, len, "ρον")
                || endsWith(s, len, "αρκ")
                || endsWith(s, len, "βαρ")
                || endsWith(s, len, "βολ")
                || endsWith(s, len, "ωφελ");

            // Two extra characters put the -ετ back.
            return keepEpsilonTau ? len + 2 : len;
        }
コード例 #11
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ουσεσ, -ουσα or -ουσε from the word in
        /// <paramref name="s"/>. The -ουσ is kept when the stem is listed in
        /// <c>exc14</c>, when <c>endsWithVowel</c> holds for it, or when it ends
        /// with one of a fixed set of suffixes.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule14(char[] s, int len)
        {
            int stripped;
            if (len > 5 && endsWith(s, len, "ουσεσ"))
            {
                stripped = 5;
            }
            else if (len > 4 && (endsWith(s, len, "ουσα") || endsWith(s, len, "ουσε")))
            {
                stripped = 4;
            }
            else
            {
                return len;
            }

            len -= stripped;

            bool keepOus = exc14.contains(s, 0, len)
                || endsWithVowel(s, len)
                || endsWith(s, len, "ποδαρ")
                || endsWith(s, len, "βλεπ")
                || endsWith(s, len, "πανταχ")
                || endsWith(s, len, "φρυδ")
                || endsWith(s, len, "μαντιλ")
                || endsWith(s, len, "μαλλ")
                || endsWith(s, len, "κυματ")
                || endsWith(s, len, "λαχ")
                || endsWith(s, len, "ληγ")
                || endsWith(s, len, "φαγ")
                || endsWith(s, len, "ομ")
                || endsWith(s, len, "πρωτ");

            // Three extra characters put the -ουσ back.
            return keepOus ? len + 3 : len;
        }
コード例 #12
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Applies two removals in sequence to the word in <paramref name="s"/>:
        /// a trailing -ιεστε (keeping -ιεστ when the stem is listed in
        /// <c>exc12a</c>), then a trailing -εστε (keeping -εστ when the stem is
        /// listed in <c>exc12b</c>).
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule12(char[] s, int len)
        {
            if (len > 5 && endsWith(s, len, "ιεστε"))
            {
                int afterIeste = len - 5;

                // Four extra characters put the -ιεστ back for listed exceptions.
                len = exc12a.contains(s, 0, afterIeste) ? afterIeste + 4 : afterIeste;
            }

            // The second check runs on whatever length the first step left behind.
            if (len > 4 && endsWith(s, len, "εστε"))
            {
                int afterEste = len - 4;

                // Three extra characters put the -εστ back for listed exceptions.
                len = exc12b.contains(s, 0, afterEste) ? afterEste + 3 : afterEste;
            }

            return len;
        }
コード例 #13
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ησου, -ησε or -ησα from the word in
        /// <paramref name="s"/>, keeping -ησ when the stem is listed in
        /// <c>exc16</c>.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule16(char[] s, int len)
        {
            int stripped;
            if (len > 4 && endsWith(s, len, "ησου"))
            {
                stripped = 4;
            }
            else if (len > 3 && (endsWith(s, len, "ησε") || endsWith(s, len, "ησα")))
            {
                stripped = 3;
            }
            else
            {
                return len;
            }

            int stemLength = len - stripped;

            // Two extra characters put the -ησ back for listed exceptions.
            return exc16.contains(s, 0, stemLength) ? stemLength + 2 : stemLength;
        }
コード例 #14
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ησουμε, -ηθουμε or -ουμε from the word in
        /// <paramref name="s"/>. When the stem is listed in <c>exc19</c>, the
        /// characters -ουμ are written into the buffer right after the stem.
        /// </summary>
        /// <param name="s"> buffer containing the word; may be modified in place </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule19(char[] s, int len)
        {
            int stripped;
            if (len > 6 && (endsWith(s, len, "ησουμε") || endsWith(s, len, "ηθουμε")))
            {
                stripped = 6;
            }
            else if (len > 4 && endsWith(s, len, "ουμε"))
            {
                stripped = 4;
            }
            else
            {
                return len;
            }

            int stemLength = len - stripped;
            if (!exc19.contains(s, 0, stemLength))
            {
                return stemLength;
            }

            // At least four characters were removed, so these three slots were
            // part of the original word.
            s[stemLength]     = 'ο';
            s[stemLength + 1] = 'υ';
            s[stemLength + 2] = 'μ';
            return stemLength + 3;
        }
コード例 #15
0
ファイル: GreekStemmer.cs プロジェクト: zfxsss/lucenenet
        /// <summary>
        /// Removes a trailing -ικα, -ικο, -ικου or -ικων from the word in
        /// <paramref name="s"/>. The -ικ is kept when <c>endsWithVowel</c> holds
        /// for the stem or the stem is listed in <c>exc6</c>.
        /// </summary>
        /// <param name="s"> buffer containing the word </param>
        /// <param name="len"> current length of the word in <paramref name="s"/> </param>
        /// <returns> the length of the word after this rule </returns>
        private int rule6(char[] s, int len)
        {
            int stripped;
            if (len > 3 && (endsWith(s, len, "ικα") || endsWith(s, len, "ικο")))
            {
                stripped = 3;
            }
            else if (len > 4 && (endsWith(s, len, "ικου") || endsWith(s, len, "ικων")))
            {
                stripped = 4;
            }
            else
            {
                return len;
            }

            int stemLength = len - stripped;
            bool keepIk = endsWithVowel(s, stemLength) || exc6.contains(s, 0, stemLength);

            // Two extra characters put the -ικ back.
            return keepIk ? stemLength + 2 : stemLength;
        }
コード例 #16
0
ファイル: Stemmer.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Returns the stems of the given word with duplicates removed, keeping the
 /// first occurrence of each stem in its original order.
 /// </summary>
 /// <param name="word"> Word to find the stems for </param>
 /// <param name="length"> Length value handed to <c>stem</c> together with <paramref name="word"/> </param>
 /// <returns>
 /// The list produced by <c>stem</c> itself when it holds fewer than two
 /// entries, otherwise a new list without duplicates
 /// </returns>
 public IList<CharsRef> uniqueStems(char[] word, int length)
 {
     IList<CharsRef> allStems = stem(word, length);

     // Zero or one stem cannot contain a duplicate.
     if (allStems.Count < 2)
     {
         return allStems;
     }

     // Membership is tracked in a CharArraySet built with the dictionary's
     // ignoreCase setting.
     CharArraySet seen = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
     IList<CharsRef> uniqueOnly = new List<CharsRef>();
     foreach (CharsRef candidate in allStems)
     {
         if (seen.contains(candidate))
         {
             continue;
         }

         uniqueOnly.Add(candidate);
         seen.add(candidate);
     }

     return uniqueOnly;
 }
コード例 #17
0
        /// <summary>
        /// Analyzes a sentence with a <c>StopAnalyzer</c> built from the custom
        /// stop set {"good", "test", "analyzer"} and checks that no emitted token
        /// is in that set and that each token carries the expected position
        /// increment.
        /// </summary>
        public virtual void testStopListPositions()
        {
            CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
            string sentence = "This is a good test of the english stop analyzer with positions";
            int[] expectedIncrements = new int[] {1, 1, 1, 3, 1, 1, 1, 2, 1};
            TokenStream tokens = analyzer.tokenStream("test", sentence);
            try
            {
                assertNotNull(tokens);
                CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));
                PositionIncrementAttribute increment = tokens.addAttribute(typeof(PositionIncrementAttribute));

                tokens.reset();
                for (int tokenIndex = 0; tokens.incrementToken(); tokenIndex++)
                {
                    string tokenText = term.ToString();
                    assertFalse(customStops.contains(tokenText));
                    assertEquals(expectedIncrements[tokenIndex], increment.PositionIncrement);
                }
                tokens.end();
            }
            finally
            {
                // The stream is closed whether or not the assertions above passed.
                IOUtils.closeWhileHandlingException(tokens);
            }
        }
コード例 #18
0
        /// <summary>
        /// Analyzes a sentence with a <c>StopAnalyzer</c> built from the custom
        /// stop set {"good", "test", "analyzer"} and checks that none of the
        /// emitted tokens is contained in that set.
        /// </summary>
        public virtual void testStopList()
        {
            CharArraySet customStops = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, customStops);
            TokenStream tokens = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
            try
            {
                assertNotNull(tokens);
                CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

                tokens.reset();
                while (tokens.incrementToken())
                {
                    string tokenText = term.ToString();
                    assertFalse(customStops.contains(tokenText));
                }
                tokens.end();
            }
            finally
            {
                // The stream is closed whether or not the assertions above passed.
                IOUtils.closeWhileHandlingException(tokens);
            }
        }