상속: Lucene.Net.Analysis.Analyzer
예제 #1
0
        // Analyzes the capitalized word "Votre" with a LUCENE_31-versioned analyzer and
        // expects no tokens at all (presumably it is removed as a stopword despite the
        // upper-case first letter -- inferred from the test name).
        public virtual void TestStopwordsCasing()
        {
            string[] noTokens = new string[0];

            // The pragma pair suppresses warnings 612/618 for the LUCENE_31 reference only.
#pragma warning disable 612, 618
            var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

            AssertAnalyzesTo(analyzer, "Votre", noTokens);
        }
예제 #2
0
        // Verifies that the accented and the unaccented spelling of "sécuritaires"
        // each analyze to the single term "securitair".
        public virtual void TestAccentInsensitive()
        {
            const string expectedTerm = "securitair";
            Analyzer analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

            // Accented spelling first, then the plain one, against the same analyzer.
            foreach (string spelling in new[] { "sécuritaires", "securitaires" })
            {
                CheckOneTerm(analyzer, spelling, expectedTerm);
            }
        }
예제 #3
0
        // Runs two inputs through one FrenchAnalyzer instance: a stopword-heavy phrase,
        // then a list of nouns and adjectives, checking the emitted terms for each.
        public virtual void TestReusableTokenStream()
        {
            var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

            // Only "chien", "chat" and "cheval" are expected to remain from this phrase.
            string[] afterStopwords = { "chien", "chat", "cheval" };
            AssertAnalyzesTo(analyzer, "le la chien les aux chat du des à cheval", afterStopwords);

            // Expected terms for a handful of nouns and adjectives.
            string[] nounAndAdjectiveTerms = { "lanc", "chism", "habitabl", "chist", "element", "captif" };
            AssertAnalyzesTo(analyzer, "lances chismes habitable chiste éléments captifs", nounAndAdjectiveTerms);
        }
예제 #4
0
        // Builds a CharArraySet containing "habitable", passes it as the third constructor
        // argument of FrenchAnalyzer, and checks that "habitable" is emitted unchanged while
        // "chiste" becomes "chist". The check runs against two analyzers built from the same set.
        public virtual void TestExclusionTableViaCtor()
        {
            const string input = "habitable chiste";

            CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            exclusions.add("habitable");

            // Two rounds: each constructs a fresh analyzer over the same exclusion set.
            for (int round = 0; round < 2; round++)
            {
                var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
                AssertAnalyzesTo(analyzer, input, new[] { "habitable", "chist" });
            }
        }
        // Feeds a fixed list of inputs through a LUCENE_30-versioned FrenchAnalyzer and
        // checks the exact terms produced for each one, in the order listed.
        public virtual void TestAnalyzer30()
        {
            var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_30);

            // inputs[i] is expected to analyze to expectedTerms[i].
            string[] inputs =
            {
                // empty input, plain words, mixed case, punctuation noise
                "",
                "chien chat cheval",
                "chien CHAT CHEVAL",
                "  chien  ,? + = -  CHAT /: > CHEVAL",
                "chien++",
                "mot \"entreguillemet\"",
                // French-specific: a hyphenated name (the original author would have
                // preferred one term, since the hyphen often composes words in French)
                "Jean-François",
                // stopwords
                "le la chien les aux chat du des à cheval",
                // some nouns and adjectives
                "lances chismes habitable chiste éléments captifs",
                // some verbs
                "finissions souffrirent rugissante",
                // assorted others; "aujourd'hui" staying one term is considered fine
                "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
                // more assorted: "1940-1945" stays one term while "1940:1945" is split
                "33Bis 1940-1945 1940:1945 (---i+++)*"
            };

            string[][] expectedTerms =
            {
                new string[0],
                new[] { "chien", "chat", "cheval" },
                new[] { "chien", "chat", "cheval" },
                new[] { "chien", "chat", "cheval" },
                new[] { "chien" },
                new[] { "mot", "entreguillemet" },
                new[] { "jean", "françois" },
                new[] { "chien", "chat", "cheval" },
                new[] { "lanc", "chism", "habit", "chist", "élément", "captif" },
                new[] { "fin", "souffr", "rug" },
                new[] { "c3po", "aujourd'hui", "oeuf", "ïâöûàä", "anticonstitutionnel", "jav" },
                new[] { "33bis", "1940-1945", "1940", "1945", "i" }
            };

            for (int i = 0; i < inputs.Length; i++)
            {
                AssertAnalyzesTo(analyzer, inputs[i], expectedTerms[i]);
            }
        }
예제 #6
0
        // Exercises a LUCENE_30-versioned FrenchAnalyzer against a series of inputs
        // (empty text, plain words, punctuation, French-specific cases) and checks the
        // exact terms emitted for each.
        public virtual void TestAnalyzer30()
        {
            var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_30);

            // Empty input yields no terms.
            AssertAnalyzesTo(analyzer, "", new string[0]);

            // Plain words, mixed case, and words surrounded by punctuation.
            AssertAnalyzesTo(
                analyzer,
                "chien chat cheval",
                new[] { "chien", "chat", "cheval" });
            AssertAnalyzesTo(
                analyzer,
                "chien CHAT CHEVAL",
                new[] { "chien", "chat", "cheval" });
            AssertAnalyzesTo(
                analyzer,
                "  chien  ,? + = -  CHAT /: > CHEVAL",
                new[] { "chien", "chat", "cheval" });
            AssertAnalyzesTo(
                analyzer,
                "chien++",
                new[] { "chien" });
            AssertAnalyzesTo(
                analyzer,
                "mot \"entreguillemet\"",
                new[] { "mot", "entreguillemet" });

            // French-specific cases from here on.

            // 1. Hyphenated name: the original author would have expected a single term,
            //    since the hyphen is often used to compose words in French.
            AssertAnalyzesTo(
                analyzer,
                "Jean-François",
                new[] { "jean", "françois" });

            // 2. Stopwords.
            AssertAnalyzesTo(
                analyzer,
                "le la chien les aux chat du des à cheval",
                new[] { "chien", "chat", "cheval" });

            // Some nouns and adjectives.
            AssertAnalyzesTo(
                analyzer,
                "lances chismes habitable chiste éléments captifs",
                new[] { "lanc", "chism", "habit", "chist", "élément", "captif" });

            // Some verbs.
            AssertAnalyzesTo(
                analyzer,
                "finissions souffrirent rugissante",
                new[] { "fin", "souffr", "rug" });

            // Assorted others; "aujourd'hui" staying one term is considered fine.
            AssertAnalyzesTo(
                analyzer,
                "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
                new[] { "c3po", "aujourd'hui", "oeuf", "ïâöûàä", "anticonstitutionnel", "jav" });

            // More assorted: "1940-1945" stays one term while "1940:1945" is split.
            AssertAnalyzesTo(
                analyzer,
                "33Bis 1940-1945 1940:1945 (---i+++)*",
                new[] { "33bis", "1940-1945", "1940", "1945", "i" });
        }
예제 #7
0
 // Analyzes "habitable" before and after calling SetStemExclusionTable on the same
 // analyzer: first expecting the term "habit", then expecting the word unchanged.
 public void TestExclusionTableReuse()
 {
     var analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);

     // Before any exclusion is registered.
     string[] termBeforeExclusion = { "habit" };
     AssertAnalyzesToReuse(analyzer, "habitable", termBeforeExclusion);

     // Register the word in the exclusion table and analyze it again.
     string[] excludedWords = { "habitable" };
     analyzer.SetStemExclusionTable(excludedWords);

     string[] termAfterExclusion = { "habitable" };
     AssertAnalyzesToReuse(analyzer, "habitable", termAfterExclusion);
 }
예제 #8
0
        // Pushes two inputs through the same analyzer via AssertAnalyzesToReuse:
        // a stopword-heavy phrase, then a list of nouns and adjectives.
        public void TestReusableTokenStream()
        {
            var analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);

            // Stopwords: only "chien", "chat" and "cheval" are expected to remain.
            string[] afterStopwords = { "chien", "chat", "cheval" };
            AssertAnalyzesToReuse(analyzer, "le la chien les aux chat du des à cheval", afterStopwords);

            // Some nouns and adjectives.
            string[] nounAndAdjectiveTerms = { "lanc", "chism", "habit", "chist", "élément", "captif" };
            AssertAnalyzesToReuse(analyzer, "lances chismes habitable chiste éléments captifs", nounAndAdjectiveTerms);
        }
예제 #9
0
        // With a LUCENE_30-versioned analyzer, the capitalized word "Votre" is expected
        // to yield exactly one term, "votr".
        public virtual void TestBuggyStopwordsCasing()
        {
            string[] expectedTerms = { "votr" };
            var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_30);

            AssertAnalyzesTo(analyzer, "Votre", expectedTerms);
        }
예제 #10
0
        // Analyzes "voir l'embrouille" and expects the terms "voir" and "embrouil";
        // the leading "l'" does not appear in the expected output.
        public virtual void TestElision()
        {
            const string phrase = "voir l'embrouille";
            string[] expectedTerms = { "voir", "embrouil" };

            var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);
            AssertAnalyzesTo(analyzer, phrase, expectedTerms);
        }
 // Checks that "sécuritaires" and "securitaires" both analyze to the one term "securitair".
 public virtual void TestAccentInsensitive()
 {
     const string withAccent = "sécuritaires";
     const string withoutAccent = "securitaires";
     const string expectedTerm = "securitair";

     Analyzer analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

     CheckOneTerm(analyzer, withAccent, expectedTerm);
     CheckOneTerm(analyzer, withoutAccent, expectedTerm);
 }
        // Expects a LUCENE_31-versioned analyzer to emit no terms for the capitalized
        // input "Votre".
        public virtual void TestStopwordsCasing()
        {
            const string input = "Votre";

            // Warnings 612/618 are suppressed only around the LUCENE_31 reference.
#pragma warning disable 612, 618
            var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

            string[] expectedTerms = new string[0];
            AssertAnalyzesTo(analyzer, input, expectedTerms);
        }
 // Expects a LUCENE_30-versioned analyzer to turn the capitalized input "Votre"
 // into the single term "votr".
 public virtual void TestBuggyStopwordsCasing()
 {
     const string input = "Votre";
     var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_30);

     AssertAnalyzesTo(analyzer, input, new[] { "votr" });
 }
 // Expects "voir l'embrouille" to analyze to "voir" followed by "embrouil",
 // i.e. with no term for the leading "l'".
 public virtual void TestElision()
 {
     var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);
     string[] expectedTerms = { "voir", "embrouil" };

     AssertAnalyzesTo(analyzer, "voir l'embrouille", expectedTerms);
 }
        // Supplies a CharArraySet holding "habitable" as the third FrenchAnalyzer constructor
        // argument and checks that "habitable" is emitted as-is while "chiste" becomes "chist".
        // A second analyzer built from the same set must give the same result.
        public virtual void TestExclusionTableViaCtor()
        {
            const string input = "habitable chiste";

            CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
            exclusions.add("habitable");

            // First analyzer over the exclusion set.
            var firstAnalyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            string[] firstExpected = { "habitable", "chist" };
            AssertAnalyzesTo(firstAnalyzer, input, firstExpected);

            // Second analyzer constructed from the very same set.
            var secondAnalyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
            string[] secondExpected = { "habitable", "chist" };
            AssertAnalyzesTo(secondAnalyzer, input, secondExpected);
        }
        // Uses a single FrenchAnalyzer for two consecutive analyses and checks the
        // terms produced by each: first a stopword-heavy phrase, then nouns and adjectives.
        public virtual void TestReusableTokenStream()
        {
            var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

            // Stopwords: only three words are expected to remain.
            AssertAnalyzesTo(
                analyzer,
                "le la chien les aux chat du des à cheval",
                new[] { "chien", "chat", "cheval" });

            // Some nouns and adjectives.
            AssertAnalyzesTo(
                analyzer,
                "lances chismes habitable chiste éléments captifs",
                new[] { "lanc", "chism", "habitabl", "chist", "element", "captif" });
        }