/// <summary>
/// Asserts that a <see cref="FrenchAnalyzer"/> built for LUCENE_31 produces
/// no tokens at all for the capitalised input "Votre".
/// </summary>
public virtual void TestStopwordsCasing()
{
    // Warnings 612/618 (obsolete member usage) are silenced only around the
    // construction that references the legacy version constant.
#pragma warning disable 612, 618
    var analyzer = new FrenchAnalyzer(LuceneVersion.LUCENE_31);
#pragma warning restore 612, 618

    string[] noTokens = new string[] { };
    AssertAnalyzesTo(analyzer, "Votre", noTokens);
}
/// <summary>
/// Runs the accented and unaccented spellings of "sécuritaires" through
/// <c>CheckOneTerm</c>, expecting "securitair" for both.
/// </summary>
public virtual void TestAccentInsensitive()
{
    Analyzer analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);
    const string expectedTerm = "securitair";

    // Accented spelling first, then the plain one, matching the original order.
    foreach (string spelling in new string[] { "sécuritaires", "securitaires" })
    {
        CheckOneTerm(analyzer, spelling, expectedTerm);
    }
}
/// <summary>
/// Feeds two different inputs through the same <see cref="FrenchAnalyzer"/>
/// instance and checks the emitted terms for each.
/// </summary>
public virtual void TestReusableTokenStream()
{
    var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

    // stopwords
    string[] withoutStopwords = { "chien", "chat", "cheval" };
    AssertAnalyzesTo(analyzer, "le la chien les aux chat du des à cheval", withoutStopwords);

    // some nouns and adjectives
    string[] stemmedTerms = { "lanc", "chism", "habitabl", "chist", "element", "captif" };
    AssertAnalyzesTo(analyzer, "lances chismes habitable chiste éléments captifs", stemmedTerms);
}
/// <summary>
/// Checks that a word supplied through the constructor's third argument
/// (the set containing "habitable") comes out of analysis unchanged, while
/// "chiste" is still reduced to "chist".
/// </summary>
public virtual void TestExclusionTableViaCtor()
{
    CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    // Use the C# Add member rather than the Java-style lowercase add.
    exclusions.Add("habitable");

    string[] expected = { "habitable", "chist" };

    FrenchAnalyzer fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(fa, "habitable chiste", expected);

    // A second analyzer built from the very same set must behave identically.
    fa = new FrenchAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
    AssertAnalyzesTo(fa, "habitable chiste", expected);
}
/// <summary>
/// Exercises a <see cref="FrenchAnalyzer"/> built for LUCENE_30 against a
/// range of inputs: empty text, casing, punctuation, hyphenated names,
/// stopwords, nouns/adjectives, verbs and mixed alphanumeric tokens.
/// </summary>
public virtual void TestAnalyzer30()
{
    // LUCENE_30 is a legacy version constant; warnings 612/618 (obsolete
    // member usage) are suppressed only around the line that references it.
#pragma warning disable 612, 618
    FrenchAnalyzer fa = new FrenchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(fa, "", new string[] { });

    AssertAnalyzesTo(fa, "chien chat cheval", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, "chien CHAT CHEVAL", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, " chien ,? + = - CHAT /: > CHEVAL", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, "chien++", new string[] { "chien" });

    AssertAnalyzesTo(fa, "mot \"entreguillemet\"", new string[] { "mot", "entreguillemet" });

    // let's do some french specific tests now

    /* 1. couldn't resist
     * I would expect this to stay one term as in French the minus
     * sign is often used for composing words */
    AssertAnalyzesTo(fa, "Jean-François", new string[] { "jean", "françois" });

    // 2. stopwords
    AssertAnalyzesTo(fa, "le la chien les aux chat du des à cheval", new string[] { "chien", "chat", "cheval" });

    // some nouns and adjectives
    AssertAnalyzesTo(fa, "lances chismes habitable chiste éléments captifs", new string[] { "lanc", "chism", "habit", "chist", "élément", "captif" });

    // some verbs
    AssertAnalyzesTo(fa, "finissions souffrirent rugissante", new string[] { "fin", "souffr", "rug" });

    // some everything else
    // aujourd'hui stays one term which is OK
    AssertAnalyzesTo(fa, "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ", new string[] { "c3po", "aujourd'hui", "oeuf", "ïâöûàä", "anticonstitutionnel", "jav" });

    // some more everything else
    // here 1940-1945 stays as one term, 1940:1945 not ?
    AssertAnalyzesTo(fa, "33Bis 1940-1945 1940:1945 (---i+++)*", new string[] { "33bis", "1940-1945", "1940", "1945", "i" });
}
/// <summary>
/// Exercises a <see cref="FrenchAnalyzer"/> built for LUCENE_30 against a
/// range of inputs: empty text, casing, punctuation, hyphenated names,
/// stopwords, nouns/adjectives, verbs and mixed alphanumeric tokens.
/// </summary>
public virtual void TestAnalyzer30()
{
    // LUCENE_30 is a legacy version constant; warnings 612/618 (obsolete
    // member usage) are suppressed only around the line that references it.
#pragma warning disable 612, 618
    FrenchAnalyzer fa = new FrenchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(fa, "", new string[] { });

    AssertAnalyzesTo(fa, "chien chat cheval", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, "chien CHAT CHEVAL", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, " chien ,? + = - CHAT /: > CHEVAL", new string[] { "chien", "chat", "cheval" });

    AssertAnalyzesTo(fa, "chien++", new string[] { "chien" });

    AssertAnalyzesTo(fa, "mot \"entreguillemet\"", new string[] { "mot", "entreguillemet" });

    // let's do some french specific tests now

    /* 1. couldn't resist
     * I would expect this to stay one term as in French the minus
     * sign is often used for composing words */
    AssertAnalyzesTo(fa, "Jean-François", new string[] { "jean", "françois" });

    // 2. stopwords
    AssertAnalyzesTo(fa, "le la chien les aux chat du des à cheval", new string[] { "chien", "chat", "cheval" });

    // some nouns and adjectives
    AssertAnalyzesTo(fa, "lances chismes habitable chiste éléments captifs", new string[] { "lanc", "chism", "habit", "chist", "élément", "captif" });

    // some verbs
    AssertAnalyzesTo(fa, "finissions souffrirent rugissante", new string[] { "fin", "souffr", "rug" });

    // some everything else
    // aujourd'hui stays one term which is OK
    AssertAnalyzesTo(fa, "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ", new string[] { "c3po", "aujourd'hui", "oeuf", "ïâöûàä", "anticonstitutionnel", "jav" });

    // some more everything else
    // here 1940-1945 stays as one term, 1940:1945 not ?
    AssertAnalyzesTo(fa, "33Bis 1940-1945 1940:1945 (---i+++)*", new string[] { "33bis", "1940-1945", "1940", "1945", "i" });
}
/// <summary>
/// Analyzes "habitable" before and after calling
/// <c>SetStemExclusionTable</c> on the same analyzer: "habit" is expected
/// first, then the unchanged word once it is in the exclusion table.
/// </summary>
public void TestExclusionTableReuse()
{
    const string word = "habitable";
    var analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);

    // Before any exclusion is registered the word is stemmed.
    AssertAnalyzesToReuse(analyzer, word, new string[] { "habit" });

    // After registering it, the same analyzer must leave it untouched.
    analyzer.SetStemExclusionTable(new string[] { word });
    AssertAnalyzesToReuse(analyzer, word, new string[] { word });
}
/// <summary>
/// Feeds two different inputs through the same <see cref="FrenchAnalyzer"/>
/// instance via <c>AssertAnalyzesToReuse</c> and checks the emitted terms.
/// </summary>
public void TestReusableTokenStream()
{
    var analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);

    // stopwords
    string[] withoutStopwords = { "chien", "chat", "cheval" };
    AssertAnalyzesToReuse(analyzer, "le la chien les aux chat du des à cheval", withoutStopwords);

    // some nouns and adjectives
    string[] stemmedTerms = { "lanc", "chism", "habit", "chist", "élément", "captif" };
    AssertAnalyzesToReuse(analyzer, "lances chismes habitable chiste éléments captifs", stemmedTerms);
}
/// <summary>
/// Pins the LUCENE_30 behaviour in which the capitalised input "Votre" is
/// emitted as the single term "votr".
/// </summary>
public virtual void TestBuggyStopwordsCasing()
{
    // LUCENE_30 is a legacy version constant; warnings 612/618 (obsolete
    // member usage) are suppressed only around the line that references it.
#pragma warning disable 612, 618
    FrenchAnalyzer a = new FrenchAnalyzer(LuceneVersion.LUCENE_30);
#pragma warning restore 612, 618

    AssertAnalyzesTo(a, "Votre", new string[] { "votr" });
}
/// <summary>
/// Checks that "voir l'embrouille" analyzes to "voir" and "embrouil", i.e.
/// the expected output carries no "l'" prefix on the second term.
/// </summary>
public virtual void TestElision()
{
    var analyzer = new FrenchAnalyzer(TEST_VERSION_CURRENT);

    const string phrase = "voir l'embrouille";
    string[] expectedTerms = { "voir", "embrouil" };

    AssertAnalyzesTo(analyzer, phrase, expectedTerms);
}