/// <summary>
/// Runs the default stemmer over a fixed word list and checks each result
/// against the expected stem at the same position.
/// </summary>
public void ExtensiveTest()
{
    // Words that are handed to the stemmer
    string[] inputs =
    {
        "consign", "consigned", "consigning", "consignment", "consist", "consisted",
        "consistency", "consistent", "consistently", "consisting", "consists",
        "consolation", "consolations", "consolatory", "console", "consoled", "consoles",
        "consolidate", "consolidated", "consolidating", "consoling", "consolingly",
        "consols", "consonant", "consort", "consorted", "consorting", "conspicuous",
        "conspicuously", "conspiracy", "conspirator", "conspirators", "conspire",
        "conspired", "conspiring", "constable", "constables", "constance", "constancy",
        "constant", "knack", "knackeries", "knacks", "knag", "knave", "knaves", "knavish",
        "kneaded", "kneading", "knee", "kneel", "kneeled", "kneeling", "kneels", "knees",
        "knell", "knelt", "templates", "cry", "Sky", "absolutely"
    };

    // Expected stems, index-aligned with the inputs (here identical to the inputs)
    string[] expected =
    {
        "consign", "consigned", "consigning", "consignment", "consist", "consisted",
        "consistency", "consistent", "consistently", "consisting", "consists",
        "consolation", "consolations", "consolatory", "console", "consoled", "consoles",
        "consolidate", "consolidated", "consolidating", "consoling", "consolingly",
        "consols", "consonant", "consort", "consorted", "consorting", "conspicuous",
        "conspicuously", "conspiracy", "conspirator", "conspirators", "conspire",
        "conspired", "conspiring", "constable", "constables", "constance", "constancy",
        "constant", "knack", "knackeries", "knacks", "knag", "knave", "knaves", "knavish",
        "kneaded", "kneading", "knee", "kneel", "kneeled", "kneeling", "kneels", "knees",
        "knell", "knelt", "templates", "cry", "Sky", "absolutely"
    };

    // The stemmer under test
    Stemmer stemmer = new DefaultStemmer();

    // Compare every produced stem with its expected counterpart
    for (int index = 0; index < inputs.Length; index++)
    {
        var actual = stemmer.GetSteamWord(inputs[index]);
        Assert.AreEqual(expected[index], actual);
    }

} // End of the ExtensiveTest method
/// <summary>
/// Tokenizes two sentences with the rule-based English tokenizer, stems the tokens
/// with the EnglishPorter2 stemmer option and checks the value of every resulting token.
/// </summary>
public void StemTokens()
{
    // First sentence
    var stemmer = new DefaultStemmer(new IStemmerLanguage[] { new EnglishLanguage() });
    var tokenizer = new DefaultTokenizer(new ITokenizerLanguage[]
    {
        new Enlighten.Tokenizer.Languages.English.EnglishLanguage(
            new IEnglishTokenFinder[] { new Word(), new Whitespace(), new Symbol() })
    });
    var tokens = tokenizer.Tokenize("This is a test.", TokenizerLanguage.EnglishRuleBased);
    var stemmed = stemmer.Stem(tokens, StemmerLanguage.EnglishPorter2);

    string[] expectedFirst = { "this", " ", "is", " ", "a", " ", "test", "." };
    for (int i = 0; i < expectedFirst.Length; i++)
    {
        Assert.Equal(expectedFirst[i], stemmed[i].Value);
    }

    // Second sentence, using freshly constructed stemmer and tokenizer instances
    stemmer = new DefaultStemmer(new IStemmerLanguage[] { new EnglishLanguage() });
    tokenizer = new DefaultTokenizer(new ITokenizerLanguage[]
    {
        new Enlighten.Tokenizer.Languages.English.EnglishLanguage(
            new IEnglishTokenFinder[] { new Word(), new Whitespace(), new Symbol() })
    });
    tokens = tokenizer.Tokenize("These are some more tests.", TokenizerLanguage.EnglishRuleBased);
    stemmed = stemmer.Stem(tokens, StemmerLanguage.EnglishPorter2);

    string[] expectedSecond = { "these", " ", "are", " ", "some", " ", "more", " ", "test", "." };
    for (int i = 0; i < expectedSecond.Length; i++)
    {
        Assert.Equal(expectedSecond[i], stemmed[i].Value);
    }
}
/// <summary>
/// Runs the default stemmer over a fixed word list and checks each result
/// against the expected stem at the same position.
/// </summary>
public void ExtensiveTest()
{
    // Words that are handed to the stemmer
    string[] inputs =
    {
        "consign", "consigned", "consigning", "consignment", "consist", "consisted",
        "consistency", "consistent", "consistently", "consisting", "consists",
        "consolation", "consolations", "consolatory", "console", "consoled", "consoles",
        "consolidate", "consolidated", "consolidating", "consoling", "consolingly",
        "consols", "consonant", "consort", "consorted", "consorting", "conspicuous",
        "conspicuously", "conspiracy", "conspirator", "conspirators", "conspire",
        "conspired", "conspiring", "constable", "constables", "constance", "constancy",
        "constant", "knack", "knackeries", "knacks", "knag", "knave", "knaves", "knavish",
        "kneaded", "kneading", "knee", "kneel", "kneeled", "kneeling", "kneels", "knees",
        "knell", "knelt", "templates", "cry", "Sky", "absolutely"
    };

    // Expected stems, index-aligned with the inputs (here identical to the inputs)
    string[] expected =
    {
        "consign", "consigned", "consigning", "consignment", "consist", "consisted",
        "consistency", "consistent", "consistently", "consisting", "consists",
        "consolation", "consolations", "consolatory", "console", "consoled", "consoles",
        "consolidate", "consolidated", "consolidating", "consoling", "consolingly",
        "consols", "consonant", "consort", "consorted", "consorting", "conspicuous",
        "conspicuously", "conspiracy", "conspirator", "conspirators", "conspire",
        "conspired", "conspiring", "constable", "constables", "constance", "constancy",
        "constant", "knack", "knackeries", "knacks", "knag", "knave", "knaves", "knavish",
        "kneaded", "kneading", "knee", "kneel", "kneeled", "kneeling", "kneels", "knees",
        "knell", "knelt", "templates", "cry", "Sky", "absolutely"
    };

    // The stemmer under test
    Stemmer stemmer = new DefaultStemmer();

    // Compare every produced stem with its expected counterpart
    for (int index = 0; index < inputs.Length; index++)
    {
        var actual = stemmer.GetSteamWord(inputs[index]);
        Assert.AreEqual(expected[index], actual);
    }
}
/// <summary>
/// Stems two word arrays with the EnglishPorter2 stemmer option and checks
/// every returned entry against the expected value.
/// </summary>
public void Stem()
{
    // First word list
    var stemmer = new DefaultStemmer(new IStemmerLanguage[] { new EnglishLanguage() });
    var stemmed = stemmer.Stem(new string[] { "This", "is", "a", "test" }, StemmerLanguage.EnglishPorter2);

    string[] expectedFirst = { "this", "is", "a", "test" };
    for (int i = 0; i < expectedFirst.Length; i++)
    {
        Assert.Equal(expectedFirst[i], stemmed[i]);
    }

    // Second word list, using a freshly constructed stemmer instance
    stemmer = new DefaultStemmer(new IStemmerLanguage[] { new EnglishLanguage() });
    stemmed = stemmer.Stem(new string[] { "These", "are", "some", "more", "tests" }, StemmerLanguage.EnglishPorter2);

    string[] expectedSecond = { "these", "are", "some", "more", "test" };
    for (int i = 0; i < expectedSecond.Length; i++)
    {
        Assert.Equal(expectedSecond[i], stemmed[i]);
    }
}
} // End of the GetCurrentDomain method

/// <summary>
/// Get the stemmer based on the language
/// </summary>
/// <remarks>
/// The language code is matched case-insensitively against the two-letter codes
/// handled in the switch below. The code is lower-cased with the invariant culture
/// so the match does not depend on the current thread culture (with a Turkish
/// culture, for example, "IT" and "FI" would lower-case to a dotless i and fail to match).
/// A null language, a null language code or a code that is not listed yields a
/// DefaultStemmer.
/// </remarks>
/// <param name="language">A reference to the language</param>
/// <returns>A reference to a Stemmer</returns>
public static Stemmer GetStemmer(Language language)
{
    // Read the language code, tolerating a missing language
    string language_code = language != null ? language.language_code : null;

    // Without a language code there is nothing to match, use the default stemmer
    if (language_code == null)
    {
        return new DefaultStemmer();
    }

    // Get the language code in lower case, independent of the current culture
    language_code = language_code.ToLowerInvariant();

    // Get a stemmer depending on the language
    switch (language_code)
    {
        case "da":
            return new DanishStemmer();
        case "nl":
            return new DutchStemmer();
        case "en":
            return new EnglishStemmer();
        case "fi":
            return new FinnishStemmer();
        case "fr":
            return new FrenchStemmer();
        case "de":
            return new GermanStemmer();
        case "it":
            return new ItalianStemmer();
        case "no":
            return new NorwegianStemmer();
        case "pt":
            return new PortugueseStemmer();
        case "ro":
            return new RomanianStemmer();
        case "es":
            return new SpanishStemmer();
        case "sv":
            return new SwedishStemmer();
        default:
            // Unknown language code, fall back to the default stemmer
            return new DefaultStemmer();
    }

} // End of the GetStemmer method