/// <summary>
/// Rebuilds <c>sample.words</c> so that every key is replaced by the stem returned from
/// <see cref="EnglishPorter2Stemmer"/>; the values of keys that share a stem are summed.
/// </summary>
/// <param name="sample">Sample whose <c>words</c> collection is stemmed in place.</param>
static void Stem(Sample sample)
{
    EnglishPorter2Stemmer stemmer = new EnglishPorter2Stemmer();
    Dictionary<string, int> stemmedWords = new Dictionary<string, int>();

    foreach (var word in sample.words)
    {
        var stemmedKey = stemmer.Stem(word.Key).Value;

        // One lookup plus an indexer write replaces the former
        // ContainsKey / GetValueOrDefault / Remove / Add sequence.
        // When the stem is not present yet, runningTotal is 0.
        stemmedWords.TryGetValue(stemmedKey, out var runningTotal);
        stemmedWords[stemmedKey] = word.Value + runningTotal;
    }

    // The previous collection is emptied before being replaced, exactly as before,
    // so code still holding a reference to the old instance sees it cleared.
    sample.words.Clear();
    sample.words = stemmedWords;
}
/// <summary>
/// Stems <paramref name="unstemmed"/> and asserts that the result equals
/// <paramref name="expected"/>.
/// </summary>
/// <param name="unstemmed">Word handed to the stemmer.</param>
/// <param name="expected">Stem the word is expected to produce.</param>
public void Stem_WithBatchData_StemsAllWordsCorrectly(string unstemmed, string expected)
{
    // Arrange
    var sut = new EnglishPorter2Stemmer();

    // Act
    var actualStem = sut.Stem(unstemmed).Value;

    // Assert
    Assert.AreEqual(expected, actualStem);
}
/// <summary>
/// Creates the stemmer and the empty lookup collections, then calls
/// <c>LoadWordsStructures</c> with the supplied file name.
/// </summary>
/// <param name="patternsFileNameJSON">
/// Passed unchanged to <c>LoadWordsStructures</c>; presumably the path of a JSON
/// patterns file (confirm against that method).
/// </param>
public PatternsFinder(string patternsFileNameJSON)
{
    this.stemmer = new EnglishPorter2Stemmer();

    // Start with empty containers; they are created before LoadWordsStructures runs.
    this._wordsTypes = new Dictionary<string, HashSet<string>>();
    this._patterns = new Dictionary<string, Dictionary<string, string>>();
    this._cutWords = new Dictionary<string, LinkedList<string>>();
    this._keyWords = new LinkedList<string>();

    this.LoadWordsStructures(patternsFileNameJSON);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> turns "youth" into "Youth".
/// </summary>
public void MarkVowelsAsConsonants_WithInitialY_MarksYAsConsonant()
{
    // Arrange
    const string input = "youth";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("Youth", marked);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> turns "boyish" into "boYish".
/// </summary>
public void MarkVowelsAsConsonants_WithYBetweenTwoVowels_MarksYAsConsonant()
{
    // Arrange
    const string input = "boyish";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("boYish", marked);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> returns "flying" unchanged.
/// </summary>
public void MarkVowelsAsConsonants_WithVowelOnlyFollowingY_DoesNotMarkYAsConsonant()
{
    // Arrange
    const string input = "flying";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("flying", marked);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> returns "syzygy" unchanged.
/// </summary>
public void MarkVowelsAsConsonants_WithNoVowelsButY_DoesNotMarkAnyYAsConsonant()
{
    // Arrange
    const string input = "syzygy";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("syzygy", marked);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> turns "sayyid" into "saYyid".
/// </summary>
public void MarkVowelsAsConsonants_WithDoubledY_MarksFirstButNotSecondYAsConsonant()
{
    // Arrange
    const string input = "sayyid";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("saYyid", marked);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> turns "luxuriated" into "luxuriate".
/// </summary>
public void RemoveLySuffixes_EndingInInglyAndAtProceedsThat_ReplacesSuffixWithE()
{
    // Arrange
    // NOTE(review): the method name mentions "ingly", but the input used here ends in "ed".
    const string input = "luxuriated";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("luxuriate", result);
}
/// <summary>
/// Checks that <c>Step0RemoveSPluralSuffix</c> turns "holy's'" into "holy".
/// </summary>
public void RemoveSPluralSuffix_WithWordEndingInApostropheSApostrophe_RemovesSuffix()
{
    // Arrange
    const string input = "holy's'";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step0RemoveSPluralSuffix(input);

    // Assert
    Assert.AreEqual("holy", result);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> turns "kiwis" into "kiwi".
/// </summary>
public void RemoveOtherSPluralSuffix_EndingInSAndContainingAVowelRightBeforeAndEarlierInWord_DeletesTheS()
{
    // Arrange
    const string input = "kiwis";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("kiwi", result);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> turns "tied" into "tie".
/// </summary>
public void RemoveOtherSPluralSuffix_WithShortWordEndingInIed_ReplaceWithIe()
{
    // Arrange
    const string input = "tied";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("tie", result);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> turns "inbreed" into "inbree".
/// </summary>
public void RemoveLySuffixes_EndingInEedAndInR1_ReplacesSuffixWithEe()
{
    // Arrange
    const string input = "inbreed";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("inbree", result);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> returns "consensus" unchanged.
/// </summary>
public void RemoveOtherSPluralSuffix_EndingInUs_LeavesWordAlone()
{
    // Arrange
    const string input = "consensus";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("consensus", result);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> turns "hoping" into "hope".
/// </summary>
public void RemoveLySuffixes_EndingInIngAndIsShortWord_ReplacesSuffixWithE()
{
    // Arrange
    const string input = "hoping";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("hope", result);
}
/// <summary>
/// Checks that <c>Step1CReplaceSuffixYWithIIfPreceededWithConsonant</c> returns "say" unchanged.
/// </summary>
public void ReplaceYSuffix_NotPreceededyConsonant_DoesNotReplaceSuffix()
{
    // Arrange
    const string input = "say";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1CReplaceSuffixYWithIIfPreceededWithConsonant(input);

    // Assert
    Assert.AreEqual("say", result);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> turns "hopping" into "hop".
/// </summary>
public void RemoveLySuffixes_EndingInIngAndDoubledLetterProceedsThat_RemovesDoubledLetter()
{
    // Arrange
    const string input = "hopping";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("hop", result);
}
/// <summary>
/// Checks that <c>Step1CReplaceSuffixYWithIIfPreceededWithConsonant</c> turns "cry" into "cri".
/// </summary>
public void ReplaceYSuffix_PreceededByConsonant_ReplacesSuffixWithI()
{
    // Arrange
    const string input = "cry";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1CReplaceSuffixYWithIIfPreceededWithConsonant(input);

    // Assert
    Assert.AreEqual("cri", result);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> turns "cries" into "cri".
/// </summary>
public void RemoveOtherSPluralSuffix_WithLongWordEndingInIes_ReplaceWithI()
{
    // Arrange
    const string input = "cries";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("cri", result);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> returns "fred" unchanged.
/// </summary>
public void RemoveLySuffixes_EndingInEdAndDoesNotContainVowel_LeavesWord()
{
    // Arrange
    const string input = "fred";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("fred", result);
}
/// <summary>
/// Checks that <c>EndsInShortSyllable</c> returns <c>false</c> for "disturb".
/// </summary>
public void EndInShortSyllable_TestingDisturb_IsCountedAsShort()
{
    // Arrange
    const string input = "disturb";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var endsShort = sut.EndsInShortSyllable(input);

    // Assert
    // NOTE(review): the method name says "IsCountedAsShort", yet the expectation is false;
    // the assertion is kept exactly as it was written.
    Assert.IsFalse(endsShort);
}
/// <summary>
/// Checks that <c>GetRegion2</c> returns 7 for "beautiful".
/// </summary>
public void GetRegion2_WithWordContainingRegion1AndRegion2_ProvidesCorrectRangeForRegion2()
{
    // Arrange
    const string input = "beautiful";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var region2 = sut.GetRegion2(input);

    // Assert
    Assert.AreEqual(7, region2);
}
/// <summary>
/// Checks that <c>EndsInShortSyllable</c> returns <c>true</c> for "entrap".
/// </summary>
public void EndInShortSyllable_TestingEntrap_IsCountedAsShort()
{
    // Arrange
    const string input = "entrap";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var endsShort = sut.EndsInShortSyllable(input);

    // Assert
    Assert.IsTrue(endsShort);
}
/// <summary>
/// Checks that <c>EndsInShortSyllable</c> returns <c>false</c> for "uproot".
/// </summary>
public void EndInShortSyllable_TestingUproot_IsNotCountedAsShort()
{
    // Arrange
    const string input = "uproot";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var endsShort = sut.EndsInShortSyllable(input);

    // Assert
    Assert.IsFalse(endsShort);
}
/// <summary>
/// Checks that, for "beauty", the value returned by <c>GetRegion2</c> equals the word's
/// length (their difference is asserted to be 0).
/// </summary>
public void GetRegion2_WithWordContainingOnlyRegion1_ProvidesRangeWithLength0()
{
    // Arrange
    const string input = "beauty";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var region2 = sut.GetRegion2(input);

    // Assert
    Assert.AreEqual(0, region2 - input.Length);
}
/// <summary>
/// Checks that <c>IsShortWord</c> returns <c>false</c> for "beds".
/// </summary>
public void IsShortWord_TestingBeds_IsNotCountedAsShort()
{
    // Arrange
    const string input = "beds";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var isShort = sut.IsShortWord(input);

    // Assert
    Assert.IsFalse(isShort);
}
/// <summary>
/// Checks that <c>IsShortWord</c> returns <c>true</c> for "shred".
/// </summary>
public void IsShortWord_TestingShred_IsCountedAsShort()
{
    // Arrange
    const string input = "shred";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var isShort = sut.IsShortWord(input);

    // Assert
    Assert.IsTrue(isShort);
}
/// <summary>
/// Checks that <c>GetRegion1</c> returns 5 for "beauty".
/// </summary>
public void GetRegion1_WithWordContainingOnlyRegion1_ProvidesCorrectRangeForRegion1()
{
    // Arrange
    const string input = "beauty";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var region1 = sut.GetRegion1(input);

    // Assert
    Assert.AreEqual(5, region1);
}
/// <summary>
/// Checks that <c>EndsInShortSyllable</c> returns <c>true</c> for "on".
/// </summary>
public void EndInShortSyllable_TestingOn_IsCountedAsShort()
{
    // Arrange
    const string input = "on";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var endsShort = sut.EndsInShortSyllable(input);

    // Assert
    Assert.IsTrue(endsShort);
}
/// <summary>
/// Data-driven check: reads the unstemmed word from column 0 and the expected stem from
/// column 1 of <c>TestContext.DataRow</c>, stems the word, and compares the two.
/// </summary>
public void Stem_WithBatchData_StemsAllWordsCorrectly()
{
    // Arrange
    var sut = new EnglishPorter2Stemmer();
    var dataRow = TestContext.DataRow;
    var input = dataRow[0].ToString();
    var expectedStem = dataRow[1].ToString();

    // Act
    var actualStem = sut.Stem(input).Value;

    // Assert
    Assert.AreEqual(expectedStem, actualStem);
}
/// <summary>
/// Normalizes a word for use as an index key. The input is passed through
/// <c>FullWidthCharToHalfWidthChar</c>, then <c>RemoveSpecialCharacters</c>, then stemmed
/// with <see cref="EnglishPorter2Stemmer"/>, and finally lower-cased.
/// </summary>
/// <param name="input">Word to normalize; may be <c>null</c>.</param>
/// <returns>The normalized word, or <c>null</c> when <paramref name="input"/> is <c>null</c>.</returns>
public static string NormalizeIndexWord(string input)
{
    if (input == null)
    {
        return null;
    }

    var result = FullWidthCharToHalfWidthChar(input);
    result = RemoveSpecialCharacters(result);
    result = new EnglishPorter2Stemmer().Stem(result).Value;

    // Lower-case with the invariant culture so the normalized key does not depend on the
    // current thread culture (e.g. Turkish casing maps 'I' to a dotless 'ı').
    return result.ToLowerInvariant();
}
/// <summary>
/// Splits <paramref name="webcontent"/> on single spaces, stems every token, and joins the
/// stems back together. Each stem is preceded by one space, so the result starts with a
/// space.
/// </summary>
/// <param name="webcontent">Space-separated text to stem.</param>
/// <returns>The stemmed tokens, each prefixed with a single space.</returns>
public string stemming(string webcontent)
{
    // The stemmer itself comes from an existing implementation added to the solution as a project.
    EnglishPorter2Stemmer stem = new EnglishPorter2Stemmer();
    string[] words = webcontent.Split(' ');

    // A builder avoids re-copying the accumulated string on every iteration,
    // which made the original concatenation loop quadratic in the input size.
    var stemmedwords = new System.Text.StringBuilder(webcontent.Length + 1);
    foreach (var word in words)
    {
        stemmedwords.Append(' ').Append(stem.Stem(word).Value);
    }

    return stemmedwords.ToString();
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> returns "gas" unchanged.
/// </summary>
public void RemoveOtherSPluralSuffix_EndingInSAndContainingAVowelRightBefore_LeavesTheS()
{
    // Arrange
    const string input = "gas";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("gas", result);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> turns "assesses" into "assess".
/// </summary>
public void RemoveOtherSPluralSuffix_WithWordEndingInSses_ReplaceWithSs()
{
    // Arrange
    const string input = "assesses";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("assess", result);
}
/// <summary>
/// Checks that <c>Step0RemoveSPluralSuffix</c> turns "holy'" into "holy".
/// </summary>
public void RemoveSPluralSuffix_WithWordEndingInApostrophe_RemovesSuffix()
{
    // Arrange
    const string input = "holy'";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step0RemoveSPluralSuffix(input);

    // Assert
    Assert.AreEqual("holy", result);
}
/// <summary>
/// Checks that <c>Step1CReplaceSuffixYWithIIfPreceededWithConsonant</c> returns "by" unchanged.
/// </summary>
public void ReplaceYSuffix_PreceededByConsonantAsFirstLetterOfWord_DoesNotReplaceSuffix()
{
    // Arrange
    const string input = "by";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1CReplaceSuffixYWithIIfPreceededWithConsonant(input);

    // Assert
    Assert.AreEqual("by", result);
}
/// <summary>
/// Checks that <c>MarkYsAsConsonants</c> returns "fly" unchanged.
/// </summary>
public void MarkVowelsAsConsonants_WithYAfterConsonant_DoesNotMarkYAsConsonant()
{
    // Arrange
    const string input = "fly";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var marked = sut.MarkYsAsConsonants(input);

    // Assert
    Assert.AreEqual("fly", marked);
}
/// <summary>
/// Checks that <c>Step1ARemoveOtherSPluralSuffixes</c> turns "gaps" into "gap".
/// </summary>
public void RemoveOtherSPluralSuffix_EndingInSAndContainingAVowelEarlierInWord_DeletesTheS()
{
    // Arrange
    const string input = "gaps";
    var sut = new EnglishPorter2Stemmer();

    // Act
    var result = sut.Step1ARemoveOtherSPluralSuffixes(input);

    // Assert
    Assert.AreEqual("gap", result);
}
/// <summary>
/// Checks that <c>Step1BRemoveLySuffixes</c> turns "inbreedly" into "inbree".
/// </summary>
public void RemoveLySuffixes_EndingInEedlyAndInR1_ReplacesSuffixWithEe()
{
    // Arrange
    const string input = "inbreedly";
    var sut = new EnglishPorter2Stemmer();
    var region1 = sut.GetRegion1(input);

    // Act
    var result = sut.Step1BRemoveLySuffixes(input, region1);

    // Assert
    Assert.AreEqual("inbree", result);
}