/// <summary>
/// Looks up the suffix of <paramref name="tokenKey"/> via <c>GetWordSuffix</c> and, when the
/// result is not null, empty or whitespace-only, registers one occurrence of the feature
/// <c>nsuff{ngramLength}_{suffix}</c> on <paramref name="item"/>.
/// </summary>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="tokenKey">Token whose suffix is requested.</param>
/// <param name="ngramLength">
/// Length passed to <c>GetWordSuffix</c> and embedded in the feature key
/// (presumably the number of trailing characters; confirm against <c>GetWordSuffix</c>).
/// </param>
public static void ExtractSuffixFeatureFromSingleTokenAndUpdateItemFeatures(Dictionary<string, double> item, string tokenKey, int ngramLength)
{
    string suffix = GetWordSuffix(tokenKey, ngramLength);

    // Nothing usable came back for this token: leave the feature map untouched.
    if (string.IsNullOrWhiteSpace(suffix))
    {
        return;
    }

    string featureKey = string.Format("nsuff{0}_{1}", ngramLength, suffix);
    item.IncreaseFeatureFrequency(featureKey, 1);
}
/// <summary>
/// Obtains the character n-grams of <paramref name="tokenKey"/> from <c>GetCharNgramsFromWord</c>
/// and registers one occurrence of the feature <c>ngram{ngramLength}_{ngram}</c> on
/// <paramref name="item"/> for every returned entry that is not null, empty or whitespace-only.
/// </summary>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="tokenKey">Token handed to <c>GetCharNgramsFromWord</c>.</param>
/// <param name="ngramLength">N-gram length handed to the helper and embedded in the feature key.</param>
public static void ExtractCharNgramFeaturesFromSingleTokenAndUpdateItemFeatures(Dictionary<string, double> item, string tokenKey, int ngramLength)
{
    List<string> charNgrams = GetCharNgramsFromWord(tokenKey, ngramLength);

    // A null result from the helper means there is nothing to record.
    if (charNgrams == null)
    {
        return;
    }

    foreach (string charNgram in charNgrams)
    {
        // Skip blank entries so they never become feature keys.
        if (string.IsNullOrWhiteSpace(charNgram))
        {
            continue;
        }

        string featureKey = string.Format("ngram{0}_{1}", ngramLength, charNgram);
        item.IncreaseFeatureFrequency(featureKey, 1);
    }
}
/// <summary>
/// Registers one occurrence of the feature <c>word2gram_{first}_{second}</c> on
/// <paramref name="item"/> for every pair of adjacent tokens in <paramref name="commentTokens"/>.
/// </summary>
/// <remarks>
/// A list of N tokens contains N - 1 adjacent pairs. Lists with fewer than two tokens
/// produce no features.
/// </remarks>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="commentTokens">Tokens in their original order.</param>
public static void ExtractWord2gramFeaturesFromTextTokensAndUpdateItemFeatures(Dictionary<string, double> item, List<string> commentTokens)
{
    const string prefix = "word2gram";

    // The last pair starts at index Count - 2, so the loop must run while i < Count - 1.
    // (Stopping at Count - 2 silently dropped the final pair of every token list.)
    int pairCount = commentTokens.Count - 1;
    for (int i = 0; i < pairCount; i++)
    {
        string ngramToken = string.Format("{0}_{1}_{2}", prefix, commentTokens[i], commentTokens[i + 1]);
        item.IncreaseFeatureFrequency(ngramToken, 1);
    }
}
/// <summary>
/// Runs <paramref name="tokenKey"/> through <paramref name="stemmer"/>, registers one
/// occurrence of the feature <c>stem_{stem}</c> on <paramref name="item"/>, and returns the stem.
/// </summary>
/// <param name="stemmer">Stemmer whose <c>Stem</c> method is applied to the token.</param>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="tokenKey">Token to stem.</param>
/// <returns>The value returned by <c>stemmer.Stem</c> for <paramref name="tokenKey"/>.</returns>
public static string ExtractStemFeatureFromSingleTokenAndUpdateItemFeatures(Stemmer stemmer, Dictionary<string, double> item, string tokenKey)
{
    string stemmedToken = stemmer.Stem(tokenKey);
    string featureKey = "stem_" + stemmedToken;

    item.IncreaseFeatureFrequency(featureKey, 1);

    return stemmedToken;
}
/// <summary>
/// Registers one occurrence of the bag-of-words feature <c>bow_{tokenKey}</c> on
/// <paramref name="item"/>.
/// </summary>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="tokenKey">Token used verbatim as the tail of the feature key.</param>
public static void ExtractBagOfWordFeatureFromSingleTokenAndUpdateItemFeatures(Dictionary<string, double> item, string tokenKey)
{
    string featureKey = string.Format("bow_{0}", tokenKey);

    item.IncreaseFeatureFrequency(featureKey, 1);
}
/// <summary>
/// Registers one occurrence of the feature <c>word{ngramLength}gram_{t1}_{t2}_..._{tn}</c> on
/// <paramref name="item"/> for every window of <paramref name="ngramLength"/> consecutive
/// tokens in <paramref name="commentTokens"/>.
/// </summary>
/// <remarks>
/// A list of N tokens contains N - n + 1 windows of length n, so a list whose length equals
/// <paramref name="ngramLength"/> yields exactly one feature. No features are produced when
/// <paramref name="ngramLength"/> is less than 1 or exceeds the number of tokens.
/// </remarks>
/// <param name="item">Feature map that is updated through <c>IncreaseFeatureFrequency</c>.</param>
/// <param name="commentTokens">Tokens in their original order.</param>
/// <param name="ngramLength">Number of consecutive tokens joined into each feature key.</param>
public static void ExtractWordNGramFeaturesFromTextTokensAndUpdateItemFeatures(Dictionary<string, double> item, List<string> commentTokens, int ngramLength)
{
    // A non-positive length has no meaningful window, and would index past the end of the
    // list with the inclusive bound below; a length above Count has no complete window.
    if (ngramLength < 1 || commentTokens.Count < ngramLength)
    {
        return;
    }

    string prefix = string.Format("word{0}gram", ngramLength);

    // Index of the first token of the final window. The bound is inclusive: the previous
    // exclusive bound (i < Count - ngramLength) dropped the last n-gram of every list.
    int lastWindowStart = commentTokens.Count - ngramLength;

    for (int i = 0; i <= lastWindowStart; i++)
    {
        StringBuilder sbNgramToken = new StringBuilder();
        sbNgramToken.Append(prefix).Append('_').Append(commentTokens[i]);

        for (int j = 1; j < ngramLength; j++)
        {
            sbNgramToken.Append('_').Append(commentTokens[i + j]);
        }

        item.IncreaseFeatureFrequency(sbNgramToken.ToString(), 1);
    }
}