/// <summary>
/// Converts an input string into a sequence of per-word feature vectors
/// (e.g. letter-n-gram vectors when <paramref name="feaType"/> is <c>l3g</c>).
/// The first <c>nMaxLength - 1</c> words each produce their own vector; all
/// remaining words are merged into one trailing vector by summing the values of
/// matching keys, so the result holds at most <paramref name="nMaxLength"/> entries
/// (for <c>nMaxLength &gt;= 1</c>).
/// </summary>
/// <param name="s">Input string; it is split into words with <c>TokenizeToArray</c>.</param>
/// <param name="N">N-gram size, forwarded to <c>String2L3g</c> (only used for <c>FeatureType.l3g</c>).</param>
/// <param name="nMaxLength">
/// Maximum length of the returned sequence. Values less than or equal to 1 cause
/// every word to be merged into a single vector.
/// </param>
/// <param name="feaType">
/// Which feature extractor to apply to each word. Words are skipped when the value
/// is not one of <c>l3g</c>, <c>root</c> or <c>infl</c>.
/// </param>
/// <returns>
/// The list of feature vectors, in word order, with the merged overflow vector
/// (if it is non-empty) as the last element.
/// </returns>
public static List<Dictionary<string, double>> String2FeatStrSeq(string s, int N, int nMaxLength, FeatureType feaType)
{
    List<Dictionary<string, double>> rgWfs = new List<Dictionary<string, double>>();
    string[] rgw = TokenizeToArray(s);

    // Leading words: one feature vector per word.
    int headCount = Math.Min(rgw.Length, nMaxLength - 1);
    for (int i = 0; i < headCount; ++i)
    {
        Dictionary<string, double> wordFeats;
        if (TryWord2FeatDict(rgw[i], N, feaType, out wordFeats))
        {
            rgWfs.Add(wordFeats);
        }
    }

    // Overflow words: everything from index nMaxLength - 1 onwards is folded into
    // a single vector. The start index is clamped to 0 so that a non-positive
    // nMaxLength cannot index the token array with a negative value.
    Dictionary<string, double> merged = new Dictionary<string, double>();
    int overflowStart = Math.Max(0, nMaxLength - 1);
    for (int i = overflowStart; i < rgw.Length; ++i)
    {
        Dictionary<string, double> wordFeats;
        if (!TryWord2FeatDict(rgw[i], N, feaType, out wordFeats) || wordFeats == null)
        {
            // Unsupported feature type (or no features): nothing to merge.
            continue;
        }

        foreach (KeyValuePair<string, double> kv in wordFeats)
        {
            double existing;
            if (merged.TryGetValue(kv.Key, out existing))
            {
                merged[kv.Key] = existing + kv.Value;
            }
            else
            {
                merged.Add(kv.Key, kv.Value);
            }
        }
    }

    if (merged.Count > 0)
    {
        rgWfs.Add(merged);
    }

    return rgWfs;
}

/// <summary>
/// Extracts the feature vector of a single word for the requested feature type.
/// </summary>
/// <param name="word">The word to convert.</param>
/// <param name="N">N-gram size, used only for <c>FeatureType.l3g</c>.</param>
/// <param name="feaType">The feature extractor to apply.</param>
/// <param name="feats">Receives the extractor's result; <c>null</c> when the type is unsupported.</param>
/// <returns><c>true</c> if <paramref name="feaType"/> is a supported type; otherwise <c>false</c>.</returns>
private static bool TryWord2FeatDict(string word, int N, FeatureType feaType, out Dictionary<string, double> feats)
{
    switch (feaType)
    {
        case FeatureType.l3g:
            feats = String2L3g(word, N);
            return true;

        case FeatureType.root:
            feats = String2Root(word, RootModel.getInstance().dicWord2Roots);
            return true;

        case FeatureType.infl:
            feats = String2Root(word, InflModel.getInstance().dicInfl2Ori);
            return true;

        default:
            feats = null;
            return false;
    }
}