/// <summary>
/// Returns the shared <c>InflModel</c> held in the static <c>ins</c> member,
/// constructing it on the first call.
/// </summary>
/// <returns>The existing instance if one was already created; otherwise a newly created one.</returns>
public static InflModel getInstance()
{
    // Fast path: the instance was already built by an earlier call.
    if (ins != null)
    {
        return ins;
    }

    ins = new InflModel();
    return ins;
}
/// <summary>
/// Converts feature dictionaries keyed by feature string into dictionaries keyed by
/// integer feature id, using the vocabulary selected by <paramref name="featureType"/>.
/// </summary>
/// <param name="strFeqList">Input list of string-keyed feature dictionaries.</param>
/// <param name="featureType">
/// Selects the vocabulary: <c>FeatureType.root</c> uses <c>RootModel.getInstance().dic</c>,
/// <c>FeatureType.infl</c> uses <c>InflModel.getInstance().dic</c>. For any other value an
/// empty vocabulary is used, so every output dictionary is empty.
/// </param>
/// <param name="pos">Offset added to each vocabulary id to form the output key.</param>
/// <param name="count">
/// Set to the size of the selected vocabulary for root/infl; left unchanged for other feature types.
/// </param>
/// <returns>
/// A list with one dictionary per input dictionary, in the same order. Features that are
/// missing from the vocabulary, or whose vocabulary id is negative, are dropped.
/// </returns>
public static List<Dictionary<int, double>> StrFreq2IdFreq(List<Dictionary<string, double>> strFeqList, FeatureType featureType, int pos, ref int count)
{
    Dictionary<string, int> vocab;
    switch (featureType)
    {
        case FeatureType.root:
            vocab = RootModel.getInstance().dic;
            count = vocab.Count;
            break;

        case FeatureType.infl:
            vocab = InflModel.getInstance().dic;
            count = vocab.Count;
            break;

        default:
            // No vocabulary for this feature type: nothing will be mapped and count is untouched.
            vocab = new Dictionary<string, int>();
            break;
    }

    List<Dictionary<int, double>> result = new List<Dictionary<int, double>>(strFeqList.Count);
    foreach (var strFeatures in strFeqList)
    {
        Dictionary<int, double> idFeatures = new Dictionary<int, double>();
        foreach (var feature in strFeatures)
        {
            int id;
            // Single lookup instead of ContainsKey + indexer.
            if (vocab.TryGetValue(feature.Key, out id) && id >= 0)
            {
                idFeatures[id + pos] = feature.Value;
            }
        }

        result.Add(idFeatures);
    }

    return result;
}
/// <summary>
/// Converts an input string into a sequence of per-word feature dictionaries.
/// The string is split with <c>TokenizeToArray</c>. Each of the first
/// <c>nMaxLength - 1</c> words gets its own dictionary; the feature values of all remaining
/// words are summed into one final dictionary, which is appended only when it is non-empty.
/// </summary>
/// <param name="s">Input string.</param>
/// <param name="N">N-gram parameter forwarded to <c>String2L3g</c>; only used for <c>FeatureType.l3g</c>.</param>
/// <param name="nMaxLength">
/// Upper bound on the number of dictionaries returned. Values below 1 are treated like 1,
/// i.e. every word is merged into the single final dictionary.
/// </param>
/// <param name="feaType">
/// Feature extractor to use: <c>l3g</c> calls <c>String2L3g</c>, <c>root</c> calls
/// <c>String2Root</c> with <c>RootModel</c>'s <c>dicWord2Roots</c>, <c>infl</c> calls
/// <c>String2Root</c> with <c>InflModel</c>'s <c>dicInfl2Ori</c>. Any other value yields no features.
/// </param>
/// <returns>The list of feature dictionaries, in word order, with the merged tail dictionary last.</returns>
public static List<Dictionary<string, double>> String2FeatStrSeq(string s, int N, int nMaxLength, FeatureType feaType)
{
    List<Dictionary<string, double>> sequence = new List<Dictionary<string, double>>();
    string[] words = TokenizeToArray(s);

    // Number of leading words that keep their own dictionary. Clamped at zero so that a
    // non-positive nMaxLength cannot produce a negative start index for the tail loop.
    int headCount = Math.Max(0, Math.Min(words.Length, nMaxLength - 1));

    for (int i = 0; i < headCount; ++i)
    {
        Dictionary<string, double> wordFeatures = ExtractWordFeatures(words[i], N, feaType);
        if (wordFeatures != null)
        {
            sequence.Add(wordFeatures);
        }
    }

    // Everything past the head is accumulated into one dictionary.
    Dictionary<string, double> merged = new Dictionary<string, double>();
    for (int i = headCount; i < words.Length; ++i)
    {
        Dictionary<string, double> wordFeatures = ExtractWordFeatures(words[i], N, feaType);
        if (wordFeatures == null)
        {
            // Unhandled feature type: skip, matching what the head loop does.
            continue;
        }

        foreach (KeyValuePair<string, double> feature in wordFeatures)
        {
            double existing;
            if (merged.TryGetValue(feature.Key, out existing))
            {
                merged[feature.Key] = existing + feature.Value;
            }
            else
            {
                merged.Add(feature.Key, feature.Value);
            }
        }
    }

    if (merged.Count > 0)
    {
        sequence.Add(merged);
    }

    return sequence;
}

/// <summary>
/// Builds the feature dictionary for a single word with the extractor selected by
/// <paramref name="feaType"/>; returns null when the feature type is not handled.
/// </summary>
private static Dictionary<string, double> ExtractWordFeatures(string word, int N, FeatureType feaType)
{
    switch (feaType)
    {
        case FeatureType.l3g:
            return String2L3g(word, N);

        case FeatureType.root:
            return String2Root(word, RootModel.getInstance().dicWord2Roots);

        case FeatureType.infl:
            return String2Root(word, InflModel.getInstance().dicInfl2Ori);

        default:
            return null;
    }
}