예제 #1
0
        /// <summary>
        /// Converts an input string into a sequence of per-word feature dictionaries.
        /// The string is split with TokenizeToArray. Each of the first (nMaxLength - 1)
        /// words contributes its own dictionary; the features of every remaining word
        /// are summed key-by-key into one trailing dictionary, which is appended only
        /// when it is non-empty.
        /// </summary>
        /// <param name="s">input string</param>
        /// <param name="N">n-gram size; forwarded to String2L3g when feaType is FeatureType.l3g</param>
        /// <param name="nMaxLength">
        /// max length of the returned sequence; values below 1 are treated like 1
        /// (all words are merged into the single trailing dictionary)
        /// </param>
        /// <param name="feaType">
        /// which feature extractor to apply to each word (l3g, root or infl);
        /// any other value yields no features for any word
        /// </param>
        /// <returns>list of feature-name to weight dictionaries, one per sequence position</returns>
        public static List<Dictionary<string, double>> String2FeatStrSeq(string s, int N, int nMaxLength, FeatureType feaType)
        {
            List<Dictionary<string, double>> rgWfs = new List<Dictionary<string, double>>();

            string[] rgw = TokenizeToArray(s);

            // Number of leading words that keep their own dictionary. Clamped at 0 so
            // that nMaxLength < 1 cannot produce a negative start index further down.
            int headCount = Math.Min(rgw.Length, Math.Max(nMaxLength - 1, 0));

            for (int i = 0; i < headCount; ++i)
            {
                Dictionary<string, double> wordFeats;
                if (TryString2WordFeats(rgw[i], N, feaType, out wordFeats))
                {
                    rgWfs.Add(wordFeats);
                }
            }

            // Everything past the head is folded into one accumulated dictionary.
            Dictionary<string, double> overflow = new Dictionary<string, double>();

            for (int i = headCount; i < rgw.Length; ++i)
            {
                Dictionary<string, double> wordFeats;

                // Skip words that produced nothing (unsupported feature type or a
                // null extractor result) instead of dereferencing null.
                if (!TryString2WordFeats(rgw[i], N, feaType, out wordFeats) || wordFeats == null)
                {
                    continue;
                }

                foreach (KeyValuePair<string, double> kv in wordFeats)
                {
                    double existing;
                    overflow[kv.Key] = overflow.TryGetValue(kv.Key, out existing)
                        ? existing + kv.Value
                        : kv.Value;
                }
            }

            if (overflow.Count > 0)
            {
                rgWfs.Add(overflow);
            }

            return rgWfs;
        }

        /// <summary>
        /// Runs the feature extractor selected by feaType on a single word.
        /// </summary>
        /// <param name="word">one token of the input string</param>
        /// <param name="N">n-gram size; only used for FeatureType.l3g</param>
        /// <param name="feaType">feature extractor selector</param>
        /// <param name="feats">extractor result, or null when feaType is not supported</param>
        /// <returns>true when feaType matched a known extractor; false otherwise</returns>
        private static bool TryString2WordFeats(string word, int N, FeatureType feaType, out Dictionary<string, double> feats)
        {
            switch (feaType)
            {
                case FeatureType.l3g:
                    feats = String2L3g(word, N);
                    return true;

                case FeatureType.root:
                    feats = String2Root(word, RootModel.getInstance().dicWord2Roots);
                    return true;

                case FeatureType.infl:
                    feats = String2Root(word, InflModel.getInstance().dicInfl2Ori);
                    return true;

                default:
                    feats = null;
                    return false;
            }
        }