Esempio n. 1
0
        //convert number-based list to letter-based list
        static void getNewTagList(baseHashMap <int, string> tagMap, ref List <string> tagList)
        {
            List <string> tmpList = new List <string>();

            foreach (string im in tagList)
            {
                string[] tagAry = im.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);

                for (int i = 0; i < tagAry.Length; i++)
                {
                    int index = int.Parse(tagAry[i]);
                    if (!tagMap.ContainsKey(index))
                    {
                        throw new Exception("error");
                    }
                    tagAry[i] = tagMap[index];
                }
                string newTags = string.Join(",", tagAry);
                tmpList.Add(newTags);
            }
            tagList.Clear();
            foreach (string im in tmpList)
            {
                tagList.Add(im);
            }
        }
Esempio n. 2
0
        public void getMaps(string file)
        {
            if (!File.Exists(file))
            {
                Console.WriteLine("file {0} no exist!", file);
                return;
            }
            Console.WriteLine("file {0} converting...", file);
            StreamReader sr = new StreamReader(file);

            baseHashMap <string, int> featureFreqMap = new baseHashMap <string, int>();
            baseHashSet <string>      tagSet         = new baseHashSet <string>();

            //get feature-freq info and tagset
            int nFeatTemp = 0;

            while (!sr.EndOfStream)
            {
                string line = sr.ReadLine();
                line = line.Replace("\t", " ");
                line = line.Replace("\r", "");

                if (line == "")
                {
                    continue;
                }

                string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
                nFeatTemp = ary.Length - 2;
                for (int i = 1; i < ary.Length - 1; i++)
                {
                    if (ary[i] == "/")//no feature here
                    {
                        continue;
                    }

                    if (Global.weightRegMode == "GL")
                    {
                        if (Global.GL_init == false && Global.groupTrim[i - 1])//this feature is removed in GL 1st step
                        {
                            continue;
                        }
                    }

                    string[] ary2    = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries);//for real-value features
                    string   feature = i.ToString() + "." + ary2[0];
                    featureFreqMap[feature]++;
                }

                string tag = ary[ary.Length - 1];
                tagSet.Add(tag);
            }

            //sort features
            List <string> sortList = new List <string>();

            foreach (baseHashMap <string, int> .KeyValuePair kv in featureFreqMap)
            {
                sortList.Add(kv.Key + " " + kv.Value);
            }
            if (Global.weightRegMode == "GL")//sort based on feature templates
            {
                sortList.Sort(listSortFunc.compareKV_key);
                //sortList.Reverse();

                StreamWriter sw = new StreamWriter("featureTemp_sorted.txt");
                foreach (string f in sortList)
                {
                    sw.WriteLine(f);
                }
                sw.Close();

                Global.groupStart = new List <int>();
                Global.groupEnd   = new List <int>();
                Global.groupStart.Add(0);
                for (int k = 1; k < sortList.Count; k++)
                {
                    string[] thisAry = sortList[k].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string[] preAry = sortList[k - 1].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string   str = thisAry[0], preStr = preAry[0];
                    if (str != preStr)
                    {
                        Global.groupStart.Add(k);
                        Global.groupEnd.Add(k);
                    }
                }
                Global.groupEnd.Add(sortList.Count);
            }
            else//sort based on feature frequency
            {
                sortList.Sort(listSortFunc.compareKV_value);//sort feature based on freq, for 1)compress .txt file 2)better edge features
                sortList.Reverse();
            }

            if (Global.weightRegMode == "GL" && Global.GL_init)
            {
                if (nFeatTemp != Global.groupStart.Count)
                {
                    throw new Exception("inconsistent # of features per line, check the feature file for consistency!");
                }
            }

            //feature index should begin from 0
            StreamWriter swFeat = new StreamWriter(Global.modelDir + "/featureIndex.txt");

            for (int i = 0; i < sortList.Count; i++)
            {
                string[] ary = sortList[i].Split(Global.blankAry);
                featureIndexMap[ary[0]] = i;
                swFeat.WriteLine("{0} {1}", ary[0].Trim(), i);
            }
            swFeat.Close();

            //label index should begin from 0
            StreamWriter  swTag       = new StreamWriter(Global.modelDir + "/tagIndex.txt");
            List <string> tagSortList = new List <string>();

            foreach (string tag in tagSet)
            {
                tagSortList.Add(tag);
            }
            tagSortList.Sort();//sort tags
            for (int i = 0; i < tagSortList.Count; i++)
            {
                tagIndexMap[tagSortList[i]] = i;
                swTag.WriteLine("{0} {1}", tagSortList[i], i);
            }
            swTag.Close();

            sr.Close();
        }