Exemplo n.º 1
0
        //convert number-based list to letter-based list
        // Rewrites tagList in place. Each entry is split on Global.commaAry into
        // integer tag indices; every index is replaced by tagMap[index] and the
        // results are re-joined with ",".
        //   tagMap  - lookup from tag index to tag string.
        //   tagList - list whose entries are replaced by their converted form.
        // Throws Exception("error") when an index is not a key of tagMap; int.Parse
        // throws for tokens that are not integers. All entries are converted before
        // tagList is touched, so a failure leaves tagList unmodified.
        static void getNewTagList(baseHashMap <int, string> tagMap, ref List <string> tagList)
        {
            List <string> converted = new List <string>();

            foreach (string entry in tagList)
            {
                string[] pieces = entry.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
                string[] names  = new string[pieces.Length];
                int      pos    = 0;

                foreach (string piece in pieces)
                {
                    int key = int.Parse(piece);
                    if (!tagMap.ContainsKey(key))
                    {
                        throw new Exception("error");
                    }
                    names[pos] = tagMap[key];
                    pos++;
                }

                converted.Add(string.Join(",", names));
            }

            // Swap the old contents for the converted ones, keeping the same list instance.
            tagList.Clear();
            tagList.AddRange(converted);
        }
Exemplo n.º 2
0
        //public void getMaps(string file)
        //{


        //    StreamReader swFeat = new StreamReader(Global.modelDir + "/featureIndex.txt");
        //    string line = "";
        //    int i = 0;
        //    while ((line = swFeat.ReadLine()) != null)
        //    {
        //        string[] strs = line.Split(' ');

        //        featureIndexMap[strs[0]] = i;
        //        i += 1;
        //    }

        //    swFeat.Close();

        //    StreamReader swTag = new StreamReader(Global.modelDir + "/tagIndex.txt");
        //    line = "";
        //    i = 0;
        //    while ((line = swTag.ReadLine()) != null)
        //    {
        //        string[] strs = line.Split(' ');

        //        tagIndexMap[strs[0]] = i;
        //        i += 1;
        //    }

        //    swTag.Close();

        //}



        //for small memory load, should read line by line
        // Converts the text data file `file` into an index-based feature file and an
        // index-based gold-tag file, reading the input one line at a time.
        //
        // Output paths: Global.fFeatureTrain / Global.fGoldTrain when `file` equals
        // Global.fTrain, otherwise Global.fFeatureTest / Global.fGoldTest.
        //
        // Input handling, per line:
        //   - tabs are replaced by spaces and the line is split on Global.blankAry;
        //   - a line with no tokens ends the current sample;
        //   - otherwise the last token is the tag and tokens 1 .. Length-2 are
        //     features; token 0 is never read (NOTE(review): presumably the raw
        //     word/observation column - confirm against the data format).
        //
        // Output layout:
        //   - feature file: featureIndexMap.Count, a blank line, then one line per
        //     input line holding "index," or "index/value," for every feature
        //     "<position>.<name>" found in featureIndexMap ("0" when none is found);
        //     a blank line ends each sample;
        //   - gold file: tagIndexMap.Count, a blank line, then "tagIndex," for every
        //     input line, all on one line per sample, followed by a blank line.
        //
        // If `file` does not exist a message is printed and nothing is written.
        public void convertFile(string file)
        {
            if (!File.Exists(file))
            {
                Console.WriteLine("file {0} no exist!", file);
                return;
            }
            //Console.WriteLine("file {0} converting...", file);
            Console.WriteLine("file converting...");

            bool isTrain = (file == Global.fTrain);

            // using blocks guarantee that the reader and both writers are closed even
            // when a lookup or an I/O call throws part-way through the conversion.
            using (StreamReader sr = new StreamReader(file))
            using (StreamWriter swFeature = new StreamWriter(isTrain ? Global.fFeatureTrain : Global.fFeatureTest))
            using (StreamWriter swGold = new StreamWriter(isTrain ? Global.fGoldTrain : Global.fGoldTest))
            {
                swFeature.WriteLine(featureIndexMap.Count);
                swFeature.WriteLine();
                swGold.WriteLine(tagIndexMap.Count);
                swGold.WriteLine();

                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // ReadLine already strips "\r", "\n" and "\r\n", so only tabs need normalizing.
                    line = line.Replace("\t", " ");

                    string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
                    if (ary.Length == 0)//end of a sample (empty line, or a line holding only separators)
                    {
                        swFeature.WriteLine();
                        // first WriteLine terminates the sample's tag line, second adds the blank separator
                        swGold.WriteLine();
                        swGold.WriteLine();
                        continue;
                    }

                    bool wroteFeature = false;
                    for (int i = 1; i < ary.Length - 1; i++)
                    {
                        if (ary[i] == "/")//no feature here
                        {
                            continue;
                        }
                        string[] ary2 = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries);//for real-value features
                        if (ary2.Length == 0)//token made only of separators: no feature name to look up
                        {
                            continue;
                        }

                        // feature keys are "<column position>.<feature text>"
                        string feature = i.ToString() + "." + ary2[0];
                        if (featureIndexMap.ContainsKey(feature) == false)
                        {
                            continue;
                        }

                        wroteFeature = true;
                        int fIndex = featureIndexMap[feature];
                        if (ary2.Length > 1)
                        {
                            // real-valued feature: keep the value next to the index
                            swFeature.Write("{0}/{1},", fIndex, ary2[1]);
                        }
                        else
                        {
                            swFeature.Write("{0},", fIndex);
                        }
                    }
                    if (!wroteFeature)
                    {
                        // placeholder so a line without known features is not left empty
                        swFeature.Write("0");
                    }
                    swFeature.WriteLine();

                    string tag    = ary[ary.Length - 1];
                    int    tIndex = tagIndexMap[tag];
                    swGold.Write("{0},", tIndex);
                }
            }
        }