Example #1
0
        //convert number-based list to letter-based list
        //
        //Each entry of tagList is split on Global.commaAry (empty pieces are dropped),
        //every piece is parsed as an int and replaced by tagMap's value for that int,
        //and the pieces are re-joined with ",". The same List instance is kept: it is
        //cleared and refilled with the converted entries.
        //
        //tagMap : lookup from numeric tag index to the tag string
        //tagList: entries to convert; rewritten in place
        //
        //Throws KeyNotFoundException when a parsed index is not a key of tagMap.
        //int.Parse propagates its own exceptions for pieces that are not valid integers.
        static void getNewTagList(baseHashMap <int, string> tagMap, ref List <string> tagList)
        {
            //collect the converted entries separately, so that tagList is left
            //untouched if any entry fails to convert
            List <string> converted = new List <string>(tagList.Count);

            foreach (string entry in tagList)
            {
                string[] tags = entry.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);

                for (int i = 0; i < tags.Length; i++)
                {
                    int index = int.Parse(tags[i]);
                    if (!tagMap.ContainsKey(index))
                    {
                        //say which index and which entry failed instead of a bare "error"
                        throw new KeyNotFoundException(
                            string.Format("tag index {0} in entry \"{1}\" has no mapping in tagMap", index, entry));
                    }
                    tags[i] = tagMap[index];
                }
                converted.Add(string.Join(",", tags));
            }

            tagList.Clear();
            tagList.AddRange(converted);
        }
Example #2
0
        //Scans the feature file and builds the feature-index and tag-index maps.
        //
        //Every non-blank line is split on Global.blankAry. The first token is not used
        //here, the last token is the tag, and each token in between is a feature for the
        //template at that position ("/" alone means "no feature"). A feature is keyed as
        //"<position>.<name>", where <name> is the part before the first slash.
        //
        //Side effects:
        //  - fills featureIndexMap and tagIndexMap (indices start from 0)
        //  - writes "featureIndex.txt" and "tagIndex.txt" in the current directory
        //  - when Global.regMode == "GL", rebuilds Global.groupStart / Global.groupEnd
        //
        //If the file does not exist a message is printed and nothing else happens.
        //Throws Exception in "GL" mode when the number of feature groups differs from the
        //number of feature columns seen on the last data line.
        public void getMaps(string file)
        {
            if (!File.Exists(file))
            {
                Console.WriteLine("file {0} no exist!", file);
                return;
            }
            Console.WriteLine("file {0} converting...", file);

            baseHashMap <string, int> featureFreqMap = new baseHashMap <string, int>();
            baseHashSet <string>      tagSet         = new baseHashSet <string>();

            //get feature-freq info and tagset
            int nFeatTemp = 0;

            //using: the reader is released as soon as the scan ends, even if a line throws
            using (StreamReader sr = new StreamReader(file))
            {
                while (!sr.EndOfStream)
                {
                    string line = sr.ReadLine();
                    line = line.Replace("\t", " ");
                    line = line.Replace("\r", "");

                    if (line == "")
                    {
                        continue;
                    }

                    string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
                    if (ary.Length == 0)
                    {
                        //whitespace-only line: no tokens, so there is no tag to read
                        continue;
                    }

                    //number of feature columns = all tokens except the first and the last
                    nFeatTemp = ary.Length - 2;
                    for (int i = 1; i < ary.Length - 1; i++)
                    {
                        if (ary[i] == "/")//no feature here
                        {
                            continue;
                        }
                        string[] ary2    = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries);//for real-value features
                        string   feature = i.ToString() + "." + ary2[0];
                        if (featureFreqMap.ContainsKey(feature) == false)
                        {
                            featureFreqMap[feature] = 1;
                        }
                        else
                        {
                            featureFreqMap[feature]++;
                        }
                    }

                    string tag = ary[ary.Length - 1];
                    tagSet.Add(tag);
                }
            }

            //sort features; each item is "<feature> <freq>"
            List <string> sortList = new List <string>();

            foreach (baseHashMap <string, int> .KeyValuePair kv in featureFreqMap)
            {
                sortList.Add(kv.Key + " " + kv.Value);
            }
            if (Global.regMode == "GL")//sort based on feature templates
            {
                sortList.Sort(listSortFunc.compareKV_key);

                //a new group starts wherever the text before the first dot
                //(the template position) differs from the previous item
                Global.groupStart = new List <int>();
                Global.groupEnd   = new List <int>();
                Global.groupStart.Add(0);
                for (int k = 1; k < sortList.Count; k++)
                {
                    string[] thisAry = sortList[k].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string[] preAry = sortList[k - 1].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string   str = thisAry[0], preStr = preAry[0];
                    if (str != preStr)
                    {
                        Global.groupStart.Add(k);
                        Global.groupEnd.Add(k);
                    }
                }
                Global.groupEnd.Add(sortList.Count);

                if (nFeatTemp != Global.groupStart.Count)
                {
                    throw new Exception("inconsistent # of features per line, check the feature file for consistency!");
                }
            }
            else//sort based on feature frequency
            {
                sortList.Sort(listSortFunc.compareKV_value);//sort feature based on freq, for 1)compress .txt file 2)better edge features
                sortList.Reverse();
            }

            //feature index should begin from 0
            using (StreamWriter swFeat = new StreamWriter("featureIndex.txt"))
            {
                for (int i = 0; i < sortList.Count; i++)
                {
                    string[] ary = sortList[i].Split(Global.blankAry);
                    featureIndexMap[ary[0]] = i;
                    swFeat.WriteLine("{0} {1}", ary[0], i);
                }
            }

            //label index should begin from 0
            List <string> tagSortList = new List <string>();

            foreach (string tag in tagSet)
            {
                tagSortList.Add(tag);
            }
            tagSortList.Sort();//sort tags

            using (StreamWriter swTag = new StreamWriter("tagIndex.txt"))
            {
                for (int i = 0; i < tagSortList.Count; i++)
                {
                    tagIndexMap[tagSortList[i]] = i;
                    swTag.WriteLine("{0} {1}", tagSortList[i], i);
                }
            }
        }
Example #3
0
        //for mira
        //
        //Builds the sparse difference vector a = F(gold) - F(out) over the sequence x,
        //then moves w along a by scale = (sqrt(diff) - w*a) / ||a||^2 and adds the same
        //step, multiplied by t = nSamples - k, to accumW.
        //
        //x         : the sequence whose features are extracted through _fGene
        //outStates : state per position of the output sequence (subtracted from a)
        //goldStates: state per position of the gold sequence (added to a)
        //w         : weight vector, updated in place
        //accumW    : accumulated weight vector, updated in place
        //nSamples,k: only used through t = nSamples - k
        //diff      : its square root is the target value for w*a
        //
        //NOTE(review): a[f] -= / += / ++ / -- are applied to keys that may not be in the
        //map yet; this relies on baseHashMap's indexer supplying a default for missing
        //keys - confirm against baseHashMap.
        void updateWeights(dataSeq x, List <int> outStates, List <int> goldStates, float[] w, float[] accumW, int nSamples, int k, double diff)
        {
            float t = nSamples - k;

            //get a_t = F(y*) - F(y)
            baseHashMap <int, double> a = new baseHashMap <int, double>();

            for (int n = 0; n < x.Count; n++)
            {
                int outState             = outStates[n];
                int goldState            = goldStates[n];
                List <featureTemp> fList = _fGene.getFeatureTemp(x, n);

                //node feature
                foreach (featureTemp im in fList)
                {
                    double fv = im.val;
                    foreach (nodeFeature feat in Global.idNodeFeatures[im.id])
                    {
                        int s = feat._s;
                        int f = feat._id;

                        if (s == outState)
                        {
                            a[f] -= fv;
                        }
                        if (s == goldState)
                        {
                            a[f] += fv;
                        }
                    }
                }

                //edge feature (needs a previous position)
                if (n > 0)
                {
                    //non-rich
                    if (Global.useTraditionalEdge)
                    {
                        int f = _fGene.getEdgeFeatID(outStates[n - 1], outState);
                        a[f]--;

                        f = _fGene.getEdgeFeatID(goldStates[n - 1], goldState);
                        a[f]++;
                    }

                    //rich: edge features driven by the templates of position n
                    foreach (featureTemp im in fList)
                    {
                        double fv = im.val;
                        foreach (edgeFeature feat in Global.idEdgeFeatures[im.id])
                        {
                            int s    = feat._s;
                            int sPre = feat._sPre;
                            int f    = feat._id;

                            if (sPre == outStates[n - 1] && s == outState)
                            {
                                a[f] -= fv;
                            }
                            if (sPre == goldStates[n - 1] && s == goldState)
                            {
                                a[f] += fv;
                            }
                        }
                    }

                    //rich2: edge features driven by the templates of position n - 1
                    if (Global.richFeat2)
                    {
                        fList = _fGene.getFeatureTemp(x, n - 1);
                        foreach (featureTemp im in fList)
                        {
                            double fv = im.val;
                            foreach (edgeFeature feat in Global.idEdgeFeatures2[im.id])
                            {
                                int s    = feat._s;
                                int sPre = feat._sPre;
                                int f    = feat._id;

                                if (sPre == outStates[n - 1] && s == outState)
                                {
                                    a[f] -= fv;
                                }
                                if (sPre == goldStates[n - 1] && s == goldState)
                                {
                                    a[f] += fv;
                                }
                            }
                        }
                    }
                }
            }

            //compute w*a, ||a||^2
            double wa = 0, norm = 0;

            foreach (baseHashMap <int, double> .KeyValuePair kv in a)
            {
                wa   += w[kv.Key] * kv.Value;
                norm += kv.Value * kv.Value;
            }

            //a is empty or all-zero (e.g. output equals gold): there is no direction to
            //move in, and dividing by a zero norm would write NaN into w and accumW
            if (norm == 0)
            {
                return;
            }

            //compute the scalar
            double scale = (Math.Sqrt(diff) - wa) / norm;

            //compute w_{t+1}
            foreach (baseHashMap <int, double> .KeyValuePair kv in a)
            {
                int   f   = kv.Key;
                float val = (float)(scale * kv.Value);
                w[f]      += val;
                accumW[f] += t * val;
            }
        }