//returns the gradient of -log{P(y*|x,w)}, i.e. E_{P(y|x)}[F(x,y)] - F(x,y*)
        virtual public double getGrad(List <double> vecGrad, model m, dataSeq x, baseHashSet <int> idSet)
        {
            if (idSet != null)
            {
                idSet.Clear();
            }
            int nTag = m.NTag;
            //compute beliefs
            belief bel       = new belief(x.Count, nTag);
            belief belMasked = new belief(x.Count, nTag);

            _inf.getBeliefs(bel, m, x, false);
            _inf.getBeliefs(belMasked, m, x, true);
            double ZGold = belMasked.Z;
            double Z     = bel.Z;

            List <featureTemp> fList;

            for (int i = 0; i < x.Count; i++)
            {
                fList = _fGene.getFeatureTemp(x, i);
                for (int j = 0; j < fList.Count; j++)
                {
                    featureTemp im = fList[j];
                    int         id = im.id;
                    double      v  = im.val;
                    for (int s = 0; s < nTag; s++)
                    {
                        int f = _fGene.getNodeFeatID(id, s);
                        if (idSet != null)
                        {
                            idSet.Add(f);
                        }
                        vecGrad[f] += bel.belState[i][s] * v;
                        vecGrad[f] -= belMasked.belState[i][s] * v;
                    }
                }
            }

            for (int i = 1; i < x.Count; i++)
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        int f = _fGene.getEdgeFeatID(sPre, s);
                        if (idSet != null)
                        {
                            idSet.Add(f);
                        }
                        vecGrad[f] += bel.belEdge[i][sPre, s];
                        vecGrad[f] -= belMasked.belEdge[i][sPre, s];
                    }
                }
            }
            return(Z - ZGold);
        }
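The two inference passes realize the header formula: bel carries the model marginals (the expectation term E_{P(y|x)}[F(x,y)]), while belMasked clamps the gold labels, so accumulating their difference yields the gradient of -log{P(y*|x,w)}. A standalone toy check of one node term (DemoNodeGrad and all values here are illustrative, not part of the toolkit):

        static void DemoNodeGrad()
        {
            double[] belState  = { 0.3, 0.7 };  //model marginals P(y_i = s | x) over 2 tags
            double[] belMasked = { 0.0, 1.0 };  //masked beliefs: all mass on the gold tag 1
            double   v         = 1.0;           //feature value

            //contribution to vecGrad[f] for the feature paired with the gold tag
            double g = belState[1] * v - belMasked[1] * v;  //= 0.7 - 1.0 = -0.3
            Console.WriteLine(g);  //negative, so w[f] grows when we step against the gradient
        }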
        public void weightUpdate(List <double> w, List <double> grad, baseHashSet <int> idSet, double rs)
        {
            foreach (int i in idSet)
            {
                //subtract the gradient to move toward the minimum
                w[i] -= rs * grad[i];
                //reset
                grad[i] = 0;
            }
        }
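weightUpdate is a plain sparse SGD step: because getGrad returns the gradient of -log{P(y*|x,w)}, subtracting rs * grad[i] descends the negative log-likelihood, and only the features listed in idSet need to be touched. A minimal self-contained sketch with plain arrays standing in for the toolkit's types (the rate and values are made up):

        static void DemoWeightUpdate()
        {
            double   rs      = 0.05;                //assumed learning rate
            double[] w       = { 0.1, -0.3, 0.0 };
            double[] grad    = { 0.4, 0.0, -0.2 };  //gradient of -log{P(y*|x,w)}
            int[]    touched = { 0, 2 };            //stand-in for idSet: only active features

            foreach (int i in touched)
            {
                w[i]   -= rs * grad[i];  //descend
                grad[i] = 0;             //reset for the next sample
            }
        }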
Example #3
        public static List <double> getFscore(List <string> goldTagList, List <string> resTagList, List <double> infoList)
        {
            List <double> scoreList = new List <double>();

            if (resTagList.Count != goldTagList.Count)
            {
                throw new Exception("gold and result tag lists differ in length");
            }

            //convert original tags to 3 tags: B(x), I, O
            getNewTagList(Global.chunkTagMap, ref goldTagList);
            getNewTagList(Global.chunkTagMap, ref resTagList);
            List <string> goldChunkList = getChunks(goldTagList);
            List <string> resChunkList  = getChunks(resTagList);

            int gold_chunk = 0, res_chunk = 0, correct_chunk = 0;

            for (int i = 0; i < goldChunkList.Count; i++)
            {
                string   res          = resChunkList[i];
                string   gold         = goldChunkList[i];
                string[] resChunkAry  = res.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
                string[] goldChunkAry = gold.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
                gold_chunk += goldChunkAry.Length;
                res_chunk  += resChunkAry.Length;
                baseHashSet <string> goldChunkSet = new baseHashSet <string>();
                foreach (string im in goldChunkAry)
                {
                    goldChunkSet.Add(im);
                }

                foreach (string im in resChunkAry)
                {
                    if (goldChunkSet.Contains(im))
                    {
                        correct_chunk++;
                    }
                }
            }
            //guard against division by zero when either list contains no chunks
            double pre = res_chunk == 0 ? 0 : (double)correct_chunk / (double)res_chunk * 100;
            double rec = gold_chunk == 0 ? 0 : (double)correct_chunk / (double)gold_chunk * 100;
            double f1  = pre + rec == 0 ? 0 : 2 * pre * rec / (pre + rec);

            scoreList.Add(f1);
            scoreList.Add(pre);
            scoreList.Add(rec);

            infoList.Add(gold_chunk);
            infoList.Add(res_chunk);
            infoList.Add(correct_chunk);
            return(scoreList);
        }
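The returned scores are chunk-level percentages: precision divides correct chunks by predicted chunks, recall divides them by gold chunks, and F1 is their harmonic mean. A standalone check of the arithmetic with made-up counts (DemoFscore is not part of the toolkit):

        static void DemoFscore()
        {
            int gold_chunk = 10, res_chunk = 8, correct_chunk = 6;
            double pre = 100.0 * correct_chunk / res_chunk;   //75.00
            double rec = 100.0 * correct_chunk / gold_chunk;  //60.00
            double f1  = 2 * pre * rec / (pre + rec);         //~66.67
            Console.WriteLine("{0:F2} {1:F2} {2:F2}", f1, pre, rec);
        }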
        public double sgd_lazyReg()
        {
            List <double> w     = _model.W;
            int           fsize = w.Count;
            int           xsize = _X.Count;

            double[]      ary  = new double[fsize];
            List <double> grad = new List <double>(ary);

            List <int> ri = randomTool <int> .getShuffledIndexList(xsize);

            double error = 0;
            double r_k   = 0;

            for (int t = 0; t < xsize; t++)
            {
                int               ii   = ri[t];
                dataSeq           x    = _X[ii];
                baseHashSet <int> fset = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD(grad, _model, x, fset);
                error += err;

                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
                if (Global.countWithIter % (xsize / 4) == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                foreach (int i in fset)
                {
                    w[i] -= r_k * grad[i];
                    //reset
                    grad[i] = 0;
                }
                Global.countWithIter++;
            }

            if (Global.reg != 0)
            {
                for (int i = 0; i < fsize; i++)
                {
                    double grad_i = w[i] / (Global.reg * Global.reg);
                    w[i] -= r_k * grad_i;
                }

                double sum = listTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
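The step size above follows the decay schedule r_k = r_0 * beta^(k/N), where k is the running sample counter (Global.countWithIter) and N the training-set size, so the rate shrinks by a factor of beta per epoch. A standalone sketch of the schedule (the r_0 and beta values here are assumed):

        static void DemoDecaySchedule()
        {
            double rate0 = 0.05, decayFactor = 0.9;  //assumed r_0 and beta
            int    xsize = 1000;                     //N = #training samples
            for (int count = 0; count <= 3 * xsize; count += xsize)
            {
                double r_k = rate0 * Math.Pow(decayFactor, (double)count / (double)xsize);
                Console.WriteLine("epoch {0}: r_k = {1}", count / xsize, r_k.ToString("e2"));
            }
        }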
Example #5
        public double getGrad_SGD(List <double> g, model m, dataSeq x, baseHashSet <int> idset)
        {
            if (idset != null)
            {
                idset.Clear();
            }

            if (x == null)
            {
                return(0);
            }

            return(getGradCRF(g, m, x, idset));
        }
Example #6
        //the mini-batch version
        public double getGrad_SGD_miniBatch(List <double> g, model m, List <dataSeq> X, baseHashSet <int> idset)
        {
            if (idset != null)
            {
                idset.Clear();
            }
            double error = 0;

            foreach (dataSeq x in X)
            {
                baseHashSet <int> idset2 = new baseHashSet <int>();

                error += getGradCRF(g, m, x, idset2);

                if (idset != null)
                {
                    foreach (int i in idset2)
                    {
                        idset.Add(i);
                    }
                }
            }
            return(error);
        }
Example #7
        public void getMaps(string file)
        {
            if (!File.Exists(file))
            {
                Console.WriteLine("file {0} no exist!", file);
                return;
            }
            Console.WriteLine("file {0} converting...", file);
            StreamReader sr = new StreamReader(file);

            baseHashMap <string, int> featureFreqMap = new baseHashMap <string, int>();
            baseHashSet <string>      tagSet         = new baseHashSet <string>();

            //get feature-freq info and tagset
            int nFeatTemp = 0;

            while (!sr.EndOfStream)
            {
                string line = sr.ReadLine();
                line = line.Replace("\t", " ");
                line = line.Replace("\r", "");

                if (line == "")
                {
                    continue;
                }

                string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
                nFeatTemp = ary.Length - 2;
                for (int i = 1; i < ary.Length - 1; i++)
                {
                    if (ary[i] == "/")//no feature here
                    {
                        continue;
                    }
                    string[] ary2    = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries);//for real-value features
                    string   feature = i.ToString() + "." + ary2[0];
                    if (featureFreqMap.ContainsKey(feature) == false)
                    {
                        featureFreqMap[feature] = 1;
                    }
                    else
                    {
                        featureFreqMap[feature]++;
                    }
                }

                string tag = ary[ary.Length - 1];
                tagSet.Add(tag);
            }

            //sort features
            List <string> sortList = new List <string>();

            foreach (baseHashMap <string, int> .KeyValuePair kv in featureFreqMap)
            {
                sortList.Add(kv.Key + " " + kv.Value);
            }
            if (Global.regMode == "GL")//sort based on feature templates
            {
                sortList.Sort(listSortFunc.compareKV_key);
                //sortList.Reverse();

                Global.groupStart = new List <int>();
                Global.groupEnd   = new List <int>();
                Global.groupStart.Add(0);
                for (int k = 1; k < sortList.Count; k++)
                {
                    string[] thisAry = sortList[k].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string[] preAry = sortList[k - 1].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
                    string   str = thisAry[0], preStr = preAry[0];
                    if (str != preStr)
                    {
                        Global.groupStart.Add(k);
                        Global.groupEnd.Add(k);
                    }
                }
                Global.groupEnd.Add(sortList.Count);
            }
            else//sort based on feature frequency
            {
                sortList.Sort(listSortFunc.compareKV_value);//sort features by frequency: 1) compresses the .txt file; 2) yields better edge features
                sortList.Reverse();
            }

            if (Global.regMode == "GL")
            {
                if (nFeatTemp != Global.groupStart.Count)
                {
                    throw new Exception("inconsistent # of features per line, check the feature file for consistency!");
                }
            }

            //feature index should begin from 0
            StreamWriter swFeat = new StreamWriter("featureIndex.txt");

            for (int i = 0; i < sortList.Count; i++)
            {
                string[] ary = sortList[i].Split(Global.blankAry);
                featureIndexMap[ary[0]] = i;
                swFeat.WriteLine("{0} {1}", ary[0], i);
            }
            swFeat.Close();

            //label index should begin from 0
            StreamWriter  swTag       = new StreamWriter("tagIndex.txt");
            List <string> tagSortList = new List <string>();

            foreach (string tag in tagSet)
            {
                tagSortList.Add(tag);
            }
            tagSortList.Sort();//sort tags
            for (int i = 0; i < tagSortList.Count; i++)
            {
                tagIndexMap[tagSortList[i]] = i;
                swTag.WriteLine("{0} {1}", tagSortList[i], i);
            }
            swTag.Close();

            sr.Close();
        }
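getMaps assumes one token per line: a leading token column (index 0, skipped), feature columns in which "/" marks an absent feature and "name/value" a real-valued one, and the gold tag in the last column; each feature key is prefixed with its column index. A standalone sketch of the key construction on a hypothetical input line (DemoFeatureKey is illustrative only):

        static void DemoFeatureKey()
        {
            //hypothetical line: token, two feature columns, gold tag
            string[] ary = "word1 f_a f_b/0.5 tagA".Split(' ');
            for (int i = 1; i < ary.Length - 1; i++)
            {
                if (ary[i] == "/")//no feature in this column
                {
                    continue;
                }
                string[] ary2 = ary[i].Split('/');//strip the real-value suffix
                Console.WriteLine(i.ToString() + "." + ary2[0]);//keys: "1.f_a", "2.f_b"
            }
            Console.WriteLine("tag = " + ary[ary.Length - 1]);//"tagA"
        }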
Example #8
        //ADF training
        public double adf()
        {
            float[]       w                = _model.W;
            int           fsize            = w.Length;
            int           xsize            = _X.Count;
            List <double> grad             = new List <double>(new double[fsize]);
            double        error            = 0;
            List <int>    featureCountList = new List <int>(new int[fsize]);
            List <int>    ri               = randomTool <int> .getShuffledIndexList(xsize);//random shuffle of training samples

            Global.interval = xsize / Global.nUpdate;
            int nSample = 0;//#samples in an update interval

            for (int t = 0; t < xsize; t += Global.miniBatch)
            {
                List <dataSeq> XX  = new List <dataSeq>();
                bool           end = false;
                for (int k = t; k < t + Global.miniBatch; k++)
                {
                    int     i = ri[k];
                    dataSeq x = _X[i];
                    XX.Add(x);
                    if (k == xsize - 1)
                    {
                        end = true;
                        break;
                    }
                }
                int mbSize = XX.Count;
                nSample += mbSize;
                baseHashSet <int> fSet = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fSet);
                error += err;

                foreach (int i in fSet)
                {
                    featureCountList[i]++;
                }

                bool check = false;
                for (int k = t; k < t + Global.miniBatch; k++)
                {
                    if (t != 0 && k % Global.interval == 0)
                    {
                        check = true;
                    }
                }
                //update decay rates
                if (check || end)
                {
                    for (int i = 0; i < fsize; i++)
                    {
                        int    v   = featureCountList[i];
                        double u   = (double)v / (double)nSample;
                        double eta = Global.upper - (Global.upper - Global.lower) * u;
                        Global.decayList[i] *= eta;
                    }
                    //reset
                    for (int i = 0; i < featureCountList.Count; i++)
                    {
                        featureCountList[i] = 0;
                    }
                }
                //update weights
                foreach (int i in fSet)
                {
                    w[i] -= (float)(Global.decayList[i] * grad[i]);
                    //reset
                    grad[i] = 0;
                }
                //reg
                if (check || end)
                {
                    if (Global.reg != 0)
                    {
                        for (int i = 0; i < fsize; i++)
                        {
                            double grad_i = w[i] / (Global.reg * Global.reg) * ((double)nSample / (double)xsize);
                            w[i] -= (float)(Global.decayList[i] * grad_i);
                        }
                    }
                    //reset
                    nSample = 0;
                }
                Global.countWithIter += mbSize;
            }

            if (Global.reg != 0)
            {
                double sum = arrayTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
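The per-feature rates in adf() follow eta = upper - (upper - lower) * u, with u the fraction of samples in the interval on which the feature fired: frequent features get decay factors near lower (fast decay), rare ones stay near upper (slow decay). A standalone sketch of the rule (the bound values are assumed):

        static void DemoAdfEta()
        {
            double upper = 0.995, lower = 0.6;//assumed decay bounds
            int    nSample = 200;             //samples in the update interval
            foreach (int v in new[] { 0, 20, 200 })//feature fire counts
            {
                double u   = (double)v / (double)nSample;
                double eta = upper - (upper - lower) * u;//shrinks from upper toward lower
                Console.WriteLine("count={0}  eta={1}", v, eta);
            }
        }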
Example #9
        public double sgd_exactReg()
        {
            double scalar = 1, scalarOld = 1;

            float[] w      = _model.W;
            int     fsize  = w.Length;
            int     xsize  = _X.Count;
            double  newReg = Global.reg * Math.Sqrt(xsize);
            double  oldReg = Global.reg;

            Global.reg = newReg;

            double[]      tmpAry = new double[fsize];
            List <double> grad   = new List <double>(tmpAry);

            List <int> ri = randomTool <int> .getShuffledIndexList(xsize);

            double error = 0;
            double r_k   = 0;

            for (int t = 0; t < xsize; t++)
            {
                int               ii   = ri[t];
                dataSeq           x    = _X[ii];
                baseHashSet <int> fset = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD(grad, scalar, _model, x, fset);
                error += err;
                //decaying rate: r_k = r_0 * beta^(k/N), with 0<r_0<=1, 0<beta<1
                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
                if (Global.countWithIter % (xsize / 4) == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                //reg
                if (t % Global.scalarResetStep == 0)
                {
                    //reset
                    for (int i = 0; i < fsize; i++)
                    {
                        w[i] *= (float)scalar;
                    }
                    scalar = scalarOld = 1;
                }
                else
                {
                    scalarOld = scalar;
                    scalar   *= 1 - r_k / (Global.reg * Global.reg);
                }

                foreach (int i in fset)
                {
                    double realWeight = w[i] * scalarOld;
                    double grad_i     = grad[i] + realWeight / (Global.reg * Global.reg);
                    realWeight = realWeight - r_k * grad_i;
                    w[i]       = (float)(realWeight / scalar);

                    //reset
                    grad[i] = 0;
                }
                Global.countWithIter++;
            }

            //recover the real weights
            for (int i = 0; i < fsize; i++)
            {
                w[i] *= (float)scalar;
            }

            if (Global.reg != 0.0)
            {
                double sum = arrayTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            Global.reg  = oldReg;
            return(error);
        }
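sgd_exactReg stores the weights in factored form, realWeight = scalar * w[i]: the exact L2 update must shrink every weight by (1 - r_k/reg^2) on each step, and folding that factor into the shared scalar applies the shrink to all fsize weights in O(1), with periodic resets so the scalar never underflows. A standalone check of the equivalence (values are made up):

        static void DemoScalarTrick()
        {
            double reg = 2.0, r_k = 0.05;
            double shrink = 1 - r_k / (reg * reg);

            double[] w      = { 0.5, -1.0, 2.0 };
            double   scalar = 1.0;

            //explicit shrink of every weight...
            double[] explicitW = new double[w.Length];
            for (int i = 0; i < w.Length; i++)
            {
                explicitW[i] = w[i] * shrink;
            }

            //...equals a single update of the shared scalar
            scalar *= shrink;
            for (int i = 0; i < w.Length; i++)
            {
                Console.WriteLine("{0} == {1}", explicitW[i], scalar * w[i]);
            }
        }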
Example #10
        //SGD with lazy reg
        public double sgd_lazyReg()
        {
            float[] w     = _model.W;
            int     fsize = w.Length;
            int     xsize = _X.Count;

            double[]      ary  = new double[fsize];
            List <double> grad = new List <double>(ary);

            List <int> ri = randomTool <int> .getShuffledIndexList(xsize);

            double error = 0;
            double r_k   = 0;

            for (int t = 0; t < xsize; t += Global.miniBatch)
            {
                List <dataSeq> XX = new List <dataSeq>();
                for (int k = t; k < t + Global.miniBatch; k++)
                {
                    int     i = ri[k];
                    dataSeq x = _X[i];
                    XX.Add(x);
                    if (k == xsize - 1)
                    {
                        break;
                    }
                }
                int mbSize             = XX.Count;
                baseHashSet <int> fset = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fset);
                error += err;

                //decaying rate: r_k = r_0 * beta^(k/N), with 0<r_0<=1, 0<beta<1
                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);

                if (Global.countWithIter % (xsize / 4) == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                foreach (int i in fset)
                {
                    //grad[i] is the gradient of -log(obj), so subtract it to move toward the minimum
                    w[i] -= (float)(r_k * grad[i]);
                    //reset
                    grad[i] = 0;
                }
                Global.countWithIter += mbSize;
            }

            if (Global.reg != 0)
            {
                for (int i = 0; i < fsize; i++)
                {
                    double grad_i = w[i] / (Global.reg * Global.reg);
                    w[i] -= (float)(r_k * grad_i);
                }

                double sum = arrayTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
Example #11
        //the scalar version
        virtual public double getGradCRF(List <double> vecGrad, double scalar, model m, dataSeq x, baseHashSet <int> idSet)
        {
            idSet.Clear();
            int nTag = m.NTag;
            //compute beliefs
            belief bel       = new belief(x.Count, nTag);
            belief belMasked = new belief(x.Count, nTag);

            _inf.getBeliefs(bel, m, x, scalar, false);
            _inf.getBeliefs(belMasked, m, x, scalar, true);
            double ZGold = belMasked.Z;
            double Z     = bel.Z;

            List <featureTemp> fList;

            //Loop over nodes to compute features and update the gradient
            for (int i = 0; i < x.Count; i++)
            {
                fList = _fGene.getFeatureTemp(x, i);
                foreach (featureTemp im in fList)
                {
                    for (int s = 0; s < nTag; s++)
                    {
                        int f = _fGene.getNodeFeatID(im.id, s);
                        idSet.Add(f);

                        vecGrad[f] += bel.belState[i][s] * im.val;
                        vecGrad[f] -= belMasked.belState[i][s] * im.val;
                    }
                }
            }

            //Loop over edges to compute features and update the gradient
            for (int i = 1; i < x.Count; i++)
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        int f = _fGene.getEdgeFeatID(sPre, s);
                        idSet.Add(f);

                        vecGrad[f] += bel.belEdge[i][sPre, s];
                        vecGrad[f] -= belMasked.belEdge[i][sPre, s];
                    }
                }
            }
            return(Z - ZGold);//-log{P(y*|x,w)}
        }
Example #12
        //the scalar version
        public double getGrad_SGD(List <double> g, double scalar, model m, dataSeq x, baseHashSet <int> idset)
        {
            return(getGradCRF(g, scalar, m, x, idset));
        }
Example #13
        public static void loadData_multi(List <dataSet> XList, dataSet X, List <dataSet> XXList)
        {
            XList.Clear();
            XXList.Clear();
            //load train data
            baseHashSet <int> checkSet = new baseHashSet <int>();

            for (int i = 0; i < Global.nTask; i++)
            {
                string  dat_i = i.ToString() + Global.fFeatureTrain;
                string  tag_i = i.ToString() + Global.fGoldTrain;
                dataSet Xi    = new dataSet(dat_i, tag_i);
                dataSizeScale(Xi);
                checkSet.Add(Xi.NFeatureTemp);
                XList.Add(Xi);
            }
            if (checkSet.Count > 1)
            {
                throw new Exception("inconsistent features among multi tasks!");
            }

            //make nTag consistent among different tasks
            int maxNTag = 0;

            foreach (dataSet Xi in XList)
            {
                if (maxNTag < Xi.NTag)
                {
                    maxNTag = Xi.NTag;
                }
            }
            for (int i = 0; i < Global.nTask; i++)
            {
                XList[i].NTag = maxNTag;
            }

            //add to merged data
            X.NTag         = XList[0].NTag;
            X.NFeatureTemp = XList[0].NFeatureTemp;
            foreach (dataSet Xi in XList)
            {
                foreach (dataSeq im in Xi)
                {
                    X.Add(im);
                }
            }
            Global.swLog.WriteLine("data sizes (1, ..., T):");
            for (int i = 0; i < Global.nTask; i++)
            {
                dataSet Xi = XList[i];
                Global.swLog.WriteLine(" " + Xi.Count.ToString());
            }
            Global.swLog.WriteLine();

            //load test data
            for (int i = 0; i < Global.nTask; i++)
            {
                string  dat_i = i.ToString() + Global.fFeatureTest;
                string  tag_i = i.ToString() + Global.fGoldTest;
                dataSet Xtest = new dataSet(dat_i, tag_i);
                XXList.Add(Xtest);
            }
            for (int i = 0; i < Global.nTask; i++)
            {
                XXList[i].NTag = maxNTag;
            }
        }
Example #14
        override public double getGradCRF(List <double> gradList, model m, dataSeq x, baseHashSet <int> idSet)
        {
            if (idSet != null)
            {
                idSet.Clear();
            }
            int nTag = m.NTag;
            //compute beliefs
            belief bel       = new belief(x.Count, nTag);
            belief belMasked = new belief(x.Count, nTag);
            //store the YY and Y
            List <dMatrix>        YYlist = new List <dMatrix>(), maskYYlist = new List <dMatrix>();
            List <List <double> > Ylist = new List <List <double> >(), maskYlist = new List <List <double> >();

            _inf.getYYandY(m, x, YYlist, Ylist, maskYYlist, maskYlist);
            _inf.getBeliefs(bel, m, x, YYlist, Ylist);
            _inf.getBeliefs(belMasked, m, x, maskYYlist, maskYlist);
            double ZGold = belMasked.Z;
            double Z = bel.Z;

            List <featureTemp> fList;

            //Loop over nodes to compute features and update the gradient
            for (int i = 0; i < x.Count; i++)
            {
                fList = _fGene.getFeatureTemp(x, i);
                foreach (featureTemp im in fList)
                {
                    for (int s = 0; s < nTag; s++)
                    {
                        int f = _fGene.getNodeFeatID(im.id, s);
                        if (idSet != null)
                        {
                            idSet.Add(f);
                        }

                        gradList[f] += bel.belState[i][s] * im.val;
                        gradList[f] -= belMasked.belState[i][s] * im.val;
                    }
                }
            }

            //Loop over edges to compute features and update the gradient
            for (int i = 1; i < x.Count; i++)
            {
                //non-rich
                if (Global.useTraditionalEdge)
                {
                    for (int s = 0; s < nTag; s++)
                    {
                        for (int sPre = 0; sPre < nTag; sPre++)
                        {
                            int f = _fGene.getEdgeFeatID(sPre, s);
                            if (idSet != null)
                            {
                                idSet.Add(f);
                            }

                            gradList[f] += bel.belEdge[i][sPre, s];
                            gradList[f] -= belMasked.belEdge[i][sPre, s];
                        }
                    }
                }

                //rich
                fList = _fGene.getFeatureTemp(x, i);
                foreach (featureTemp im in fList)
                {
                    int id = im.id;
                    if (id < _fGene.getNRichFeatTemp())
                    {
                        for (int s = 0; s < nTag; s++)
                        {
                            for (int sPre = 0; sPre < nTag; sPre++)
                            {
                                int f = _fGene.getEdgeFeatID(id, sPre, s);
                                if (idSet != null)
                                {
                                    idSet.Add(f);
                                }

                                gradList[f] += bel.belEdge[i][sPre, s] * im.val;
                                gradList[f] -= belMasked.belEdge[i][sPre, s] * im.val;
                            }
                        }
                    }
                }
            }
            return(Z - ZGold);//-log{P(y*|x,w)}
        }
Example #15
        //for training
        public featureGenerator(dataSet X)
        {
            _nFeatureTemp = X.NFeature;
            int ft_richEdge = (int)(X.NFeature * Global.edgeReduce);

            _nTag  = X.NTag;
            _nEdge = _nTag * _nTag;
            Global.swLog.WriteLine("feature templates: {0}", _nFeatureTemp);

            //build feature mapping etc. information
            //baseHashMap<string, int> strIntMap = new baseHashMap<string, int>(_nFeatureTemp * _nTag, 0.65,2);
            baseHashSet <int>[] setAry = new baseHashSet <int> [_nFeatureTemp];
            for (int i = 0; i < setAry.Length; i++)
            {
                setAry[i] = new baseHashSet <int>();
            }
            List <nodeFeature>[] idNodeFeatures  = new List <nodeFeature> [_nFeatureTemp];
            List <edgeFeature>[] idEdgeFeatures  = new List <edgeFeature> [_nFeatureTemp];
            List <edgeFeature>[] idEdgeFeatures2 = new List <edgeFeature> [_nFeatureTemp];
            for (int i = 0; i < _nFeatureTemp; i++)
            {
                idNodeFeatures[i]  = new List <nodeFeature>();
                idEdgeFeatures[i]  = new List <edgeFeature>();
                idEdgeFeatures2[i] = new List <edgeFeature>();
            }
            int fIndex = _nEdge;//node/rich-edge feature ids start after the nTag*nTag non-rich edge features

            int factor = 10000, factor2 = 100000;

            if (Global.negFeatureMode == "node")//neg features for node features
            {
                for (int id = 0; id < _nFeatureTemp; id++)
                {
                    for (int tag = 0; tag < _nTag; tag++)
                    {
                        //node feature
                        int mark = tag;
                        if (!setAry[id].Contains(mark))
                        {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;

                            nodeFeature feat = new nodeFeature(tag, fid);
                            idNodeFeatures[id].Add(feat);
                        }
                    }
                }
            }
            else if (Global.negFeatureMode == "edge")//neg features for node & edge features
            {
                //s2 case
                for (int id = 0; id < _nFeatureTemp; id++)
                {
                    for (int tag = 0; tag < _nTag; tag++)
                    {
                        //node feature
                        int mark = tag;
                        if (!setAry[id].Contains(mark))
                        {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;

                            nodeFeature feat = new nodeFeature(tag, fid);
                            idNodeFeatures[id].Add(feat);
                        }
                    }
                }

                //neg rich edge feature
                for (int id = 0; id < _nFeatureTemp; id++)
                {
                    //rich edge features here; non-rich edge features are already coded above
                    if (id < ft_richEdge)//pruning rich edge features, id relates to frequency of features
                    {
                        for (int random = 0; random < Global.nNegEdgeFeat; random++)
                        {
                            int tag = randomTool.getOneRandom_int(0, _nTag), preTag = randomTool.getOneRandom_int(0, _nTag);
                            int mark = tag * factor + preTag;
                            if (!setAry[id].Contains(mark))
                            {
                                int fid = fIndex;
                                setAry[id].Add(mark);
                                fIndex++;

                                edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                idEdgeFeatures[id].Add(feat);
                            }
                        }

                        //rich2
                        if (Global.richFeat2)
                        {
                            for (int random = 0; random < Global.nNegEdgeFeat; random++)
                            {
                                int tag = randomTool.getOneRandom_int(0, _nTag), preTag = randomTool.getOneRandom_int(0, _nTag);
                                int mark = tag * factor2 + preTag;
                                if (!setAry[id].Contains(mark))
                                {
                                    int fid = fIndex;
                                    setAry[id].Add(mark);
                                    fIndex++;

                                    edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                    idEdgeFeatures2[id].Add(feat);
                                }
                            }
                        }
                    }
                }
            }
            else if (Global.negFeatureMode == "full")//full negative features for node features & edge features
            {
                //s2 case
                for (int id = 0; id < _nFeatureTemp; id++)
                {
                    for (int tag = 0; tag < _nTag; tag++)
                    {
                        //node feature
                        int mark = tag;
                        if (!setAry[id].Contains(mark))
                        {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;

                            nodeFeature feat = new nodeFeature(tag, fid);
                            idNodeFeatures[id].Add(feat);
                        }
                    }
                }

                //neg rich edge feature
                for (int id = 0; id < _nFeatureTemp; id++)
                {
                    //rich edge features here; non-rich edge features are already coded above
                    if (id < ft_richEdge)//pruning rich edge features, id relates to frequency of features
                    {
                        for (int tag = 0; tag < _nTag; tag++)
                        {
                            for (int preTag = 0; preTag < _nTag; preTag++)
                            {
                                int mark = tag * factor + preTag;
                                if (!setAry[id].Contains(mark))
                                {
                                    int fid = fIndex;
                                    setAry[id].Add(mark);
                                    fIndex++;

                                    edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                    idEdgeFeatures[id].Add(feat);
                                }
                            }
                        }

                        //rich2
                        if (Global.richFeat2)
                        {
                            for (int tag = 0; tag < _nTag; tag++)
                            {
                                for (int preTag = 0; preTag < _nTag; preTag++)
                                {
                                    int mark = tag * factor2 + preTag;
                                    if (!setAry[id].Contains(mark))
                                    {
                                        int fid = fIndex;
                                        setAry[id].Add(mark);
                                        fIndex++;

                                        edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                        idEdgeFeatures2[id].Add(feat);
                                    }
                                }
                            }
                        }
                    }
                }
            }

            //true features
            foreach (dataSeq x in X)
            {
                for (int i = 0; i < x.Count; i++)
                {
                    List <featureTemp> fList = getFeatureTemp(x, i);
                    int tag = x.getTags(i);
                    foreach (featureTemp im in fList)
                    {
                        int id = im.id;
                        //node feature
                        int mark = tag;
                        if (!setAry[id].Contains(mark))
                        {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;

                            nodeFeature feat = new nodeFeature(tag, fid);
                            idNodeFeatures[id].Add(feat);
                        }
                        //rich edge features here; non-rich edge features are already coded above
                        if (i > 0 && id < ft_richEdge)//pruning rich edge features, id relates to frequency of features
                        {
                            int preTag = x.getTags(i - 1);
                            mark = tag * factor + preTag;
                            if (!setAry[id].Contains(mark))
                            {
                                int fid = fIndex;
                                setAry[id].Add(mark);
                                fIndex++;

                                edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                idEdgeFeatures[id].Add(feat);
                            }
                        }

                        //rich2 feature
                        if (Global.richFeat2)
                        {
                            if (i < x.Count - 1 && id < ft_richEdge)//pruning rich edge features, id relates to frequency of features
                            {
                                int postTag = x.getTags(i + 1);
                                mark = tag * factor2 + postTag;
                                if (!setAry[id].Contains(mark))
                                {
                                    int fid = fIndex;
                                    setAry[id].Add(mark);
                                    fIndex++;

                                    edgeFeature feat = new edgeFeature(postTag, tag, fid);
                                    idEdgeFeatures2[id].Add(feat);
                                }
                            }
                        }
                    }
                }
            }

            //build globals
            Global.idNodeFeatures  = new nodeFeature[_nFeatureTemp][];
            Global.idEdgeFeatures  = new edgeFeature[_nFeatureTemp][];
            Global.idEdgeFeatures2 = new edgeFeature[_nFeatureTemp][];
            for (int i = 0; i < _nFeatureTemp; i++)
            {
                Global.idNodeFeatures[i]  = idNodeFeatures[i].ToArray();
                Global.idEdgeFeatures[i]  = idEdgeFeatures[i].ToArray();
                Global.idEdgeFeatures2[i] = idEdgeFeatures2[i].ToArray();
            }

            _nCompleteFeature = fIndex;

            Global.swLog.WriteLine("feature templates & rich-edge feature templates: {0}, {1}", _nFeatureTemp, ft_richEdge);
            //Global.swLog.WriteLine("nNodeFeature, nEdgeFeature1, nEdgeFeature2: {0}, {1}, {2}", nNodeFeature, nEdgeFeature1, nEdgeFeature2);
            Global.swLog.WriteLine("complete features: {0}", _nCompleteFeature);
            Global.swLog.WriteLine();
            Global.swLog.Flush();

            setAry          = null;
            idNodeFeatures  = null;
            idEdgeFeatures  = null;
            idEdgeFeatures2 = null;
            GC.Collect();//references must be set to null before collecting memory
        }
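The id layout built above reserves [0, _nEdge) (nTag * nTag ids) for the non-rich edge features, after which fIndex hands out node and rich-edge ids in discovery order; within one template, mark packs a tag or a (tag, preTag) pair into a single int so one hash set per template can deduplicate both kinds. A standalone sketch of the packing (the nTag value is made up; DemoFeatureIdLayout is not part of the toolkit):

        static void DemoFeatureIdLayout()
        {
            int nTag = 5, factor = 10000, factor2 = 100000;
            int nEdge  = nTag * nTag;//ids [0, 25) are the non-rich edge features
            int fIndex = nEdge;      //first free id for node/rich-edge features

            int tag = 3, preTag = 1;
            int nodeMark  = tag;                   //node key within a template
            int edgeMark  = tag * factor + preTag; //rich-edge key: 30001
            int edge2Mark = tag * factor2 + preTag;//rich2-edge key: 300001
            Console.WriteLine("{0} {1} {2} first free id = {3}", nodeMark, edgeMark, edge2Mark, fIndex);
        }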
        //fast multi-task learning via approximation
        public double sgd_multi_fast()
        {
            int        fsize    = (_modelList[0]).W.Count;
            List <int> sizeList = new List <int>();
            int        maxSize  = 0;

            for (int i = 0; i < _newXList.Count; i++)
            {
                dataSet Xi   = _newXList[i];
                int     size = Xi.Count;
                sizeList.Add(size);
                if (maxSize < size)
                {
                    maxSize = size;
                }
            }

            double        error   = 0;
            double        r_k     = 0;
            List <double> vecGrad = new List <double>(new double[fsize]);

            List <List <int> > riList = new List <List <int> >();

            for (int i = 0; i < _newXList.Count; i++)
            {
                int        size = sizeList[i];
                List <int> ri   = randomTool <int> .getShuffledIndexList(size);

                riList.Add(ri);
            }

            for (int t = 0; t < maxSize; t++)
            {
                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)maxSize);
                if (Global.countWithIter % (maxSize / 4) == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                List <dataSeq> X = new List <dataSeq>();
                for (int i = 0; i < _newXList.Count; i++)
                {
                    dataSet    Xi   = _newXList[i];
                    List <int> ri   = riList[i];
                    int        size = sizeList[i];
                    int        idx  = ri[t % size];
                    dataSeq    x    = Xi[idx];
                    X.Add(x);
                }

                baseHashSet <int> fset = new baseHashSet <int>();
                for (int i = 0; i < Global.nTask; i++)
                {
                    for (int j = 0; j < Global.nTask; j++)
                    {
                        if (i == j)
                        {
                            model         m   = _modelList[i];
                            List <double> w   = m.W;
                            dataSeq       x   = X[j];
                            double        err = _grad.getGrad_SGD(vecGrad, m, x, fset);
                            weightUpdate(w, vecGrad, fset, r_k);

                            error += err;
                        }
                        else if (t % Global.sampleFactor == 0)//probabilistic sampling for speed
                        {
                            model         m    = _modelList[i];
                            List <double> w    = m.W;
                            dataSeq       x    = X[j];
                            double        err  = _grad.getGrad_SGD(vecGrad, m, x, fset);
                            double        simi = _simiBiAry[i, j];
                            weightUpdate(w, vecGrad, fset, r_k * simi * Global.sampleFactor);
                        }
                    }
                }

                Global.countWithIter++;
            }

            //reg
            for (int i = 0; i < Global.nTask; i++)
            {
                error += reg(_modelList[i], fsize, r_k);
            }

            //update the similarity biAry
            if (Global.glbIter == Global.simiUpdateIter)
            {
                if (Global.simiMode == "cov")
                {
                    updateSimi_covariance(_modelList);
                }
                else if (Global.simiMode == "poly")
                {
                    updateSimi_polynomial(_modelList);
                }
                else if (Global.simiMode == "rbf")
                {
                    updateSimi_RBF(_modelList);
                }
                Console.WriteLine("updated simi-matrix!");
            }
            return(error);
        }
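The cross-task branch runs only once every Global.sampleFactor iterations, but its step is scaled up by sampleFactor, so its expected contribution matches taking the similarity-weighted step r_k * simi on every iteration. A standalone check of that compensation (values are made up):

        static void DemoSampledCrossTask()
        {
            double r_k = 0.02, simi = 0.4, grad = 1.0;//assumed rate, similarity, gradient
            int    sampleFactor = 5;

            //unsampled: a small step on every one of sampleFactor iterations
            double unsampled = sampleFactor * (r_k * simi * grad);

            //sampled: one boosted step on 1 of sampleFactor iterations
            double sampled = r_k * simi * sampleFactor * grad;

            Console.WriteLine("{0} == {1}", unsampled, sampled);//same total update
        }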