예제 #1
0
        // Fills tags with the state sequence returned by Viterbi.runViterbi for x and
        // returns Math.Exp(pathScore - Z), where pathScore is runViterbi's return value
        // and Z is getZ(m, x, false). tags is cleared before it is filled.
        public double decodeViterbi(model m, dataSeq x, List <int> tags)
        {
            tags.Clear();

            int nodeCount = x.Count;
            int tagCount  = m.NTag;

            // One scratch matrix/list pair is reused for every node; getLogYY overwrites it.
            dMatrix       edgeScores = new dMatrix(tagCount, tagCount);
            List <double> nodeScores = new List <double>(new double[tagCount]);
            Viterbi       decoder    = new Viterbi(nodeCount, tagCount);

            for (int node = 0; node < nodeCount; node++)
            {
                getLogYY(m, x, node, ref edgeScores, ref nodeScores, false, false);
                decoder.setScores(node, nodeScores, edgeScores);
            }

            List <int> bestPath  = new List <int>();
            double     pathScore = decoder.runViterbi(ref bestPath, false);

            tags.AddRange(bestPath);

            double z = getZ(m, x, false);
            return Math.Exp(pathScore - z);
        }
예제 #2
0
        // Creates a belief sized (x.Count, m.NTag), lets getBeliefs fill it using the
        // given mask flag, and returns the Z value stored in that belief.
        public double getZ(model m, dataSeq x, bool mask)
        {
            belief beliefs = new belief(x.Count, m.NTag);
            getBeliefs(beliefs, m, x, mask);

            double z = beliefs.Z;
            return z;
        }
예제 #3
0
        // Fast Viterbi decode that only produces tags (no probability is computed).
        // Scores are built over m.NState states; each decoded state is converted with
        // m.hStateToTag before it is appended to tags. tags is cleared first.
        public void decodeViterbi_test(model m, dataSeq x, List <int> tags)
        {
            tags.Clear();

            int nodeCount  = x.Count;
            int stateCount = m.NState;

            // Scratch containers reused for every node; getLogYY overwrites them.
            dMatrix       edgeScores = new dMatrix(stateCount, stateCount);
            List <double> nodeScores = new List <double>(new double[stateCount]);
            Viterbi       decoder    = new Viterbi(nodeCount, stateCount);

            for (int node = 0; node < nodeCount; node++)
            {
                getLogYY(m, x, node, ref edgeScores, ref nodeScores, false, false);
                decoder.setScores(node, nodeScores, edgeScores);
            }

            // The score returned by runViterbi is not needed here, only the path.
            List <int> bestStates = new List <int>();
            decoder.runViterbi(ref bestStates, false);

            foreach (int state in bestStates)
            {
                int tag = m.hStateToTag(state);
                tags.Add(tag);
            }
        }
예제 #4
0
        // Builds a new dataSet (constructed with X.NTag and X.NFeature) from the sequences of X.
        // While Global.structReg is set and Global.miniSize is non-zero, each sequence is
        // handed to getSegments together with the result set; otherwise the sequence is
        // added to the result unchanged.
        public static dataSet structSplit(dataSet X)
        {
            dataSet result = new dataSet(X.NTag, X.NFeature);

            for (int idx = 0; idx < X.Count; idx++)
            {
                dataSeq seq = X[idx];

                if (!Global.structReg || Global.miniSize == 0)
                {
                    // no splitting requested: keep the sequence as it is
                    result.Add(seq);
                    continue;
                }

                // An earlier, disabled variant first cut long sequences in two at a random
                // position before segmenting each half; only direct segmenting remains.
                getSegments(seq, result);
            }

            return result;
        }
예제 #5
0
        // Cuts x into segments and appends each one to X2.
        // randomTool.getOneRandom_int(-100, 100) picks the direction: a value <= 0 walks
        // forward from node 0, any other value walks backward from the last node.
        // Each segment requests getStep() + Global.overlapLength nodes while the cursor
        // only moves by getStep(), so neighbouring segments may share nodes.
        static void getSegments(dataSeq x, dataSet X2)
        {
            bool forward = randomTool.getOneRandom_int(-100, 100) <= 0;

            if (forward)
            {
                int cursor = 0;
                while (cursor < x.Count)
                {
                    int stride = getStep();
                    X2.Add(new dataSeq(x, cursor, stride + Global.overlapLength));
                    cursor += stride;
                }
            }
            else
            {
                int cursor = x.Count - 1;
                while (cursor >= 0)
                {
                    int stride = getStep();
                    X2.Add(new dataSeq(x, cursor, stride + Global.overlapLength, false));
                    cursor -= stride;
                }
            }
        }
예제 #6
0
        // For every node of x, calls getLogYY (takeExp = false, mask = false) on a fresh
        // matrix/list pair and appends the matrix to YYlist and the list to Ylist.
        // An ApplicationException raised inside the loop is caught and reported on the
        // console; the output lists then keep whatever was appended before the failure.
        public void getYYandY(model m, dataSeq x, List <dMatrix> YYlist, List <List <double> > Ylist)
        {
            int nodeCount = x.Count;
            int tagCount  = m.NTag;

            double[]   zeros   = new double[tagCount];
            const bool useMask = false;

            try
            {
                // (the reader-lock acquire/release that once wrapped this loop is disabled)
                for (int node = 0; node < nodeCount; node++)
                {
                    // fresh containers per node, because they are stored in the output lists
                    dMatrix       edgeScores = new dMatrix(tagCount, tagCount);
                    List <double> nodeScores = new List <double>(zeros);

                    getLogYY(m, x, node, ref edgeScores, ref nodeScores, false, useMask);

                    YYlist.Add(edgeScores);
                    Ylist.Add(nodeScores);
                }
            }
            catch (ApplicationException)
            {
                Console.WriteLine("read out time!");
            }
        }
예제 #7
0
        // Copies a window of x (feature templates and gold tags) into this sequence.
        // forward == true : the window starts at n and covers up to length nodes, clipped at x.Count.
        // forward == false: the window ends at n (inclusive) and reaches back up to length nodes,
        //                   clipped at index 0.
        public dataSeq(dataSeq x, int n, int length, bool forward = true)
        {
            int first;
            int stop;   // exclusive upper bound

            if (forward)
            {
                first = n;
                stop  = (n + length < x.Count) ? n + length : x.Count;
            }
            else
            {
                stop  = n + 1;
                first = (stop - length >= 0) ? stop - length : 0;
            }

            for (int i = first; i < stop; i++)
            {
                featureTemps.Add(x.featureTemps[i]);
                yGold.Add(x.yGold[i]);
            }
        }
예제 #8
0
        // Builds the lattice for sequence x:
        //  1. stores copies of the node / edge scores from inf.getYYandY in _logBel,
        //  2. allocates the zero-filled _heuListList (one row of _h entries per node),
        //  3. runs a Viterbi over the same scores and passes every getPathScore(i, j)
        //     to setHeuMap,
        //  4. accumulates ZGold from the node and edge scores along the tags of x.
        public Lattice(model m, inference inf, dataSeq x)
        {
            _w = x.Count;
            _h = m.NTag;
            _logBel = new belief(_w, _h);

            List <dMatrix>        edgeScores = new List <dMatrix>();
            List <List <double> > nodeScores = new List <List <double> >();
            inf.getYYandY(m, x, edgeScores, nodeScores);

            // copy the scores into the belief; edge entries are only set from node 1 on
            for (int node = 0; node < _w; node++)
            {
                _logBel.belState[node] = new List <double>(nodeScores[node]);
                if (node == 0)
                {
                    continue;
                }
                _logBel.belEdge[node] = new dMatrix(edgeScores[node]);
            }

            _heuListList = new List <List <double> >();
            for (int node = 0; node < _w; node++)
            {
                List <double> row = new List <double>(new double[_h]);
                _heuListList.Add(row);
            }

            Viterbi viterbi = new Viterbi(_w, _h);
            for (int node = 0; node < _w; node++)
            {
                viterbi.setScores(node, nodeScores[node], edgeScores[node]);
            }

            // the decoded path itself is not used afterwards, only the path scores
            List <int> bestPath = new List <int>();
            viterbi.runViterbi(ref bestPath);

            //update the viterbiHeuristicMap
            for (int node = 0; node < _w; node++)
            {
                for (int tag = 0; tag < _h; tag++)
                {
                    double score = viterbi.getPathScore(node, tag);
                    setHeuMap(node, tag, score);
                }
            }

            //get zGold: node score of each tag of x plus the edge score between consecutive tags
            ZGold = 0;
            for (int node = 0; node < x.Count; node++)
            {
                int current = x.getTags(node);
                ZGold += nodeScores[node][current];
                if (node > 0)
                {
                    int previous = x.getTags(node - 1);
                    ZGold += edgeScores[node][previous, current];
                }
            }
        }
예제 #9
0
        // Moves the weights away from the output states and towards the gold states.
        // For every node feature the feature value is subtracted at the output-state id and
        // added at the gold-state id; edge features (from node 1 on) use the value 1.
        // accumW receives the same changes multiplied by (xsize - k).
        void updateWeights(dataSeq x, List <int> outStates, List <int> goldStates, float[] w, float[] accumW, int xsize, int k)
        {
            // factor applied to every change of the accumulated weights
            float scale = xsize - k;

            for (int n = 0; n < x.Count; n++)
            {
                int predicted = outStates[n];
                int gold      = goldStates[n];

                //update the weights and accumulative weights
                foreach (featureTemp ft in _fGene.getFeatureTemp(x, n))
                {
                    float val = (float)ft.val;

                    int predictedId = _fGene.getNodeFeatID(ft.id, predicted);
                    w[predictedId]      -= val;
                    accumW[predictedId] -= scale * val;

                    int goldId = _fGene.getNodeFeatID(ft.id, gold);
                    w[goldId]      += val;
                    accumW[goldId] += scale * val;
                }

                if (n == 0)
                {
                    // the first node has no incoming edge
                    continue;
                }

                const float edgeVal = 1;

                int predictedEdge = _fGene.getEdgeFeatID(outStates[n - 1], predicted);
                w[predictedEdge]      -= edgeVal;
                accumW[predictedEdge] -= scale * edgeVal;

                int goldEdge = _fGene.getEdgeFeatID(goldStates[n - 1], gold);
                w[goldEdge]      += edgeVal;
                accumW[goldEdge] += scale * edgeVal;
            }
        }
예제 #10
0
        //return the gradient of -log{P(y*|x,w)} as follows: E_{P(y|x)}(F(x,y)) - F(x,y*)
        // The gradient is accumulated into vecGrad (entries are added to, not reset).
        // When idSet is not null it is cleared and then receives every feature id touched.
        // The return value is bel.Z - belMasked.Z, taken from the unmasked and masked
        // beliefs computed by _inf.getBeliefs.
        virtual public double getGrad(List <double> vecGrad, model m, dataSeq x, baseHashSet <int> idSet)
        {
            bool collectIds = idSet != null;
            if (collectIds)
            {
                idSet.Clear();
            }

            int tagCount = m.NTag;

            //compute beliefs
            belief free   = new belief(x.Count, tagCount);
            belief masked = new belief(x.Count, tagCount);
            _inf.getBeliefs(free, m, x, false);
            _inf.getBeliefs(masked, m, x, true);

            double zMasked = masked.Z;
            double zFree   = free.Z;

            // node features: (unmasked belief - masked belief) * feature value
            for (int node = 0; node < x.Count; node++)
            {
                foreach (featureTemp ft in _fGene.getFeatureTemp(x, node))
                {
                    int    id  = ft.id;
                    double val = ft.val;
                    for (int tag = 0; tag < tagCount; tag++)
                    {
                        int featId = _fGene.getNodeFeatID(id, tag);
                        if (collectIds)
                        {
                            idSet.Add(featId);
                        }
                        vecGrad[featId] += free.belState[node][tag] * val;
                        vecGrad[featId] -= masked.belState[node][tag] * val;
                    }
                }
            }

            // edge features: start at node 1, each edge is indexed by its target node
            for (int node = 1; node < x.Count; node++)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    for (int prev = 0; prev < tagCount; prev++)
                    {
                        int featId = _fGene.getEdgeFeatID(prev, tag);
                        if (collectIds)
                        {
                            idSet.Add(featId);
                        }
                        vecGrad[featId] += free.belEdge[node][prev, tag];
                        vecGrad[featId] -= masked.belEdge[node][prev, tag];
                    }
                }
            }

            return zFree - zMasked;
        }
        // One SGD pass over the training set in shuffled order.
        // Each sample's gradient (from _grad.getGrad_SGD) is applied with the decayed rate
        // r_k = Global.rate0 * Global.decayFactor^(Global.countWithIter / xsize); only the
        // feature ids reported in fset are updated and their gradient slots reset to 0.
        // When Global.reg != 0, one regularization step over all weights is applied after
        // the pass (using the last r_k) and the squared-weight penalty is added to the error.
        // Stores convergeTest(error) in Global.diff and returns the error.
        public double sgd_lazyReg()
        {
            List <double> w     = _model.W;
            int           fsize = w.Count;
            int           xsize = _X.Count;

            double[]      ary  = new double[fsize];
            List <double> grad = new List <double>(ary);

            List <int> ri = randomTool <int> .getShuffledIndexList(xsize);

            double error = 0;
            double r_k   = 0;

            // The decay rate is logged every xsize / 4 samples. With fewer than four samples
            // that integer division yields 0 and the modulo below would throw
            // DivideByZeroException, so the interval is clamped to at least 1.
            int logInterval = Math.Max(1, xsize / 4);

            for (int t = 0; t < xsize; t++)
            {
                int               ii   = ri[t];
                dataSeq           x    = _X[ii];
                baseHashSet <int> fset = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD(grad, _model, x, fset);
                error += err;

                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
                if (Global.countWithIter % logInterval == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                foreach (int i in fset)
                {
                    w[i] -= r_k * grad[i];
                    //reset
                    grad[i] = 0;
                }
                Global.countWithIter++;
            }

            if (Global.reg != 0)
            {
                // lazy regularization: a single step over every weight after the pass
                for (int i = 0; i < fsize; i++)
                {
                    double grad_i = w[i] / (Global.reg * Global.reg);
                    w[i] -= r_k * grad_i;
                }

                double sum = listTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
예제 #12
0
        // Fast Viterbi decode from precomputed scores (no probability is computed).
        // Loads Ylist[i] / YYlist[i] for every node of x into a Viterbi built over
        // m.NState states and lets runViterbi write its result into tags.
        public void decodeViterbi_train(model m, dataSeq x, List <dMatrix> YYlist, List <List <double> > Ylist, List <int> tags)
        {
            int     nodeCount  = x.Count;
            int     stateCount = m.NState;
            Viterbi decoder    = new Viterbi(nodeCount, stateCount);

            for (int node = 0; node < nodeCount; node++)
            {
                List <double> nodeScores = Ylist[node];
                dMatrix       edgeScores = YYlist[node];
                decoder.setScores(node, nodeScores, edgeScores);
            }

            // the score returned by runViterbi is not needed by this method
            decoder.runViterbi(ref tags, false);
        }
예제 #13
0
        //the scalar version
        // Fills Y with the node scores and YY with the edge scores of node i, multiplying
        // every weight by scalar. Both outputs are reset to 0 first; edge scores are only
        // computed for i > 0. With takeExp the scores are exponentiated. With mask every
        // Y entry whose index differs from the tag of node i in x.getTags() is overwritten
        // by the mask value (0 after exponentiation, double.MinValue otherwise).
        virtual public void getLogYY(double scalar, model m, dataSeq x, int i, ref dMatrix YY, ref List <double> Y, bool takeExp, bool mask)
        {
            YY.set(0);
            listTool.listSet(ref Y, 0);

            List <double>      weights  = m.W;
            List <featureTemp> feats    = _fGene.getFeatureTemp(x, i);
            int                tagCount = m.NTag;

            // node scores
            foreach (featureTemp ft in feats)
            {
                int    id  = ft.id;
                double val = ft.val;
                for (int tag = 0; tag < tagCount; tag++)
                {
                    int featId = _fGene.getNodeFeatID(id, tag);
                    Y[tag] += weights[featId] * scalar * val;
                }
            }

            // edge scores (the first node has no incoming edge)
            if (i > 0)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    for (int prev = 0; prev < tagCount; prev++)
                    {
                        int featId = _fGene.getEdgeFeatID(prev, tag);
                        YY[prev, tag] += weights[featId] * scalar;
                    }
                }
            }

            if (takeExp)
            {
                listTool.listExp(ref Y);
                YY.eltExp();
            }

            if (!mask)
            {
                return;
            }

            double     maskedScore = takeExp ? 0 : double.MinValue;
            List <int> goldTags    = x.getTags();
            for (int tag = 0; tag < Y.Count; tag++)
            {
                if (goldTags[i] != tag)
                {
                    Y[tag] = maskedScore;
                }
            }
        }
예제 #14
0
        // Override working over m.NState states. Fills Y with node scores and YY with edge
        // scores of node i (both reset to 0 first). Edge scores are only computed for i > 0
        // and, in this variant, are summed per feature via getEdgeFeatID(featureId, sPre, s)
        // and weighted by the feature value. With takeExp the scores are exponentiated.
        // With mask every state s with statesPerNodes[i, s] == 0 is overwritten by the mask
        // value (0 after exponentiation, double.MinValue otherwise).
        override public void getLogYY(model m, dataSeq x, int i, ref dMatrix YY, ref List <double> Y, bool takeExp, bool mask)
        {
            YY.set(0);
            listTool.listSet(ref Y, 0);

            float[]            weights    = m.W;
            List <featureTemp> feats      = _fGene.getFeatureTemp(x, i);
            int                stateCount = m.NState;

            // node scores
            foreach (featureTemp ft in feats)
            {
                for (int state = 0; state < stateCount; state++)
                {
                    int featId = _fGene.getNodeFeatID(ft.id, state);
                    Y[state] += weights[featId] * ft.val;
                }
            }

            // edge scores (the first node has no incoming edge)
            if (i > 0)
            {
                foreach (featureTemp ft in feats)
                {
                    for (int state = 0; state < stateCount; state++)
                    {
                        for (int prev = 0; prev < stateCount; prev++)
                        {
                            int featId = _fGene.getEdgeFeatID(ft.id, prev, state);
                            YY[prev, state] += weights[featId] * ft.val;
                        }
                    }
                }
            }

            if (takeExp)
            {
                listTool.listExp(ref Y);
                YY.eltExp();
            }

            if (!mask)
            {
                return;
            }

            double  maskedScore = takeExp ? 0 : double.MinValue;
            dMatrix allowed     = m.getStatesPerNode(x);
            for (int state = 0; state < Y.Count; state++)
            {
                if (allowed[i, state] == 0)
                {
                    Y[state] = maskedScore;
                }
            }
        }
예제 #15
0
        // Clears idset (when it is not null) and returns getGradCRF(g, m, x, idset).
        // A null sequence contributes nothing: 0 is returned without calling getGradCRF.
        public double getGrad_SGD(List <double> g, model m, dataSeq x, baseHashSet <int> idset)
        {
            if (idset != null)
            {
                idset.Clear();
            }

            if (x != null)
            {
                return getGradCRF(g, m, x, idset);
            }

            return 0;
        }
예제 #16
0
        // For every node of x, computes the unmasked log scores with getLogYY
        // (takeExp = false, mask = false) over m.NState states and appends them to
        // YYlist / Ylist; a copy of each is appended to maskYYlist / maskYlist.
        // Afterwards, in the copied node lists, every state s with
        // statesPerNodes[i, s] == 0 is overwritten with double.MinValue. The copied edge
        // matrices are not modified here.
        // An ApplicationException raised while computing the scores is caught and reported
        // on the console.
        public void getYYandY(model m, dataSeq x, List <dMatrix> YYlist, List <List <double> > Ylist, List <dMatrix> maskYYlist, List <List <double> > maskYlist)
        {
            int nNodes = x.Count;
            int nState = m.NState;

            double[] dAry = new double[nState];
            bool     mask = false;

            try
            {
                //Global.rwlock.AcquireReaderLock(Global.readWaitTime);

                for (int i = 0; i < nNodes; i++)
                {
                    dMatrix       YYi = new dMatrix(nState, nState);
                    List <double> Yi  = new List <double>(dAry);
                    //compute the Mi matrix
                    getLogYY(m, x, i, ref YYi, ref Yi, false, mask);
                    YYlist.Add(YYi);
                    Ylist.Add(Yi);

                    // independent copies, so masking below leaves the unmasked scores intact
                    maskYYlist.Add(new dMatrix(YYi));
                    maskYlist.Add(new List <double>(Yi));
                }

                //Global.rwlock.ReleaseReaderLock();
            }
            catch (ApplicationException)
            {
                Console.WriteLine("read out time!");
            }

            //get the masked Y
            double  maskValue      = double.MinValue;
            dMatrix statesPerNodes = m.getStatesPerNode(x);

            for (int i = 0; i < nNodes; i++)
            {
                // Only statesPerNodes decides what gets masked; no tag list is needed here.
                List <double> Y = maskYlist[i];
                for (int s = 0; s < Y.Count; s++)
                {
                    if (statesPerNodes[i, s] == 0)
                    {
                        Y[s] = maskValue;
                    }
                }
            }
        }
예제 #17
0
        // Fills Y with the node scores and YY with the edge scores of node i over m.NTag
        // tags. Both outputs are reset to 0 first; edge scores are only computed for i > 0.
        // With takeExp the scores are exponentiated. With mask every Y entry whose index
        // differs from the tag of node i in x.getTags() is overwritten by the mask value
        // (0 after exponentiation, double.MinValue otherwise).
        virtual public void getLogYY(model m, dataSeq x, int i, ref dMatrix YY, ref List <double> Y, bool takeExp, bool mask)
        {
            YY.set(0);
            listTool.listSet(ref Y, 0);

            float[]            weights  = m.W;
            List <featureTemp> feats    = _fGene.getFeatureTemp(x, i);
            int                tagCount = m.NTag;

            // node scores
            foreach (featureTemp ft in feats)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    int featId = _fGene.getNodeFeatID(ft.id, tag);
                    Y[tag] += weights[featId] * ft.val;
                }
            }

            // edge scores (the first node has no incoming edge)
            if (i > 0)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    for (int prev = 0; prev < tagCount; prev++)
                    {
                        int featId = _fGene.getEdgeFeatID(prev, tag);
                        YY[prev, tag] += weights[featId];
                    }
                }
            }

            if (takeExp)
            {
                listTool.listExp(ref Y);
                YY.eltExp();
            }

            if (!mask)
            {
                return;
            }

            double     maskedScore = takeExp ? 0 : double.MinValue;
            List <int> goldTags    = x.getTags();
            for (int tag = 0; tag < Y.Count; tag++)
            {
                if (goldTags[i] != tag)
                {
                    Y[tag] = maskedScore;
                }
            }
        }
예제 #18
0
        // Searches up to N taggings for x on a freshly built Lattice.
        // nBestTaggings receives the taggings in the order searchForPath returns them.
        // scores first collects the value returned by searchForPath for each tagging and,
        // on return, holds Math.Exp(score - Z) for each one, where Z = logSum(scores).
        // The search stops early when searchForPath returns -2 or when
        // Math.Exp(scores[0] - scores[n]) reaches Global.stopSearchFactor.
        // Returns Z - _lattice.ZGold and updates the Global.nbest* profiling counters.
        public double getNBest(model m, inference inf, dataSeq x, int N, ref List <List <int> > nBestTaggings, ref List <double> scores)
        {
            nBestTaggings.Clear();
            // scores is indexed in step with the search loop below (scores[0], scores[n]), so
            // it has to start empty just like nBestTaggings; stale entries would shift those
            // indices and leak into the normalization.
            scores.Clear();
            _w       = x.Count;
            _h       = m.NTag;
            _lattice = new Lattice(m, inf, x);
            setStartAndGoal(-1, 0, _w, 0);//a virtual begin node & a virtual end node

            for (int n = 0; n < N; n++)
            {
                List <int> tagging  = new List <int>();
                double     logNumer = searchForPath(ref tagging);
                if (logNumer == -2)//search fail
                {
                    break;
                }

                nBestTaggings.Add(tagging);
                scores.Add(logNumer);//log numerator

                // ratio between the first result and the current one
                double check = Math.Exp((scores[0] - scores[n]));
                if (check >= Global.stopSearchFactor)//ratio reached the configured factor: stop searching
                {
                    break;
                }
            }

            double Z = logSum(scores);

            listTool.listAdd(ref scores, -Z);
            listTool.listExp(ref scores);//prob
            //error
            double error = Z - _lattice.ZGold;


            //update the profiler (only the first 10 entries are accumulated)
            Global.nbestCount += scores.Count;
            Global.nbestNorm++;
            int small = scores.Count < 10 ? scores.Count : 10;

            for (int i = 0; i < small; i++)
            {
                Global.nbestProbList[i] += scores[i];
            }

            return(error);
        }
예제 #19
0
        // Copies the window of x that starts at node n and covers up to length nodes
        // (clipped at x.Count) into this sequence: feature templates and gold tags.
        public dataSeq(dataSeq x, int n, int length)
        {
            // exclusive upper bound of the copied window
            int stop = (n + length < x.Count) ? n + length : x.Count;

            int i = n;
            while (i < stop)
            {
                featureTemps.Add(x.featureTemps[i]);
                yGold.Add(x.yGold[i]);
                i++;
            }
        }
예제 #20
0
        // Returns the matrix cached in x.GoldStatesPerNode. When that is null or has
        // R == 0, a (number of tags of x) by _nTag matrix is built with a 1 at
        // [i, tag of node i], stored back into x.GoldStatesPerNode and returned.
        public dMatrix getStatesPerNode(dataSeq x)
        {
            dMatrix cached = x.GoldStatesPerNode;
            if (cached != null && cached.R != 0)
            {
                return cached;
            }

            List <int> goldTags = x.getTags();
            dMatrix    allowed  = new dMatrix(goldTags.Count, _nTag);
            for (int node = 0; node < goldTags.Count; node++)
            {
                allowed[node, goldTags[node]] = 1;
            }

            x.GoldStatesPerNode = allowed;
            return allowed;
        }
예제 #21
0
        // Loads a list of data sets from a feature file and a tag file.
        // Both files are read completely, '\r' characters are removed, and the text is split
        // on Global.triLineEndAry into blocks. Block 0 of each file holds the feature count /
        // tag count; every further block is split on Global.biLineEndAry into sequences that
        // are parsed pairwise with dataSeq.read and collected into one dataSet per block.
        // Throws Exception("error") when the two files have a different number of blocks.
        // NOTE(review): a tag block with fewer entries than its feature block is not checked
        // and ends in an out-of-range index on lbAry — confirm whether that should be validated.
        public datasetList(string fileFeature, string fileTag)
        {
            string featureText;
            string tagText;

            // using guarantees both readers are closed even when opening the second file or
            // reading fails; the parsing below no longer holds the files open.
            using (StreamReader srfileFeature = new StreamReader(fileFeature))
            using (StreamReader srfileTag = new StreamReader(fileTag))
            {
                featureText = srfileFeature.ReadToEnd();
                tagText     = srfileTag.ReadToEnd();
            }

            featureText = featureText.Replace("\r", "");
            string[] fAry = featureText.Split(Global.triLineEndAry, StringSplitOptions.RemoveEmptyEntries);

            tagText = tagText.Replace("\r", "");
            string[] tAry = tagText.Split(Global.triLineEndAry, StringSplitOptions.RemoveEmptyEntries);

            if (fAry.Length != tAry.Length)
            {
                throw new Exception("error");
            }

            // the first block of each file is the header with the counts
            _nFeature = int.Parse(fAry[0]);
            _nTag     = int.Parse(tAry[0]);

            for (int i = 1; i < fAry.Length; i++)
            {
                string   fBlock = fAry[i];
                string   tBlock = tAry[i];
                dataSet  ds     = new dataSet();
                string[] fbAry  = fBlock.Split(Global.biLineEndAry, StringSplitOptions.RemoveEmptyEntries);
                string[] lbAry  = tBlock.Split(Global.biLineEndAry, StringSplitOptions.RemoveEmptyEntries);

                for (int k = 0; k < fbAry.Length; k++)
                {
                    string  fm  = fbAry[k];
                    string  tm  = lbAry[k];
                    dataSeq seq = new dataSeq();
                    seq.read(fm, tm);
                    ds.Add(seq);
                }
                Add(ds);
            }
        }
예제 #22
0
        // Fast Viterbi decode that only produces tags (no probability is computed).
        // Scores are built over m.NTag tags and runViterbi writes its result straight into
        // tags, which is cleared first.
        public void decodeViterbi_test(model m, dataSeq x, List <int> tags)
        {
            tags.Clear();

            int nodeCount = x.Count;
            int tagCount  = m.NTag;

            // Scratch containers reused for every node; getLogYY overwrites them.
            dMatrix       edgeScores = new dMatrix(tagCount, tagCount);
            List <double> nodeScores = new List <double>(new double[tagCount]);
            Viterbi       decoder    = new Viterbi(nodeCount, tagCount);

            int node = 0;
            while (node < nodeCount)
            {
                getLogYY(m, x, node, ref edgeScores, ref nodeScores, false, false);
                decoder.setScores(node, nodeScores, edgeScores);
                node++;
            }

            decoder.runViterbi(ref tags);
        }
예제 #23
0
        // Returns the matrix cached in x.GoldStatesPerNode. When that is null or has
        // R == 0, a (number of tags of x) by _nHiddenState matrix is built in which, for
        // every node i, each state returned by tagToHiddenStates(tag of node i) is set to 1;
        // the matrix is stored back into x.GoldStatesPerNode and returned.
        public dMatrix getStatesPerNode(dataSeq x)
        {
            dMatrix cached = x.GoldStatesPerNode;
            if (cached != null && cached.R != 0)
            {
                return cached;
            }

            List <int> goldTags = x.getTags();
            dMatrix    allowed  = new dMatrix(goldTags.Count, _nHiddenState);
            for (int node = 0; node < goldTags.Count; node++)
            {
                List <int> hiddenStates = tagToHiddenStates(goldTags[node]);
                foreach (int state in hiddenStates)
                {
                    allowed[node, state] = 1;
                }
            }

            x.GoldStatesPerNode = allowed;
            return allowed;
        }
예제 #24
0
 // Stores the sequence and the output tag list as given; neither argument is copied.
 public dataSeqTest(dataSeq x, List <int> yOutput)
 {
     this._yOutput = yOutput;
     this._x       = x;
 }
예제 #25
0
 // Base implementation: always throws Exception("error") and never assigns
 // nodeFeature or edgeFeature. The method is virtual, so a subclass can replace it.
 virtual public void getFeatures(dataSeq x, int node, ref List <List <int> > nodeFeature, ref int[,] edgeFeature)
 {
     Exception notSupported = new Exception("error");
     throw notSupported;
 }
예제 #26
0
 // Returns x.getFeatureTemp(node); this method adds no processing of its own.
 public List <featureTemp> getFeatureTemp(dataSeq x, int node)
 {
     List <featureTemp> temps = x.getFeatureTemp(node);
     return temps;
 }
예제 #27
0
        //ADF training
        // One pass over the shuffled training set in mini-batches of Global.miniBatch samples.
        // Per batch: the gradient comes from _grad.getGrad_SGD_miniBatch, the features
        // reported in fSet get their occurrence counter increased, and their weights are
        // updated with the per-feature rate Global.decayList[i].
        // Whenever an index inside the batch window hits a multiple of the update interval
        // (never for the first batch) or the last sample was reached, the per-feature rates
        // are multiplied by a factor between Global.upper and Global.lower that depends on
        // how often the feature occurred since the previous update, and a regularization
        // step over all weights is applied when Global.reg != 0.
        // Stores convergeTest(error) in Global.diff and returns the error, including the
        // squared-weight penalty when Global.reg != 0.
        public double adf()
        {
            float[]       w                = _model.W;
            int           fsize            = w.Length;
            int           xsize            = _X.Count;
            List <double> grad             = new List <double>(new double[fsize]);
            double        error            = 0;
            List <int>    featureCountList = new List <int>(new int[fsize]);
            List <int>    ri               = randomTool <int> .getShuffledIndexList(xsize);//random shuffle of training samples

            Global.interval = xsize / Global.nUpdate;
            // With fewer samples than Global.nUpdate the division above yields 0 and the
            // modulo in the interval check below would throw DivideByZeroException.
            // A local copy of at least 1 is used for that check; Global.interval keeps the
            // computed value.
            var interval = Global.interval;
            if (interval == 0)
            {
                interval = 1;
            }
            int nSample = 0;//#sample in an update interval

            for (int t = 0; t < xsize; t += Global.miniBatch)
            {
                // collect the samples of this batch; the last batch may be shorter
                List <dataSeq> XX  = new List <dataSeq>();
                bool           end = false;
                for (int k = t; k < t + Global.miniBatch; k++)
                {
                    int     i = ri[k];
                    dataSeq x = _X[i];
                    XX.Add(x);
                    if (k == xsize - 1)
                    {
                        end = true;
                        break;
                    }
                }
                int mbSize = XX.Count;
                nSample += mbSize;
                baseHashSet <int> fSet = new baseHashSet <int>();
                double            err  = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fSet);
                error += err;

                foreach (int i in fSet)
                {
                    featureCountList[i]++;
                }

                // does any index of this batch window fall on an interval boundary?
                bool check = false;
                for (int k = t; k < t + Global.miniBatch; k++)
                {
                    if (t != 0 && k % interval == 0)
                    {
                        check = true;
                    }
                }
                //update decay rates
                if (check || end)
                {
                    for (int i = 0; i < fsize; i++)
                    {
                        int    v   = featureCountList[i];
                        double u   = (double)v / (double)nSample;
                        double eta = Global.upper - (Global.upper - Global.lower) * u;
                        Global.decayList[i] *= eta;
                    }
                    //reset
                    for (int i = 0; i < featureCountList.Count; i++)
                    {
                        featureCountList[i] = 0;
                    }
                }
                //update weights
                foreach (int i in fSet)
                {
                    w[i] -= (float)(Global.decayList[i] * grad[i]);
                    //reset
                    grad[i] = 0;
                }
                //reg
                if (check || end)
                {
                    if (Global.reg != 0)
                    {
                        for (int i = 0; i < fsize; i++)
                        {
                            // scaled by the share of samples seen since the previous reg step
                            double grad_i = w[i] / (Global.reg * Global.reg) * ((double)nSample / (double)xsize);
                            w[i] -= (float)(Global.decayList[i] * grad_i);
                        }
                    }
                    //reset
                    nSample = 0;
                }
                Global.countWithIter += mbSize;
            }

            if (Global.reg != 0)
            {
                double sum = arrayTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
예제 #28
0
        // One SGD pass over the shuffled training set with the regularization applied at
        // every step. Instead of shrinking every weight per step, the weights are stored
        // divided by a running factor (scalar); every Global.scalarResetStep steps the
        // factor is folded back into the stored weights and reset to 1.
        // During the pass Global.reg is temporarily replaced by Global.reg * sqrt(xsize);
        // the previous value is restored before returning, also when the pass throws.
        // Stores convergeTest(error) in Global.diff and returns the error, including the
        // squared-weight penalty when Global.reg != 0.
        public double sgd_exactReg()
        {
            double scalar = 1, scalarOld = 1;

            float[] w      = _model.W;
            int     fsize  = w.Length;
            int     xsize  = _X.Count;
            double  newReg = Global.reg * Math.Sqrt(xsize);
            double  oldReg = Global.reg;

            Global.reg = newReg;

            try
            {
                double[]      tmpAry = new double[fsize];
                List <double> grad   = new List <double>(tmpAry);

                List <int> ri = randomTool <int> .getShuffledIndexList(xsize);

                double error = 0;
                double r_k   = 0;

                // The decay rate is logged every xsize / 4 samples. With fewer than four
                // samples that integer division yields 0 and the modulo below would throw
                // DivideByZeroException, so the interval is clamped to at least 1.
                int logInterval = Math.Max(1, xsize / 4);

                for (int t = 0; t < xsize; t++)
                {
                    int               ii   = ri[t];
                    dataSeq           x    = _X[ii];
                    baseHashSet <int> fset = new baseHashSet <int>();
                    double            err  = _grad.getGrad_SGD(grad, scalar, _model, x, fset);
                    error += err;
                    //decaying rate: r_k = r_0 * beta^(k/N), with 0<r_0<=1, 0<beta<1
                    r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
                    if (Global.countWithIter % logInterval == 0)
                    {
                        Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                    }

                    //reg
                    if (t % Global.scalarResetStep == 0)
                    {
                        //reset: fold the running factor into the stored weights
                        for (int i = 0; i < fsize; i++)
                        {
                            w[i] *= (float)scalar;
                        }
                        scalar = scalarOld = 1;
                    }
                    else
                    {
                        scalarOld = scalar;
                        scalar   *= 1 - r_k / (Global.reg * Global.reg);
                    }

                    foreach (int i in fset)
                    {
                        // work on the real weight, then store it divided by the new factor
                        double realWeight = w[i] * scalarOld;
                        double grad_i     = grad[i] + realWeight / (Global.reg * Global.reg);
                        realWeight = realWeight - r_k * grad_i;
                        w[i]       = (float)(realWeight / scalar);

                        //reset
                        grad[i] = 0;
                    }
                    Global.countWithIter++;
                }

                //recover the real weights
                for (int i = 0; i < fsize; i++)
                {
                    w[i] *= (float)scalar;
                }

                if (Global.reg != 0.0)
                {
                    double sum = arrayTool.squareSum(w);
                    error += sum / (2.0 * Global.reg * Global.reg);
                }

                Global.diff = convergeTest(error);
                return(error);
            }
            finally
            {
                // always undo the temporary rescaling of the global regularizer
                Global.reg = oldReg;
            }
        }
예제 #29
0
        // SGD with lazy regularization.
        //
        // Runs one pass over the samples in _X (visited in shuffled order) in mini-batches of
        // Global.miniBatch samples and updates _model.W in place. Inside the pass only the
        // features reported in 'fset' are updated, using the data gradient alone. The L2
        // penalty is applied once after the pass, to every weight, using the last learning
        // rate r_k, and only when Global.reg is non-zero.
        //
        // Side effects: advances Global.countWithIter by the size of each mini-batch, writes
        // progress lines to Global.swLog, and stores the result of convergeTest in Global.diff.
        //
        // Returns: the sum of the values returned by getGrad_SGD_miniBatch, plus
        // squareSum(w) / (2 * reg^2) when Global.reg is non-zero.
        public double sgd_lazyReg()
        {
            float[] w     = _model.W;
            int     fsize = w.Length;
            int     xsize = _X.Count;

            double[]     ary  = new double[fsize];
            List<double> grad = new List<double>(ary);

            List<int> ri = randomTool<int>.getShuffledIndexList(xsize);

            // xsize / 4 is 0 for fewer than four samples, which would make the modulo below
            // throw DivideByZeroException; never let the logging interval drop under 1.
            int logInterval = Math.Max(1, xsize / 4);

            // A non-positive batch size would never advance t; fall back to one sample per batch.
            int batchSize = Math.Max(1, Global.miniBatch);

            double error = 0;
            double r_k   = 0;

            for (int t = 0; t < xsize; t += batchSize)
            {
                // The last batch may be shorter than batchSize.
                int           mbSize = Math.Min(batchSize, xsize - t);
                List<dataSeq> XX     = new List<dataSeq>(mbSize);
                for (int k = t; k < t + mbSize; k++)
                {
                    XX.Add(_X[ri[k]]);
                }

                baseHashSet<int> fset = new baseHashSet<int>();
                error += _grad.getGrad_SGD_miniBatch(grad, _model, XX, fset);

                //decaying rate: r_k = r_0 * beta^(k/N), with 0<r_0<=1, 0<beta<1
                r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);

                if (Global.countWithIter % logInterval == 0)
                {
                    Global.swLog.WriteLine("iter{0}    decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
                }

                foreach (int i in fset)
                {
                    // grad[i] is the gradient of -log(obj), so step against it to move
                    // towards the minimum.
                    w[i] -= (float)(r_k * grad[i]);
                    // Clear the slot so the gradient buffer is clean for the next batch.
                    grad[i] = 0;
                }
                Global.countWithIter += mbSize;
            }

            if (Global.reg != 0)
            {
                // Single penalty step over all weights, using the final learning rate.
                for (int i = 0; i < fsize; i++)
                {
                    double grad_i = w[i] / (Global.reg * Global.reg);
                    w[i] -= (float)(r_k * grad_i);
                }

                double sum = arrayTool.squareSum(w);
                error += sum / (2.0 * Global.reg * Global.reg);
            }

            Global.diff = convergeTest(error);
            return(error);
        }
예제 #30
0
        // Fills 'bel' for sequence x under model m: bel.belState[i] for every position,
        // bel.belEdge[i] for every position i >= 1, and bel.Z.
        //
        // Two sweeps are made over the sequence. The first walks from the last position down
        // to position 1 and writes into bel.belState[i - 1]. The second walks forward,
        // maintaining the alpha vector, filling bel.belEdge[i] and adding alpha into
        // bel.belState[i]. Finally the logSum of the last alpha vector is subtracted from
        // every state/edge entry, the entries are exponentiated, and that logSum is stored
        // in bel.Z.
        //
        // 'mask' is passed through unchanged to getLogYY.
        // NOTE(review): the first sweep reads bel.belState[i] before writing it, so it relies
        // on the contents 'bel' has on entry (presumably zeros from a fresh belief) - confirm.
        public void getBeliefs(belief bel, model m, dataSeq x, bool mask)
        {
            int nodeCount = x.Count;
            int tagCount  = m.NTag;

            dMatrix edgeScores = new dMatrix(tagCount, tagCount);

            double[]     zeros      = new double[tagCount];
            List<double> nodeScores = new List<double>(zeros);
            List<double> alpha      = new List<double>(zeros);
            List<double> nextAlpha  = new List<double>(zeros);
            List<double> scratch    = new List<double>(zeros);

            // Backward sweep: last position down to 1, writing into belState[pos - 1].
            int back = nodeCount - 1;
            while (back > 0)
            {
                getLogYY(m, x, back, ref edgeScores, ref nodeScores, false, mask);
                listTool.listSet(ref scratch, bel.belState[back]);
                listTool.listAdd(ref scratch, nodeScores);
                logMultiply(edgeScores, scratch, bel.belState[back - 1]);
                back--;
            }

            // Forward sweep: compute alpha values and accumulate the beliefs.
            for (int pos = 0; pos < nodeCount; pos++)
            {
                getLogYY(m, x, pos, ref edgeScores, ref nodeScores, false, mask);

                if (pos == 0)
                {
                    // First position: alpha is just the node scores.
                    listTool.listSet(ref nextAlpha, nodeScores);
                }
                else
                {
                    // The edge matrix is transposed for the alpha recursion ...
                    listTool.listSet(ref scratch, alpha);
                    edgeScores.transpose();
                    logMultiply(edgeScores, scratch, nextAlpha);
                    listTool.listAdd(ref nextAlpha, nodeScores);

                    // ... and transposed again before it is copied into the edge belief.
                    listTool.listSet(ref scratch, nodeScores);
                    listTool.listAdd(ref scratch, bel.belState[pos]);
                    edgeScores.transpose();
                    bel.belEdge[pos].set(edgeScores);
                    for (int from = 0; from < tagCount; from++)
                    {
                        for (int to = 0; to < tagCount; to++)
                        {
                            bel.belEdge[pos][from, to] += scratch[to] + alpha[from];
                        }
                    }
                }

                List<double> stateBelief = bel.belState[pos];
                listTool.listAdd(ref stateBelief, nextAlpha);
                listTool.listSet(ref alpha, nextAlpha);
            }

            // logSum over the alpha vector of the last position.
            double logNorm = logSum(alpha);

            // Subtract it everywhere and exponentiate.
            for (int pos = 0; pos < nodeCount; pos++)
            {
                List<double> stateBelief = bel.belState[pos];
                listTool.listAdd(ref stateBelief, -logNorm);
                listTool.listExp(ref stateBelief);
            }
            for (int pos = 1; pos < nodeCount; pos++)
            {
                bel.belEdge[pos].add(-logNorm);
                bel.belEdge[pos].eltExp();
            }

            bel.Z = logNorm;
        }