public double getGrad_SGD(List<double> g, model m, dataSeq x, baseHashSet<int> idset)
{
    if (idset != null)
    {
        idset.Clear();
    }
    if (x == null)
    {
        return 0;
    }
    return getGradCRF(g, m, x, idset);
}
//the scalar version
virtual public void getLogYY(double scalar, model m, dataSeq x, int i, ref dMatrix YY, ref List<double> Y, bool takeExp, bool mask)
{
    YY.set(0);
    listTool.listSet(ref Y, 0);
    float[] w = m.W;
    List<featureTemp> fList = _fGene.getFeatureTemp(x, i);
    int nTag = m.NTag;
    //node scores
    foreach (featureTemp ft in fList)
    {
        for (int s = 0; s < nTag; s++)
        {
            int f = _fGene.getNodeFeatID(ft.id, s);
            Y[s] += w[f] * scalar * ft.val;
        }
    }
    //edge scores
    if (i > 0)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                YY[sPre, s] += w[f] * scalar;
            }
        }
    }
    double maskValue = double.MinValue;
    if (takeExp)
    {
        listTool.listExp(ref Y);
        YY.eltExp();
        maskValue = 0;
    }
    //mask: keep only the gold tag's score at node i
    if (mask)
    {
        List<int> tagList = x.getTags();
        for (int s = 0; s < Y.Count; s++)
        {
            if (tagList[i] != s)
            {
                Y[s] = maskValue;
            }
        }
    }
}
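//Illustrative sketch (not part of the toolkit): how the mask above works in the
//log domain. With takeExp == false the scores are logs, so masked tags get
//double.MinValue (effectively log 0); after exponentiation the mask value is 0.
//The tag index and scores below are hypothetical.
static void maskDemo()
{
    double[] logY = { 1.2, -0.3, 0.8 }; //hypothetical log scores for 3 tags
    int goldTag = 1;                    //hypothetical gold label at this node
    for (int s = 0; s < logY.Length; s++)
    {
        if (s != goldTag)
        {
            logY[s] = double.MinValue;  //exp(double.MinValue) underflows to 0
        }
    }
    //after an exp over logY, only the gold tag keeps probability mass
}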
public void getYYandY(model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist, List<dMatrix> maskYYlist, List<List<double>> maskYlist)
{
    int nNodes = x.Count;
    int nTag = m.NTag;
    double[] dAry = new double[nTag];
    bool mask = false;
    try
    {
        //Global.rwlock.AcquireReaderLock(Global.readWaitTime);
        for (int i = 0; i < nNodes; i++)
        {
            dMatrix YYi = new dMatrix(nTag, nTag);
            List<double> Yi = new List<double>(dAry);
            //compute the Mi matrix
            getLogYY(m, x, i, ref YYi, ref Yi, false, mask);
            YYlist.Add(YYi);
            Ylist.Add(Yi);
            maskYYlist.Add(new dMatrix(YYi));
            maskYlist.Add(new List<double>(Yi));
        }
        //Global.rwlock.ReleaseReaderLock();
    }
    catch (ApplicationException)
    {
        Console.WriteLine("reader lock timed out!");
    }
    //get the masked YY and Y: only the gold tag keeps its score at each node
    double maskValue = double.MinValue;
    for (int i = 0; i < nNodes; i++)
    {
        List<double> Y = maskYlist[i];
        List<int> tagList = x.getTags();
        for (int s = 0; s < Y.Count; s++)
        {
            if (tagList[i] != s)
            {
                Y[s] = maskValue;
            }
        }
    }
}
public dataSeq(dataSeq x, int n, int length)
{
    //copy the subsequence [n, min(n + length, x.Count)) of x
    int end = 0;
    if (n + length < x.Count)
    {
        end = n + length;
    }
    else
    {
        end = x.Count;
    }
    for (int i = n; i < end; i++)
    {
        featureTemps.Add(x.featureTemps[i]);
        yGold.Add(x.yGold[i]);
    }
}
public datasetList(string fileFeature, string fileTag)
{
    StreamReader srfileFeature = new StreamReader(fileFeature);
    StreamReader srfileTag = new StreamReader(fileTag);
    //dataset blocks are separated by Global.triLineEndAry,
    //sequences within a block by Global.biLineEndAry
    string txt = srfileFeature.ReadToEnd();
    txt = txt.Replace("\r", "");
    string[] fAry = txt.Split(Global.triLineEndAry, StringSplitOptions.RemoveEmptyEntries);
    txt = srfileTag.ReadToEnd();
    txt = txt.Replace("\r", "");
    string[] tAry = txt.Split(Global.triLineEndAry, StringSplitOptions.RemoveEmptyEntries);
    if (fAry.Length != tAry.Length)
    {
        throw new Exception("feature file and tag file have different numbers of blocks");
    }
    //the first block stores the feature/tag counts
    _nFeature = int.Parse(fAry[0]);
    _nTag = int.Parse(tAry[0]);
    for (int i = 1; i < fAry.Length; i++)
    {
        string fBlock = fAry[i];
        string tBlock = tAry[i];
        dataSet ds = new dataSet();
        string[] fbAry = fBlock.Split(Global.biLineEndAry, StringSplitOptions.RemoveEmptyEntries);
        string[] lbAry = tBlock.Split(Global.biLineEndAry, StringSplitOptions.RemoveEmptyEntries);
        for (int k = 0; k < fbAry.Length; k++)
        {
            string fm = fbAry[k];
            string tm = lbAry[k];
            dataSeq seq = new dataSeq();
            seq.read(fm, tm);
            ds.Add(seq);
        }
        Add(ds);
    }
    srfileFeature.Close();
    srfileTag.Close();
}
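//Usage sketch (assumptions, not part of the toolkit): this assumes
//Global.triLineEndAry splits dataset blocks on triple line-ends ("\n\n\n") and
//Global.biLineEndAry splits sequences on double line-ends ("\n\n"); the per-line
//feature syntax ("1,0.5") is hypothetical and depends on dataSeq.read().
static void toyCorpusSketch()
{
    //header block first ("10000" features / "5" tags), then one dataset
    //containing two sequences (2 nodes and 1 node respectively)
    System.IO.File.WriteAllText("toy.feature", "10000\n\n\n1,0.5\n2,1.0\n\n3,1.0\n\n\n");
    System.IO.File.WriteAllText("toy.tag", "5\n\n\n0\n1\n\n2\n\n\n");
    datasetList dsl = new datasetList("toy.feature", "toy.tag");
}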
public double decodeViterbi_fast(model m, dataSeq x, List<int> tags)
{
    tags.Clear();
    int nNode = x.Count;
    int nTag = m.NTag;
    dMatrix YY = new dMatrix(nTag, nTag);
    double[] dAry = new double[nTag];
    List<double> Y = new List<double>(dAry);
    Viterbi viter = new Viterbi(nNode, nTag);
    //collect the log node/edge scores, then run Viterbi decoding on them
    for (int i = 0; i < nNode; i++)
    {
        getLogYY(m, x, i, ref YY, ref Y, false, false);
        viter.setScores(i, Y, YY);
    }
    double numer = viter.runViterbi(ref tags, false);
    return numer;
}
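//Minimal sketch (illustrative only; the actual Viterbi class lives elsewhere in
//the repo) of the DP that runViterbi is assumed to perform. With Y[i][s] the log
//node score and YY[sPre, s] the log transition score:
//  score[i][s] = Y[i][s] + max_sPre(score[i-1][sPre] + YY[sPre, s])
static double viterbiSketch(double[][] Y, double[,] YY, List<int> bestTags)
{
    int nNode = Y.Length, nTag = Y[0].Length;
    double[,] score = new double[nNode, nTag];
    int[,] backPtr = new int[nNode, nTag];
    for (int s = 0; s < nTag; s++)
    {
        score[0, s] = Y[0][s];
    }
    for (int i = 1; i < nNode; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            score[i, s] = double.NegativeInfinity;
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                double v = score[i - 1, sPre] + YY[sPre, s] + Y[i][s];
                if (v > score[i, s])
                {
                    score[i, s] = v;
                    backPtr[i, s] = sPre;
                }
            }
        }
    }
    //pick the best final tag, then trace the back-pointers
    int best = 0;
    for (int s = 1; s < nTag; s++)
    {
        if (score[nNode - 1, s] > score[nNode - 1, best]) { best = s; }
    }
    double bestScore = score[nNode - 1, best];
    bestTags.Clear();
    for (int i = nNode - 1; i >= 0; i--)
    {
        bestTags.Insert(0, best);
        best = backPtr[i, best];
    }
    return bestScore;
}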
//the scalar version
virtual public double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet)
{
    idSet.Clear();
    int nTag = m.NTag;
    //compute beliefs (marginals): unmasked for the model expectation,
    //masked for the gold-constrained expectation
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, scalar, false);
    _inf.getBeliefs(belMasked, m, x, scalar, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                idSet.Add(f);
                vecGrad[f] += bel.belState[i][s] * im.val;
                vecGrad[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                idSet.Add(f);
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }
    return Z - ZGold;//-log{P(y*|x,w)}
}
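//Added commentary on the gradient above: the returned value is the per-sequence loss
//  -log P(y*|x,w) = log Z(x) - log Z_gold(x)  (i.e. Z - ZGold in log space),
//and each gradient entry accumulates
//  dLoss/dw_f = E_model[f] - E_gold[f],
//which is why the unmasked beliefs are added and the masked beliefs subtracted.
//At the optimum the model's expected feature counts match the gold counts.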
//the scalar version
public void getBeliefs(belief bel, model m, dataSeq x, double scalar, bool mask)
{
    int nNodes = x.Count;
    int nTag = m.NTag;
    dMatrix YY = new dMatrix(nTag, nTag);
    double[] dAry = new double[nTag];
    List<double> Y = new List<double>(dAry);
    List<double> alpha_Y = new List<double>(dAry);
    List<double> newAlpha_Y = new List<double>(dAry);
    List<double> tmp_Y = new List<double>(dAry);
    //compute beta values in a backward scan
    for (int i = nNodes - 1; i > 0; i--)
    {
        getLogYY(scalar, m, x, i, ref YY, ref Y, false, mask);
        listTool.listSet(ref tmp_Y, bel.belState[i]);
        listTool.listAdd(ref tmp_Y, Y);
        logMultiply(YY, tmp_Y, bel.belState[i - 1]);
    }
    //compute alpha values in a forward scan
    for (int i = 0; i < nNodes; i++)
    {
        getLogYY(scalar, m, x, i, ref YY, ref Y, false, mask);
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, alpha_Y);
            YY.transpose();
            logMultiply(YY, tmp_Y, newAlpha_Y);
            listTool.listAdd(ref newAlpha_Y, Y);
        }
        else
        {
            listTool.listSet(ref newAlpha_Y, Y);
        }
        //edge beliefs: alpha + YY + Y + beta
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, Y);
            listTool.listAdd(ref tmp_Y, bel.belState[i]);
            YY.transpose();//transpose back to the original orientation
            bel.belEdge[i].set(YY);
            for (int yPre = 0; yPre < nTag; yPre++)
            {
                for (int y = 0; y < nTag; y++)
                {
                    bel.belEdge[i][yPre, y] += tmp_Y[y] + alpha_Y[yPre];
                }
            }
        }
        //node beliefs: alpha + beta
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, newAlpha_Y);
        listTool.listSet(ref alpha_Y, newAlpha_Y);
    }
    //normalize in log space, then exponentiate to probabilities
    double Z = logSum(alpha_Y);
    for (int i = 0; i < nNodes; i++)
    {
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, -Z);
        listTool.listExp(ref tmp);
    }
    for (int i = 1; i < nNodes; i++)
    {
        bel.belEdge[i].add(-Z);
        bel.belEdge[i].eltExp();
    }
    bel.Z = Z;
}
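//Illustrative sketch (an assumption about the helpers used above, not the
//toolkit's own code): logSum and logMultiply are taken to be the usual
//numerically stable log-space operations.
static double logSumSketch(IList<double> a)
{
    //log(sum_i exp(a_i)), computed stably by factoring out the max
    double max = double.NegativeInfinity;
    foreach (double v in a)
    {
        if (v > max) { max = v; }
    }
    if (double.IsNegativeInfinity(max)) { return max; }
    double s = 0;
    foreach (double v in a)
    {
        s += Math.Exp(v - max);
    }
    return max + Math.Log(s);
}
//logMultiply(YY, b, r) is then assumed to be the log-space matrix-vector product
//  r[j] = log(sum_k exp(YY[j, k] + b[k])),
//which is exactly the beta recursion beta_{i-1} = logMultiply(YY_i, Y_i + beta_i).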
public List<featureTemp> getFeatureTemp(dataSeq x, int node)
{
    return x.getFeatureTemp(node);
}
//ADF training
public double adf()
{
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    List<double> grad = new List<double>(new double[fsize]);
    double error = 0;
    List<int> featureCountList = new List<int>(new int[fsize]);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);//random shuffle of training samples
    Global.interval = xsize / Global.nUpdate;
    int nSample = 0;//#samples in an update interval
    for (int t = 0; t < xsize; t += Global.miniBatch)
    {
        //build the mini-batch
        List<dataSeq> XX = new List<dataSeq>();
        bool end = false;
        for (int k = t; k < t + Global.miniBatch; k++)
        {
            int i = ri[k];
            dataSeq x = _X[i];
            XX.Add(x);
            if (k == xsize - 1)
            {
                end = true;
                break;
            }
        }
        int mbSize = XX.Count;
        nSample += mbSize;
        //mini-batch gradient; fSet collects the features that fired
        baseHashSet<int> fSet = new baseHashSet<int>();
        double err = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fSet);
        error += err;
        foreach (int i in fSet)
        {
            featureCountList[i]++;
        }
        bool check = false;
        for (int k = t; k < t + Global.miniBatch; k++)
        {
            if (t != 0 && k % Global.interval == 0)
            {
                check = true;
            }
        }
        //update decay rates: frequently updated features decay faster
        if (check || end)
        {
            for (int i = 0; i < fsize; i++)
            {
                int v = featureCountList[i];
                double u = (double)v / (double)nSample;
                double eta = Global.upper - (Global.upper - Global.lower) * u;
                Global.decayList[i] *= eta;
            }
            //reset
            for (int i = 0; i < featureCountList.Count; i++)
            {
                featureCountList[i] = 0;
            }
        }
        //update weights
        foreach (int i in fSet)
        {
            w[i] -= (float)(Global.decayList[i] * grad[i]);
            //reset
            grad[i] = 0;
        }
        //reg
        if (check || end)
        {
            if (Global.reg != 0)
            {
                for (int i = 0; i < fsize; i++)
                {
                    double grad_i = w[i] / (Global.reg * Global.reg) * ((double)nSample / (double)xsize);
                    w[i] -= (float)(Global.decayList[i] * grad_i);
                }
            }
            //reset
            nSample = 0;
        }
        Global.countWithIter += mbSize;
    }
    if (Global.reg != 0)
    {
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    return error;
}
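//Worked sketch (added commentary, hypothetical values) of the per-feature ADF
//decay update above: each feature's learning rate shrinks according to how often
//it fired in the last interval, so frequent features decay fast and rare features
//keep a large step size. Global.upper/Global.lower values here are assumptions.
static double adfDecaySketch()
{
    double upper = 0.9995, lower = 0.6;      //hypothetical Global.upper / Global.lower
    int nSample = 200;                       //samples seen in this update interval
    int v = 150;                             //times feature i fired in the interval
    double u = (double)v / nSample;          //update frequency u = 0.75
    double eta = upper - (upper - lower) * u;//= 0.9995 - 0.3995 * 0.75 ≈ 0.70
    return eta;                              //then: Global.decayList[i] *= eta
}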
//get beliefs (marginal probabilities)
public void getBeliefs(belief bel, model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist)
{
    int nNodes = x.Count;
    int nTag = m.NTag;
    double[] dAry = new double[nTag];
    List<double> alpha_Y = new List<double>(dAry);
    List<double> newAlpha_Y = new List<double>(dAry);//marginal probability from left to current node (including values of the current node)
    List<double> tmp_Y = new List<double>(dAry);
    //compute beta values in a backward scan; the Mi matrices (YY, Y) are
    //precomputed in YYlist/Ylist (see getYYandY)
    for (int i = nNodes - 1; i > 0; i--)
    {
        dMatrix YY = YYlist[i];
        List<double> Y = Ylist[i];
        listTool.listSet(ref tmp_Y, bel.belState[i]);//this is meaningful from the 2nd round
        listTool.listAdd(ref tmp_Y, Y);
        logMultiply(YY, tmp_Y, bel.belState[i - 1]);
    }
    //compute alpha values
    for (int i = 0; i < nNodes; i++)
    {
        dMatrix YY = null;
        if (i > 0)
        {
            YY = new dMatrix(YYlist[i]);//use a copy: it is transposed in place below
        }
        List<double> Y = Ylist[i];
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, alpha_Y);//this is meaningful from the 2nd round
            YY.transpose();
            logMultiply(YY, tmp_Y, newAlpha_Y);
            listTool.listAdd(ref newAlpha_Y, Y);
        }
        else
        {
            listTool.listSet(ref newAlpha_Y, Y);
        }
        //setting marginal probability on edges
        if (i > 0)
        {
            //beta + Y
            listTool.listSet(ref tmp_Y, Y);
            listTool.listAdd(ref tmp_Y, bel.belState[i]);
            //YY
            YY.transpose();
            bel.belEdge[i].set(YY);
            //belief = alpha + YY + beta + Y
            for (int yPre = 0; yPre < nTag; yPre++)
            {
                for (int y = 0; y < nTag; y++)
                {
                    bel.belEdge[i][yPre, y] += tmp_Y[y] + alpha_Y[yPre];
                }
            }
        }
        //setting marginal probability on nodes
        List<double> tmp = bel.belState[i];//beta
        listTool.listAdd(ref tmp, newAlpha_Y);//belief = alpha + beta
        listTool.listSet(ref alpha_Y, newAlpha_Y);
    }
    //normalization
    double Z = logSum(alpha_Y);
    for (int i = 0; i < nNodes; i++)
    {
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, -Z);
        listTool.listExp(ref tmp);
    }
    for (int i = 1; i < nNodes; i++)
    {
        bel.belEdge[i].add(-Z);
        bel.belEdge[i].eltExp();
    }
    bel.Z = Z;//the overall partition function value (log Z)
}
//SGD with lazy reg
public double sgd_lazyReg()
{
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    double[] ary = new double[fsize];
    List<double> grad = new List<double>(ary);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);
    double error = 0;
    double r_k = 0;
    for (int t = 0; t < xsize; t += Global.miniBatch)
    {
        //build the mini-batch
        List<dataSeq> XX = new List<dataSeq>();
        for (int k = t; k < t + Global.miniBatch; k++)
        {
            int i = ri[k];
            dataSeq x = _X[i];
            XX.Add(x);
            if (k == xsize - 1)
            {
                break;
            }
        }
        int mbSize = XX.Count;
        baseHashSet<int> fset = new baseHashSet<int>();
        double err = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fset);
        error += err;
        //decaying rate: r_k = r_0 * beta^(k/N), with 0 < r_0 <= 1, 0 < beta < 1
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
        if (Global.countWithIter % (xsize / 4) == 0)
        {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        foreach (int i in fset)
        {
            //grad[i] is the gradient of -log(obj), so subtract it to move toward the minimum
            w[i] -= (float)(r_k * grad[i]);
            //reset
            grad[i] = 0;
        }
        Global.countWithIter += mbSize;
    }
    //lazy reg: apply the regularizer once per pass instead of at every update
    if (Global.reg != 0)
    {
        for (int i = 0; i < fsize; i++)
        {
            double grad_i = w[i] / (Global.reg * Global.reg);
            w[i] -= (float)(r_k * grad_i);
        }
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    return error;
}
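//Worked sketch (added commentary, hypothetical rate0/decayFactor values) of the
//decay schedule used above: r_k = r_0 * beta^(countWithIter / N) decays smoothly
//within and across passes over the training data.
static void decayRateSketch()
{
    double rate0 = 0.1, decayFactor = 0.94;  //hypothetical Global.rate0 / Global.decayFactor
    int xsize = 1000;                        //N = number of training sequences
    for (int countWithIter = 0; countWithIter <= 3 * xsize; countWithIter += xsize)
    {
        double r_k = rate0 * Math.Pow(decayFactor, (double)countWithIter / xsize);
        Console.WriteLine("pass {0}: r_k = {1:e2}", countWithIter / xsize, r_k);
        //pass 0: 1.00e-001, pass 1: 9.40e-002, pass 2: 8.84e-002, pass 3: 8.31e-002
    }
}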
public double sgd_exactReg()
{
    double scalar = 1, scalarOld = 1;
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    double newReg = Global.reg * Math.Sqrt(xsize);
    double oldReg = Global.reg;
    Global.reg = newReg;
    double[] tmpAry = new double[fsize];
    List<double> grad = new List<double>(tmpAry);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);
    double error = 0;
    double r_k = 0;
    for (int t = 0; t < xsize; t++)
    {
        int ii = ri[t];
        dataSeq x = _X[ii];
        baseHashSet<int> fset = new baseHashSet<int>();
        double err = _grad.getGrad_SGD(grad, scalar, _model, x, fset);
        error += err;
        //decaying rate: r_k = r_0 * beta^(k/N), with 0 < r_0 <= 1, 0 < beta < 1
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
        if (Global.countWithIter % (xsize / 4) == 0)
        {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        //reg: the true weight is w[i] * scalar
        if (t % Global.scalarResetStep == 0)
        {
            //fold the scalar back into the stored weights to avoid numerical drift
            for (int i = 0; i < fsize; i++)
            {
                w[i] *= (float)scalar;
            }
            scalar = scalarOld = 1;
        }
        else
        {
            scalarOld = scalar;
            scalar *= 1 - r_k / (Global.reg * Global.reg);
        }
        foreach (int i in fset)
        {
            double realWeight = w[i] * scalarOld;
            double grad_i = grad[i] + realWeight / (Global.reg * Global.reg);
            realWeight = realWeight - r_k * grad_i;
            w[i] = (float)(realWeight / scalar);
            //reset
            grad[i] = 0;
        }
        Global.countWithIter++;
    }
    //recover the real weights
    for (int i = 0; i < fsize; i++)
    {
        w[i] *= (float)scalar;
    }
    if (Global.reg != 0.0)
    {
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    Global.reg = oldReg;
    return error;
}
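//Simplified sketch (added commentary, hypothetical numbers) of the weight-scaling
//trick behind sgd_exactReg: writing the true weight as w_true[i] = w[i] * scalar
//turns the dense L2 shrinkage w_true *= (1 - r_k / reg^2) into a single multiply
//on the shared scalar, so only features that actually fired touch the array.
static void scalarTrickSketch()
{
    double[] w = { 2.0, -1.0 };
    double scalar = 1.0, r_k = 0.1, reg = 2.0;
    //shrink ALL true weights at once via the scalar alone
    scalar *= 1 - r_k / (reg * reg);            //true weights are now w[i] * scalar
    //sparse update on feature 0 only: step in true-weight space, store rescaled
    double trueW0 = w[0] * scalar - r_k * 0.5;  //0.5 = hypothetical gradient component
    w[0] = trueW0 / scalar;
    //periodically fold the scalar back in (the scalarResetStep branch above)
    for (int i = 0; i < w.Length; i++)
    {
        w[i] *= scalar;
    }
    scalar = 1.0;
}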
public dataSeqTest(dataSeq x, List<int> yOutput)
{
    _x = x;
    _yOutput = yOutput;
}
//the scalar version
public double getGrad_SGD(List<double> g, double scalar, model m, dataSeq x, baseHashSet<int> idset)
{
    return getGradCRF(g, scalar, m, x, idset);
}
virtual public void getFeatures(dataSeq x, int node, ref List<List<int>> nodeFeature, ref int[,] edgeFeature)
{
    //must be overridden in a derived feature generator
    throw new Exception("getFeatures() is not implemented in the base class");
}
override public double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet)
{
    if (idSet != null)
    {
        idSet.Clear();
    }
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    //store the YY and Y
    List<dMatrix> YYlist = new List<dMatrix>(), maskYYlist = new List<dMatrix>();
    List<List<double>> Ylist = new List<List<double>>(), maskYlist = new List<List<double>>();
    _inf.getYYandY(m, x, YYlist, Ylist, maskYYlist, maskYlist);
    _inf.getBeliefs(bel, m, x, YYlist, Ylist);
    _inf.getBeliefs(belMasked, m, x, maskYYlist, maskYlist);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (idSet != null)
                {
                    idSet.Add(f);
                }
                gradList[f] += bel.belState[i][s] * im.val;
                gradList[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        //non-rich edge features: one weight per (sPre, s) transition
        if (Global.useTraditionalEdge)
        {
            for (int s = 0; s < nTag; s++)
            {
                for (int sPre = 0; sPre < nTag; sPre++)
                {
                    int f = _fGene.getEdgeFeatID(sPre, s);
                    if (idSet != null)
                    {
                        idSet.Add(f);
                    }
                    gradList[f] += bel.belEdge[i][sPre, s];
                    gradList[f] -= belMasked.belEdge[i][sPre, s];
                }
            }
        }
        //rich edge features: transitions conditioned on observation features
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            int id = im.id;
            if (id < _fGene.getNRichFeatTemp())
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        int f = _fGene.getEdgeFeatID(id, sPre, s);
                        if (idSet != null)
                        {
                            idSet.Add(f);
                        }
                        gradList[f] += bel.belEdge[i][sPre, s] * im.val;
                        gradList[f] -= belMasked.belEdge[i][sPre, s] * im.val;
                    }
                }
            }
        }
    }
    return Z - ZGold;//-log{P(y*|x,w)}
}
override public void getLogYY(model m, dataSeq x, int i, ref dMatrix YY, ref List<double> Y, bool takeExp, bool mask)
{
    YY.set(0);
    listTool.listSet(ref Y, 0);
    float[] w = m.W;
    List<featureTemp> fList = _fGene.getFeatureTemp(x, i);
    int nTag = m.NTag;
    //node scores
    foreach (featureTemp ft in fList)
    {
        for (int s = 0; s < nTag; s++)
        {
            int f = _fGene.getNodeFeatID(ft.id, s);
            Y[s] += w[f] * ft.val;
        }
    }
    if (i > 0)
    {
        //non-rich edge
        if (Global.useTraditionalEdge)
        {
            for (int s = 0; s < nTag; s++)
            {
                for (int sPre = 0; sPre < nTag; sPre++)
                {
                    int f = _fGene.getEdgeFeatID(sPre, s);
                    YY[sPre, s] += w[f];
                }
            }
        }
        //rich edge
        foreach (featureTemp im in fList)
        {
            int id = im.id;
            if (id < _fGene.getNRichFeatTemp())
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        int f = _fGene.getEdgeFeatID(id, sPre, s);
                        YY[sPre, s] += w[f] * im.val;
                    }
                }
            }
        }
    }
    double maskValue = double.MinValue;
    if (takeExp)
    {
        listTool.listExp(ref Y);
        YY.eltExp();
        maskValue = 0;
    }
    //mask: keep only the gold tag's score at node i
    if (mask)
    {
        List<int> tagList = x.getTags();
        for (int s = 0; s < Y.Count; s++)
        {
            if (tagList[i] != s)
            {
                Y[s] = maskValue;
            }
        }
    }
}
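//Added commentary on the two edge-feature families above: a "non-rich" edge scores
//a transition sPre -> s with a single input-independent weight w[getEdgeFeatID(sPre, s)],
//while a "rich" edge additionally conditions on the observation: every node feature
//template with id < getNRichFeatTemp() contributes w[getEdgeFeatID(id, sPre, s)] * val
//to the transition score, so transitions can depend on the current input features
//rather than on the previous tag alone. getGradCRF above mirrors exactly this split.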