// Copy constructor. Takes the tag count from m and allocates a weight array
// with the same length as m.W. The weight values are copied only when wCopy
// is true; otherwise the new array keeps its default (zero) contents.
public model(model m, bool wCopy)
{
    _nTag = m.NTag;
    _w = new float[m.W.Length];

    if (!wCopy)
    {
        return;
    }

    m.W.CopyTo(_w, 0);
}
// SGD gradient for a single sequence.
//   g     - gradient accumulator handed on to getGradCRF.
//   idset - optional; cleared here when it is not null.
// Returns 0 when x is null, otherwise whatever getGradCRF returns.
public double getGrad_SGD(List<double> g, model m, dataSeq x, baseHashSet<int> idset)
{
    if (idset != null)
    {
        idset.Clear();
    }

    return x == null ? 0 : getGradCRF(g, m, x, idset);
}
// The scalar version of the CRF gradient for one sequence.
// Obtains the unmasked and the masked beliefs from _inf.getBeliefs (scalar is
// forwarded to both calls), then for every node feature and every edge
// feature adds the unmasked belief to vecGrad and subtracts the masked one.
//   vecGrad - gradient accumulator indexed by feature id; updated in place.
//   scalar  - forwarded unchanged to _inf.getBeliefs.
//   idSet   - optional set of touched feature ids. When not null it is
//             cleared first and then receives every feature id visited.
//             A null set is accepted, as in the non-scalar overload.
// Returns Z - ZGold, i.e. -log{P(y*|x,w)}.
public virtual double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet)
{
    if (idSet != null)
    {
        idSet.Clear();
    }
    int nTag = m.NTag;

    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, scalar, false);
    _inf.getBeliefs(belMasked, m, x, scalar, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;

    //Loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        List<featureTemp> fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (idSet != null)
                {
                    idSet.Add(f);
                }
                // kept as two separate updates so the floating-point
                // accumulation order stays the same as before
                vecGrad[f] += bel.belState[i][s] * im.val;
                vecGrad[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }

    //Loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                if (idSet != null)
                {
                    idSet.Add(f);
                }
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }

    return Z - ZGold;//-log{P(y*|x,w)}
}
// Fills the four output lists with one entry per node of x.
// For every node i, getLogYY is called (takeExp = false, mask = false) on a
// fresh nTag x nTag matrix and a fresh nTag-long list; the results are
// appended to YYlist / Ylist and independent copies of them are appended to
// maskYYlist / maskYlist. Afterwards every entry of maskYlist[i] whose index
// differs from the tag of node i (from x.getTags()) is overwritten with
// double.MinValue. The matrices in maskYYlist are left as plain copies.
// An ApplicationException raised inside the first loop is caught and only
// reported on the console; the original code had reader-lock calls around
// that loop, which were already commented out.
public void getYYandY(model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist, List<dMatrix> maskYYlist, List<List<double>> maskYlist)
{
    int nNodes = x.Count;
    int nTag = m.NTag;
    double[] dAry = new double[nTag];

    try
    {
        for (int i = 0; i < nNodes; i++)
        {
            dMatrix YYi = new dMatrix(nTag, nTag);
            List<double> Yi = new List<double>(dAry);
            //compute the Mi matrix
            getLogYY(m, x, i, ref YYi, ref Yi, false, false);
            YYlist.Add(YYi);
            Ylist.Add(Yi);
            maskYYlist.Add(new dMatrix(YYi));
            maskYlist.Add(new List<double>(Yi));
        }
    }
    catch (ApplicationException)
    {
        Console.WriteLine("read out time!");
    }

    // nothing to mask; also keeps x.getTags() from being called for an
    // empty sequence, exactly as before
    if (nNodes <= 0)
    {
        return;
    }

    //get the masked YY and Y
    double maskValue = double.MinValue;
    // the tag list does not depend on the node index, so fetch it once
    List<int> tagList = x.getTags();
    for (int i = 0; i < nNodes; i++)
    {
        List<double> Y = maskYlist[i];
        for (int s = 0; s < Y.Count; s++)
        {
            if (tagList[i] != s)
            {
                Y[s] = maskValue;
            }
        }
    }
}
// The scalar version: computes the node scores Y and edge scores YY of
// position i, with every weight multiplied by scalar.
//   YY, Y   - reset to 0 first, then filled. Y[s] accumulates
//             w[f] * scalar * ft.val over the node features of position i;
//             YY[sPre, s] accumulates w[f] * scalar over the edge features,
//             but only when i > 0.
//   takeExp - when true, Y and YY are exponentiated and the mask value is 0;
//             otherwise the mask value is double.MinValue.
//   mask    - when true, every Y[s] whose s differs from the tag of position
//             i (from x.getTags()) is replaced by the mask value. YY is not
//             masked.
public virtual void getLogYY(double scalar, model m, dataSeq x, int i, ref dMatrix YY, ref List<double> Y, bool takeExp, bool mask)
{
    YY.set(0);
    listTool.listSet(ref Y, 0);

    float[] weights = m.W;
    List<featureTemp> templates = _fGene.getFeatureTemp(x, i);
    int tagCount = m.NTag;

    // node scores
    foreach (featureTemp template in templates)
    {
        for (int tag = 0; tag < tagCount; tag++)
        {
            int featId = _fGene.getNodeFeatID(template.id, tag);
            Y[tag] += weights[featId] * scalar * template.val;
        }
    }

    // edge scores; the first position has no incoming edge
    if (i > 0)
    {
        for (int tag = 0; tag < tagCount; tag++)
        {
            for (int prevTag = 0; prevTag < tagCount; prevTag++)
            {
                int featId = _fGene.getEdgeFeatID(prevTag, tag);
                YY[prevTag, tag] += weights[featId] * scalar;
            }
        }
    }

    double fill = double.MinValue;
    if (takeExp)
    {
        listTool.listExp(ref Y);
        YY.eltExp();
        fill = 0;
    }

    if (!mask)
    {
        return;
    }

    List<int> goldTags = x.getTags();
    for (int tag = 0; tag < Y.Count; tag++)
    {
        if (goldTags[i] == tag)
        {
            continue;
        }
        Y[tag] = fill;
    }
}
//f-score
// Decodes X through multiThreading, renders the output tags and the gold tags
// of every sequence as a comma-terminated string ("1,2,3,"), and writes the
// output-tag string as one line to Global.swOutput when that writer is set.
// When Global.runMode is "train", the two string lists are scored with
// fscore.getFscore and a summary line is written to Global.swLog; in any
// other mode the returned list is empty.
// The parameter m is not used by this method.
public List<double> decode_fscore(dataSet X, model m)
{
    //multi thread
    List<dataSeqTest> X2 = new List<dataSeqTest>();
    multiThreading(X, X2);

    List<string> goldTagList = new List<string>();
    List<string> resTagList = new List<string>();

    // one reusable builder instead of repeated string concatenation, which
    // is quadratic in the sequence length
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    foreach (dataSeqTest x in X2)
    {
        buffer.Length = 0;
        foreach (int im in x._yOutput)
        {
            buffer.Append(im.ToString()).Append(',');
        }
        string res = buffer.ToString();
        resTagList.Add(res);

        //output tag results (res already has the "tag," layout of the output line)
        if (Global.swOutput != null)
        {
            Global.swOutput.Write(res);
            Global.swOutput.WriteLine();
        }

        List<int> goldTags = x._x.getTags();
        buffer.Length = 0;
        foreach (int im in goldTags)
        {
            buffer.Append(im.ToString()).Append(',');
        }
        goldTagList.Add(buffer.ToString());
    }

    List<double> scoreList = new List<double>();
    if (Global.runMode == "train")
    {
        List<double> infoList = new List<double>();
        scoreList = fscore.getFscore(goldTagList, resTagList, infoList);
        Global.swLog.WriteLine("#gold-chunk={0} #output-chunk={1} #correct-output-chunk={2} precision={3} recall={4} f-score={5}", infoList[0], infoList[1], infoList[2], scoreList[1].ToString("f2"), scoreList[2].ToString("f2"), scoreList[0].ToString("f2"));
    }
    return scoreList;
}
//string accuracy
// Decodes X through multiThreading and counts the sequences whose output
// tags equal the gold tags at every position. The output tags of each
// sequence are written as one comma-separated line to Global.swOutput when
// that writer is set. A summary line goes to Global.swLog.
// Returns a one-element list holding the percentage of fully correct
// sequences. For an empty data set the accuracy is reported as 0 rather
// than the NaN that 0/0 would produce.
// The parameter m is not used by this method.
public List<double> decode_strAcc(dataSet X, model m)
{
    double xsize = X.Count;
    double corr = 0;

    //multi thread
    List<dataSeqTest> X2 = new List<dataSeqTest>();
    multiThreading(X, X2);

    foreach (dataSeqTest x in X2)
    {
        //output tag results
        if (Global.swOutput != null)
        {
            for (int i = 0; i < x._x.Count; i++)
            {
                Global.swOutput.Write(x._yOutput[i].ToString() + ",");
            }
            Global.swOutput.WriteLine();
        }

        List<int> goldTags = x._x.getTags();
        bool ck = true;
        for (int i = 0; i < x._x.Count; i++)
        {
            if (goldTags[i] != x._yOutput[i])
            {
                ck = false;
                break;
            }
        }
        if (ck)
        {
            corr++;
        }
    }

    // guard the empty data set: 0 / 0 on doubles yields NaN
    double acc = xsize > 0 ? corr / xsize * 100.0 : 0;
    Global.swLog.WriteLine("total-tag-strings={0} correct-tag-strings={1} string-accuracy={2}%", xsize, corr, acc);

    List<double> scoreList = new List<double>();
    scoreList.Add(acc);
    return scoreList;
}
// Builds a rich-feature toolbox over the data set X.
//   train = true  : the feature generator is created first, the model is
//                   built from X and that generator, and the optimizer is
//                   initialised at the end.
//   train = false : the model is loaded from Global.fModel before the
//                   feature generator is created; no optimizer is set up.
// In both modes the inference and gradient objects are created afterwards.
public toolboxRich(dataSet X, bool train = true)
{
    _X = X;

    if (train)
    {
        //for training
        _fGene = new featureGeneRich(X);
        _model = new model(X, _fGene);
    }
    else
    {
        //for test
        _model = new model(Global.fModel);
        _fGene = new featureGeneRich(X);
    }

    _inf = new inferRich(this);
    _grad = new gradRich(this);

    if (train)
    {
        initOptimizer();
    }
}
// Builds a toolbox over the data set X.
//   train = true  : the feature generator is created first, the model is
//                   built from X and that generator, and the optimizer is
//                   initialised at the end.
//   train = false : the model is loaded from Global.fModel before the
//                   feature generator is created; no optimizer is set up.
// In both modes the inference and gradient objects are created afterwards.
public toolbox(dataSet X, bool train = true)
{
    _X = X;

    if (train)
    {
        //to train
        _fGene = new featureGenerator(X);
        _model = new model(X, _fGene);
    }
    else
    {
        //to test
        _model = new model(Global.fModel);
        _fGene = new featureGenerator(X);
    }

    _inf = new inference(this);
    _grad = new gradient(this);

    if (train)
    {
        initOptimizer();
    }
}
// Viterbi decoding of one sequence.
// tags is cleared, then for every position the scores from getLogYY
// (takeExp = false, mask = false) are handed to a Viterbi object, and
// runViterbi fills tags. Returns the value runViterbi returns.
public double decodeViterbi_fast(model m, dataSeq x, List<int> tags)
{
    tags.Clear();

    int nodeCount = x.Count;
    int tagCount = m.NTag;

    // one matrix and one list are reused for every position
    dMatrix edgeScores = new dMatrix(tagCount, tagCount);
    List<double> nodeScores = new List<double>(new double[tagCount]);
    Viterbi decoder = new Viterbi(nodeCount, tagCount);

    int pos = 0;
    while (pos < nodeCount)
    {
        getLogYY(m, x, pos, ref edgeScores, ref nodeScores, false, false);
        decoder.setScores(pos, nodeScores, edgeScores);
        pos++;
    }

    return decoder.runViterbi(ref tags, false);
}
// The mini-batch version: accumulates the gradient of every sequence in X
// into g by calling getGradCRF once per sequence, each time with a fresh id
// set of its own.
//   idset - optional; when not null it is cleared first and then receives
//           the ids collected for every sequence.
// Returns the sum of the values returned by getGradCRF.
public double getGrad_SGD_miniBatch(List<double> g, model m, List<dataSeq> X, baseHashSet<int> idset)
{
    bool collectIds = idset != null;
    if (collectIds)
    {
        idset.Clear();
    }

    double total = 0;
    foreach (dataSeq seq in X)
    {
        baseHashSet<int> seqIds = new baseHashSet<int>();
        total += getGradCRF(g, m, seq, seqIds);

        if (!collectIds)
        {
            continue;
        }
        foreach (int id in seqIds)
        {
            idset.Add(id);
        }
    }

    return total;
}
// The scalar version: fills bel for the sequence x, recomputing the node and
// edge scores of every position with getLogYY(scalar, ..., takeExp = false,
// mask).
// A backward scan first writes into bel.belState, then a forward scan adds
// the alpha values to bel.belState and fills bel.belEdge for every position
// after the first. Z is logSum of the final alpha values; it is subtracted
// from every node and edge entry before they are exponentiated, and it is
// stored in bel.Z.
public void getBeliefs(belief bel, model m, dataSeq x, double scalar, bool mask)
{
    int nodeCount = x.Count;
    int tagCount = m.NTag;

    dMatrix edgeLog = new dMatrix(tagCount, tagCount);
    double[] zeros = new double[tagCount];
    List<double> nodeLog = new List<double>(zeros);
    List<double> alpha = new List<double>(zeros);
    List<double> alphaNext = new List<double>(zeros);
    List<double> scratch = new List<double>(zeros);

    //compute beta values in a backward scan
    for (int pos = nodeCount - 1; pos > 0; pos--)
    {
        getLogYY(scalar, m, x, pos, ref edgeLog, ref nodeLog, false, mask);
        listTool.listSet(ref scratch, bel.belState[pos]);
        listTool.listAdd(ref scratch, nodeLog);
        logMultiply(edgeLog, scratch, bel.belState[pos - 1]);
    }

    //compute alpha values
    for (int pos = 0; pos < nodeCount; pos++)
    {
        getLogYY(scalar, m, x, pos, ref edgeLog, ref nodeLog, false, mask);

        if (pos == 0)
        {
            listTool.listSet(ref alphaNext, nodeLog);
        }
        else
        {
            listTool.listSet(ref scratch, alpha);
            edgeLog.transpose();
            logMultiply(edgeLog, scratch, alphaNext);
            listTool.listAdd(ref alphaNext, nodeLog);

            // edge entries: scratch holds the node scores plus the values
            // already stored in bel.belState[pos]
            listTool.listSet(ref scratch, nodeLog);
            listTool.listAdd(ref scratch, bel.belState[pos]);
            // second transpose, applied before the matrix is copied into belEdge
            edgeLog.transpose();
            bel.belEdge[pos].set(edgeLog);
            for (int prevTag = 0; prevTag < tagCount; prevTag++)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    bel.belEdge[pos][prevTag, tag] += scratch[tag] + alpha[prevTag];
                }
            }
        }

        List<double> nodeBelief = bel.belState[pos];
        listTool.listAdd(ref nodeBelief, alphaNext);
        listTool.listSet(ref alpha, alphaNext);
    }

    double logZ = logSum(alpha);

    for (int pos = 0; pos < nodeCount; pos++)
    {
        List<double> nodeBelief = bel.belState[pos];
        listTool.listAdd(ref nodeBelief, -logZ);
        listTool.listExp(ref nodeBelief);
    }

    for (int pos = 1; pos < nodeCount; pos++)
    {
        bel.belEdge[pos].add(-logZ);
        bel.belEdge[pos].eltExp();
    }

    bel.Z = logZ;
}
//get beliefs (marginal probabilities)
// Fills bel for the sequence x from precomputed scores: YYlist[i] holds the
// edge matrix and Ylist[i] the node scores of position i.
// A backward scan first writes the beta values into bel.belState, then a
// forward scan adds the alpha values (node belief = alpha + beta) and fills
// bel.belEdge for every position after the first
// (edge belief = alpha + YY + beta + Y). Z is logSum of the final alpha
// values; it is subtracted from every entry before exponentiation and is
// stored in bel.Z. The forward scan works on a copy of each YYlist matrix.
public void getBeliefs(belief bel, model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist)
{
    int nodeCount = x.Count;
    int tagCount = m.NTag;

    double[] zeros = new double[tagCount];
    List<double> alpha = new List<double>(zeros);
    //marginal probability from left to current node (including values of the current node)
    List<double> alphaNext = new List<double>(zeros);
    List<double> scratch = new List<double>(zeros);

    //compute beta values in a backward scan
    for (int pos = nodeCount - 1; pos > 0; pos--)
    {
        dMatrix edgeLog = YYlist[pos];
        List<double> nodeLog = Ylist[pos];
        //this is meaningful from the 2nd round
        listTool.listSet(ref scratch, bel.belState[pos]);
        listTool.listAdd(ref scratch, nodeLog);
        logMultiply(edgeLog, scratch, bel.belState[pos - 1]);
    }

    //compute alpha values
    for (int pos = 0; pos < nodeCount; pos++)
    {
        if (pos == 0)
        {
            listTool.listSet(ref alphaNext, Ylist[pos]);
        }
        else
        {
            //should use the copy to avoid change
            dMatrix edgeLog = new dMatrix(YYlist[pos]);
            List<double> nodeLog = Ylist[pos];

            //this is meaningful from the 2nd round
            listTool.listSet(ref scratch, alpha);
            edgeLog.transpose();
            logMultiply(edgeLog, scratch, alphaNext);
            listTool.listAdd(ref alphaNext, nodeLog);

            //setting marginal probability on edges
            //beta + Y
            listTool.listSet(ref scratch, nodeLog);
            listTool.listAdd(ref scratch, bel.belState[pos]);
            //YY
            edgeLog.transpose();
            bel.belEdge[pos].set(edgeLog);
            //belief = alpha + YY + beta + Y
            for (int prevTag = 0; prevTag < tagCount; prevTag++)
            {
                for (int tag = 0; tag < tagCount; tag++)
                {
                    bel.belEdge[pos][prevTag, tag] += scratch[tag] + alpha[prevTag];
                }
            }
        }

        //setting marginal probability on nodes
        List<double> nodeBelief = bel.belState[pos];//beta
        listTool.listAdd(ref nodeBelief, alphaNext);//belief = alpha + beta
        listTool.listSet(ref alpha, alphaNext);
    }

    double logZ = logSum(alpha);

    for (int pos = 0; pos < nodeCount; pos++)
    {
        List<double> nodeBelief = bel.belState[pos];
        listTool.listAdd(ref nodeBelief, -logZ);
        listTool.listExp(ref nodeBelief);
    }

    for (int pos = 1; pos < nodeCount; pos++)
    {
        bel.belEdge[pos].add(-logZ);
        bel.belEdge[pos].eltExp();
    }

    bel.Z = logZ;//the overall potential function value
}
// The scalar version of the single-sequence SGD gradient.
// Mirrors the non-scalar overload: idset is cleared when it is not null, and
// a null sequence yields 0 without touching g. Otherwise the call is
// delegated to the scalar getGradCRF and its value is returned.
public double getGrad_SGD(List<double> g, double scalar, model m, dataSeq x, baseHashSet<int> idset)
{
    if (idset != null)
    {
        idset.Clear();
    }
    if (x == null)
    {
        return 0;
    }
    return getGradCRF(g, scalar, m, x, idset);
}
//token accuracy
// Decodes X through multiThreading and counts, per tag, how often the tag
// occurs in the gold tags, in the output tags, and in both at the same
// position. The output tags of each sequence are written as one
// comma-separated line to Global.swOutput when that writer is set.
// Per-tag and overall precision / recall / F-score (as percentages) are
// written to Global.swLog, which is flushed at the end. Every ratio with a
// zero denominator is reported as 0, including the per-tag F-score when
// precision and recall are both 0.
// Returns a one-element list holding the overall F-score.
public List<double> decode_tokAcc(dataSet X, model m)
{
    int nTag = m.NTag;
    int[] tmpAry = new int[nTag];
    List<int> corrOutput = new List<int>(tmpAry);
    List<int> gold = new List<int>(tmpAry);
    List<int> output = new List<int>(tmpAry);

    //multi thread
    List<dataSeqTest> X2 = new List<dataSeqTest>();
    multiThreading(X, X2);

    foreach (dataSeqTest x in X2)
    {
        List<int> outTags = x._yOutput;
        List<int> goldTags = x._x.getTags();

        //output tag results
        if (Global.swOutput != null)
        {
            for (int i = 0; i < outTags.Count; i++)
            {
                Global.swOutput.Write(outTags[i].ToString() + ",");
            }
            Global.swOutput.WriteLine();
        }

        //count
        for (int i = 0; i < outTags.Count; i++)
        {
            gold[goldTags[i]]++;
            output[outTags[i]]++;
            if (outTags[i] == goldTags[i])
            {
                corrOutput[outTags[i]]++;
            }
        }
    }

    Global.swLog.WriteLine("% tag-type  #gold  #output  #correct-output  token-precision  token-recall  token-f-score");

    double prec, rec;
    int sumGold = 0, sumOutput = 0, sumCorrOutput = 0;
    for (int i = 0; i < nTag; i++)
    {
        sumCorrOutput += corrOutput[i];
        sumGold += gold[i];
        sumOutput += output[i];

        rec = gold[i] == 0 ? 0 : ((double)corrOutput[i]) * 100.0 / (double)gold[i];
        prec = output[i] == 0 ? 0 : ((double)corrOutput[i]) * 100.0 / (double)output[i];

        // same zero guard as the overall score below; without it a tag with
        // prec == rec == 0 is logged as NaN
        double tagFscore = (prec == 0 && rec == 0) ? 0 : 2 * prec * rec / (prec + rec);

        Global.swLog.WriteLine("% {0}:  {1}  {2}  {3}  {4}  {5}  {6}", i, gold[i], output[i], corrOutput[i], prec.ToString("f2"), rec.ToString("f2"), tagFscore.ToString("f2"));
    }

    rec = sumGold == 0 ? 0 : ((double)sumCorrOutput) * 100.0 / (double)sumGold;
    prec = sumOutput == 0 ? 0 : ((double)sumCorrOutput) * 100.0 / (double)sumOutput;

    double fscore;
    if (prec == 0 && rec == 0)
    {
        fscore = 0;
    }
    else
    {
        fscore = 2 * prec * rec / (prec + rec);//this token-based overall-f-score is also the token-based-accuracy
    }

    Global.swLog.WriteLine("% overall-tags:  {0}  {1}  {2}  {3}  {4}  {5}", sumGold, sumOutput, sumCorrOutput, prec.ToString("f2"), rec.ToString("f2"), fscore.ToString("f2"));
    Global.swLog.Flush();

    List<double> scoreList = new List<double>();
    scoreList.Add(fscore);
    return scoreList;
}
// CRF gradient for one sequence with rich edge features.
// The scores are computed once by _inf.getYYandY and reused for the unmasked
// and the masked belief pass. For every feature the unmasked belief is added
// to gradList and the masked belief is subtracted from it:
//   - node features at every position;
//   - plain edge features at every position after the first, only when
//     Global.useTraditionalEdge is set;
//   - rich edge features at every position after the first, for feature
//     templates whose id is below _fGene.getNRichFeatTemp(), scaled by the
//     template value.
//   idSet - optional; when not null it is cleared first and then receives
//           every feature id visited.
// Returns Z - ZGold, i.e. -log{P(y*|x,w)}.
public override double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet)
{
    bool trackIds = idSet != null;
    if (trackIds)
    {
        idSet.Clear();
    }

    int tagCount = m.NTag;

    //compute beliefs
    belief free = new belief(x.Count, tagCount);
    belief clamped = new belief(x.Count, tagCount);

    //store the YY and Y
    List<dMatrix> edgeScores = new List<dMatrix>();
    List<dMatrix> maskedEdgeScores = new List<dMatrix>();
    List<List<double>> nodeScores = new List<List<double>>();
    List<List<double>> maskedNodeScores = new List<List<double>>();
    _inf.getYYandY(m, x, edgeScores, nodeScores, maskedEdgeScores, maskedNodeScores);
    _inf.getBeliefs(free, m, x, edgeScores, nodeScores);
    _inf.getBeliefs(clamped, m, x, maskedEdgeScores, maskedNodeScores);

    double goldLogZ = clamped.Z;
    double logZ = free.Z;

    //Loop over nodes to compute features and update the gradient
    for (int pos = 0; pos < x.Count; pos++)
    {
        foreach (featureTemp template in _fGene.getFeatureTemp(x, pos))
        {
            for (int tag = 0; tag < tagCount; tag++)
            {
                int featId = _fGene.getNodeFeatID(template.id, tag);
                if (trackIds)
                {
                    idSet.Add(featId);
                }
                gradList[featId] += free.belState[pos][tag] * template.val;
                gradList[featId] -= clamped.belState[pos][tag] * template.val;
            }
        }
    }

    //Loop over edges to compute features and update the gradient
    for (int pos = 1; pos < x.Count; pos++)
    {
        //non-rich
        if (Global.useTraditionalEdge)
        {
            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(prevTag, tag);
                    if (trackIds)
                    {
                        idSet.Add(featId);
                    }
                    gradList[featId] += free.belEdge[pos][prevTag, tag];
                    gradList[featId] -= clamped.belEdge[pos][prevTag, tag];
                }
            }
        }

        //rich
        foreach (featureTemp template in _fGene.getFeatureTemp(x, pos))
        {
            int templateId = template.id;
            if (templateId >= _fGene.getNRichFeatTemp())
            {
                continue;
            }

            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(templateId, prevTag, tag);
                    if (trackIds)
                    {
                        idSet.Add(featId);
                    }
                    gradList[featId] += free.belEdge[pos][prevTag, tag] * template.val;
                    gradList[featId] -= clamped.belEdge[pos][prevTag, tag] * template.val;
                }
            }
        }
    }

    return logZ - goldLogZ;//-log{P(y*|x,w)}
}
// Computes the node scores Y and edge scores YY of position i, including the
// rich edge features.
//   YY, Y   - reset to 0 first, then filled. Y[s] accumulates w[f] * ft.val
//             over the node features of position i. For i > 0, YY[sPre, s]
//             accumulates the plain edge weights (only when
//             Global.useTraditionalEdge is set) plus w[f] * val for every
//             feature template whose id is below _fGene.getNRichFeatTemp().
//   takeExp - when true, Y and YY are exponentiated and the mask value is 0;
//             otherwise the mask value is double.MinValue.
//   mask    - when true, every Y[s] whose s differs from the tag of position
//             i (from x.getTags()) is replaced by the mask value. YY is not
//             masked.
public override void getLogYY(model m, dataSeq x, int i, ref dMatrix YY, ref List<double> Y, bool takeExp, bool mask)
{
    YY.set(0);
    listTool.listSet(ref Y, 0);

    float[] weights = m.W;
    List<featureTemp> templates = _fGene.getFeatureTemp(x, i);
    int tagCount = m.NTag;

    // node scores
    foreach (featureTemp template in templates)
    {
        for (int tag = 0; tag < tagCount; tag++)
        {
            int featId = _fGene.getNodeFeatID(template.id, tag);
            Y[tag] += weights[featId] * template.val;
        }
    }

    // edge scores; the first position has no incoming edge
    if (i > 0)
    {
        //non-rich edge
        if (Global.useTraditionalEdge)
        {
            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(prevTag, tag);
                    YY[prevTag, tag] += weights[featId];
                }
            }
        }

        //rich edge
        foreach (featureTemp template in templates)
        {
            int templateId = template.id;
            if (templateId >= _fGene.getNRichFeatTemp())
            {
                continue;
            }

            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(templateId, prevTag, tag);
                    YY[prevTag, tag] += weights[featId] * template.val;
                }
            }
        }
    }

    double fill = double.MinValue;
    if (takeExp)
    {
        listTool.listExp(ref Y);
        YY.eltExp();
        fill = 0;
    }

    if (!mask)
    {
        return;
    }

    List<int> goldTags = x.getTags();
    for (int tag = 0; tag < Y.Count; tag++)
    {
        if (goldTags[i] == tag)
        {
            continue;
        }
        Y[tag] = fill;
    }
}