/// <summary>
/// Scalar version of the CRF gradient for a single sequence.
/// Runs <c>_inf.getBeliefs</c> twice (mask = false and mask = true) and adds the difference
/// (unmasked belief - masked belief) into <paramref name="vecGrad"/>; node contributions are
/// additionally weighted by the feature value.
/// </summary>
/// <param name="vecGrad">Gradient accumulator indexed by feature ID. Entries are updated in place and are not reset here.</param>
/// <param name="scalar">Forwarded unchanged to <c>_inf.getBeliefs</c>.</param>
/// <param name="m">Model; only <c>NTag</c> is read directly here, the rest is forwarded to inference.</param>
/// <param name="x">Sequence to process: one node per position, one edge per adjacent pair of positions.</param>
/// <param name="idSet">
/// Receives the ID of every feature touched; it is cleared first. May be null, in which case no IDs are
/// recorded (matching the null handling of the getGradCRF overload that takes no scalar).
/// </param>
/// <returns>
/// <c>bel.Z - belMasked.Z</c>, which the original author's comment describes as -log{P(y*|x,w)}.
/// </returns>
virtual public double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet)
{
    // A null idSet means the caller does not need the set of touched feature IDs.
    bool trackIds = idSet != null;
    if (trackIds)
    {
        idSet.Clear();
    }

    int nTag = m.NTag;

    // Compute beliefs twice: once without the mask, once with it.
    // NOTE(review): the masked run presumably restricts inference to the gold labels — confirm in getLogYY.
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, scalar, false);
    _inf.getBeliefs(belMasked, m, x, scalar, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;

    // Node features: every feature at position i contributes once per tag.
    for (int i = 0; i < x.Count; i++)
    {
        List<featureTemp> fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (trackIds)
                {
                    idSet.Add(f);
                }

                // Kept as two separate updates so floating-point rounding matches the previous behaviour.
                vecGrad[f] += bel.belState[i][s] * im.val;
                vecGrad[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }

    // Edge features: one feature per (previous tag, current tag) pair; edges start at position 1.
    for (int i = 1; i < x.Count; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                if (trackIds)
                {
                    idSet.Add(f);
                }

                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }

    return Z - ZGold; // -log{P(y*|x,w)}
}
/// <summary>
/// Scalar version of belief computation. Performs a backward pass followed by a forward pass over
/// the sequence, then writes the exponentiated, Z-normalised node and edge values into
/// <paramref name="bel"/> and stores Z in <c>bel.Z</c>.
/// </summary>
/// <param name="bel">
/// Output. <c>belState</c> is used as the backward (beta) buffer and then overwritten with the final node
/// values; <c>belEdge[i]</c> (i from 1) receives the final edge values.
/// NOTE(review): this relies on <c>bel</c> arriving in its freshly constructed state (presumably all zeros) — confirm.
/// </param>
/// <param name="m">Model; <c>NTag</c> is read here and the model is forwarded to <c>getLogYY</c>.</param>
/// <param name="x">Sequence; <c>Count</c> gives the number of nodes.</param>
/// <param name="scalar">Forwarded unchanged to <c>getLogYY</c>.</param>
/// <param name="mask">Forwarded unchanged to <c>getLogYY</c>.</param>
public void getBeliefs(belief bel, model m, dataSeq x, double scalar, bool mask)
{
    int seqLen = x.Count;
    int tagCount = m.NTag;

    dMatrix edgeLog = new dMatrix(tagCount, tagCount);
    double[] zeros = new double[tagCount];
    List<double> nodeLog = new List<double>(zeros);
    List<double> alpha = new List<double>(zeros);
    List<double> alphaNext = new List<double>(zeros);
    List<double> scratch = new List<double>(zeros);

    // Backward scan: belState[t - 1] is filled from belState[t] plus the node values at t.
    for (int t = seqLen - 1; t >= 1; t--)
    {
        getLogYY(scalar, m, x, t, ref edgeLog, ref nodeLog, false, mask);
        listTool.listSet(ref scratch, bel.belState[t]);
        listTool.listAdd(ref scratch, nodeLog);
        logMultiply(edgeLog, scratch, bel.belState[t - 1]);
    }

    // Forward scan: builds alpha and combines it with the stored backward values.
    for (int t = 0; t < seqLen; t++)
    {
        getLogYY(scalar, m, x, t, ref edgeLog, ref nodeLog, false, mask);

        if (t == 0)
        {
            // First node: alpha is just the node values.
            listTool.listSet(ref alphaNext, nodeLog);
        }
        else
        {
            // alphaNext = logMultiply(transposed edge matrix, previous alpha) + node values.
            listTool.listSet(ref scratch, alpha);
            edgeLog.transpose();
            logMultiply(edgeLog, scratch, alphaNext);
            listTool.listAdd(ref alphaNext, nodeLog);

            // scratch = node values + backward values at t.
            listTool.listSet(ref scratch, nodeLog);
            listTool.listAdd(ref scratch, bel.belState[t]);

            // Second transpose restores the original orientation before it is copied out.
            edgeLog.transpose();
            dMatrix edgeBelief = bel.belEdge[t];
            edgeBelief.set(edgeLog);

            // Edge value = previous alpha + edge matrix + backward values + node values.
            for (int prev = 0; prev < tagCount; prev++)
            {
                for (int cur = 0; cur < tagCount; cur++)
                {
                    edgeBelief[prev, cur] += scratch[cur] + alpha[prev];
                }
            }
        }

        // Node value = alpha + backward values (belState[t] still holds the backward values here).
        List<double> nodeBelief = bel.belState[t];
        listTool.listAdd(ref nodeBelief, alphaNext);
        listTool.listSet(ref alpha, alphaNext);
    }

    double logZ = logSum(alpha);

    // Normalise by Z and exponentiate the node values.
    for (int t = 0; t < seqLen; t++)
    {
        List<double> nodeBelief = bel.belState[t];
        listTool.listAdd(ref nodeBelief, -logZ);
        listTool.listExp(ref nodeBelief);
    }

    // Same for the edge values; there is no edge at position 0.
    for (int t = 1; t < seqLen; t++)
    {
        bel.belEdge[t].add(-logZ);
        bel.belEdge[t].eltExp();
    }

    bel.Z = logZ;
}
/// <summary>
/// Computes beliefs (marginal probabilities, per the original comment) from precomputed per-position
/// edge matrices and node vectors instead of recomputing them at every position.
/// Writes the exponentiated, Z-normalised node and edge values into <paramref name="bel"/> and stores
/// Z in <c>bel.Z</c>.
/// </summary>
/// <param name="bel">
/// Output. <c>belState</c> is used as the backward (beta) buffer and then overwritten with the final node
/// values; <c>belEdge[i]</c> (i from 1) receives the final edge values.
/// NOTE(review): this relies on <c>bel</c> arriving in its freshly constructed state (presumably all zeros) — confirm.
/// </param>
/// <param name="m">Model; only <c>NTag</c> is read.</param>
/// <param name="x">Sequence; only <c>Count</c> is read.</param>
/// <param name="YYlist">Edge matrix per position. Entries at index 1 and above are read; they are copied before being transposed, so the list contents are not modified by the forward pass.</param>
/// <param name="Ylist">Node vector per position; every index from 0 to Count - 1 is read.</param>
public void getBeliefs(belief bel, model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist)
{
    int seqLen = x.Count;
    int tagCount = m.NTag;

    double[] zeros = new double[tagCount];
    List<double> alpha = new List<double>(zeros);
    // Accumulated value from the left up to and including the current node.
    List<double> alphaNext = new List<double>(zeros);
    List<double> scratch = new List<double>(zeros);

    // Backward scan: belState[t - 1] is filled from belState[t] plus the node values at t.
    for (int t = seqLen - 1; t >= 1; t--)
    {
        dMatrix edgeLog = YYlist[t];
        List<double> nodeLog = Ylist[t];

        listTool.listSet(ref scratch, bel.belState[t]);
        listTool.listAdd(ref scratch, nodeLog);
        logMultiply(edgeLog, scratch, bel.belState[t - 1]);
    }

    // Forward scan: builds alpha and combines it with the stored backward values.
    for (int t = 0; t < seqLen; t++)
    {
        // Work on a copy: the matrix is transposed in place below and the caller's list must stay intact.
        dMatrix edgeLog = (t > 0) ? new dMatrix(YYlist[t]) : null;
        List<double> nodeLog = Ylist[t];

        if (t == 0)
        {
            // First node: alpha is just the node values.
            listTool.listSet(ref alphaNext, nodeLog);
        }
        else
        {
            // alphaNext = logMultiply(transposed edge matrix, previous alpha) + node values.
            listTool.listSet(ref scratch, alpha);
            edgeLog.transpose();
            logMultiply(edgeLog, scratch, alphaNext);
            listTool.listAdd(ref alphaNext, nodeLog);

            // scratch = beta + Y.
            listTool.listSet(ref scratch, nodeLog);
            listTool.listAdd(ref scratch, bel.belState[t]);

            // Second transpose restores the original orientation before it is copied out.
            edgeLog.transpose();
            dMatrix edgeBelief = bel.belEdge[t];
            edgeBelief.set(edgeLog);

            // Edge belief = alpha + YY + beta + Y.
            for (int prev = 0; prev < tagCount; prev++)
            {
                for (int cur = 0; cur < tagCount; cur++)
                {
                    edgeBelief[prev, cur] += scratch[cur] + alpha[prev];
                }
            }
        }

        // Node belief = alpha + beta (belState[t] still holds beta at this point).
        List<double> nodeBelief = bel.belState[t];
        listTool.listAdd(ref nodeBelief, alphaNext);
        listTool.listSet(ref alpha, alphaNext);
    }

    double logZ = logSum(alpha);

    // Normalise by Z and exponentiate the node values.
    for (int t = 0; t < seqLen; t++)
    {
        List<double> nodeBelief = bel.belState[t];
        listTool.listAdd(ref nodeBelief, -logZ);
        listTool.listExp(ref nodeBelief);
    }

    // Same for the edge values; there is no edge at position 0.
    for (int t = 1; t < seqLen; t++)
    {
        bel.belEdge[t].add(-logZ);
        bel.belEdge[t].eltExp();
    }

    // The overall potential function value (per the original comment).
    bel.Z = logZ;
}
/// <summary>
/// CRF gradient for a single sequence, using precomputed edge matrices and node vectors.
/// Obtains the unmasked and masked inputs from <c>_inf.getYYandY</c>, computes both sets of beliefs,
/// and adds (unmasked belief - masked belief) into <paramref name="gradList"/> for node features,
/// traditional edge features (when <c>Global.useTraditionalEdge</c> is set) and rich edge features.
/// </summary>
/// <param name="gradList">Gradient accumulator indexed by feature ID. Entries are updated in place and are not reset here.</param>
/// <param name="m">Model; only <c>NTag</c> is read directly here, the rest is forwarded to inference.</param>
/// <param name="x">Sequence to process.</param>
/// <param name="idSet">Optional. When non-null it is cleared and then receives the ID of every feature touched.</param>
/// <returns>
/// Unmasked Z minus masked Z, which the original author's comment describes as -log{P(y*|x,w)}.
/// </returns>
override public double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet)
{
    bool recordIds = idSet != null;
    if (recordIds)
    {
        idSet.Clear();
    }

    int tagCount = m.NTag;

    belief fullBel = new belief(x.Count, tagCount);
    belief maskedBel = new belief(x.Count, tagCount);

    // Per-position edge matrices and node vectors, unmasked and masked, computed once and reused.
    List<dMatrix> edgeLogs = new List<dMatrix>();
    List<dMatrix> maskedEdgeLogs = new List<dMatrix>();
    List<List<double>> nodeLogs = new List<List<double>>();
    List<List<double>> maskedNodeLogs = new List<List<double>>();
    _inf.getYYandY(m, x, edgeLogs, nodeLogs, maskedEdgeLogs, maskedNodeLogs);

    _inf.getBeliefs(fullBel, m, x, edgeLogs, nodeLogs);
    _inf.getBeliefs(maskedBel, m, x, maskedEdgeLogs, maskedNodeLogs);
    double maskedZ = maskedBel.Z;
    double fullZ = fullBel.Z;

    // Node features: every feature at a position contributes once per tag, weighted by its value.
    for (int pos = 0; pos < x.Count; pos++)
    {
        List<featureTemp> nodeFeats = _fGene.getFeatureTemp(x, pos);
        foreach (featureTemp feat in nodeFeats)
        {
            for (int tag = 0; tag < tagCount; tag++)
            {
                int featId = _fGene.getNodeFeatID(feat.id, tag);
                if (recordIds)
                {
                    idSet.Add(featId);
                }

                gradList[featId] += fullBel.belState[pos][tag] * feat.val;
                gradList[featId] -= maskedBel.belState[pos][tag] * feat.val;
            }
        }
    }

    // Edge features; edges start at position 1.
    for (int pos = 1; pos < x.Count; pos++)
    {
        // Non-rich (traditional) edges: one feature per (previous tag, current tag) pair.
        if (Global.useTraditionalEdge)
        {
            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(prevTag, tag);
                    if (recordIds)
                    {
                        idSet.Add(featId);
                    }

                    gradList[featId] += fullBel.belEdge[pos][prevTag, tag];
                    gradList[featId] -= maskedBel.belEdge[pos][prevTag, tag];
                }
            }
        }

        // Rich edges: only features whose ID is below getNRichFeatTemp() take part.
        List<featureTemp> edgeFeats = _fGene.getFeatureTemp(x, pos);
        foreach (featureTemp feat in edgeFeats)
        {
            int templateId = feat.id;
            if (templateId >= _fGene.getNRichFeatTemp())
            {
                continue;
            }

            for (int tag = 0; tag < tagCount; tag++)
            {
                for (int prevTag = 0; prevTag < tagCount; prevTag++)
                {
                    int featId = _fGene.getEdgeFeatID(templateId, prevTag, tag);
                    if (recordIds)
                    {
                        idSet.Add(featId);
                    }

                    gradList[featId] += fullBel.belEdge[pos][prevTag, tag] * feat.val;
                    gradList[featId] -= maskedBel.belEdge[pos][prevTag, tag] * feat.val;
                }
            }
        }
    }

    return fullZ - maskedZ; // -log{P(y*|x,w)}
}