//return the log normalizer Z(x); with mask=true the lattice is restricted to the gold labels,
//so the returned value is the log score of the gold path
public double getZ(model m, dataSeq x, bool mask)
{
    belief bel = new belief(x.Count, m.NTag);
    getBeliefs(bel, m, x, mask);
    return(bel.Z);
}
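//A minimal sketch (an added illustration, not part of the original code): the
//negative log-likelihood -log{P(y*|x,w)} can be obtained from two getZ calls,
//since the masked pass restricts the lattice to the gold-standard labels and
//its Z is then the gold path score. The method name is hypothetical.
public double getNLogLikSketch(model m, dataSeq x)
{
    double Z = getZ(m, x, false);    //log partition function over all paths
    double ZGold = getZ(m, x, true); //log score of the gold path
    return Z - ZGold;                //-log{P(y*|x,w)}
}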
public Lattice(model m, inference inf, dataSeq x)
{
    _w = x.Count;
    _h = m.NTag;
    _logBel = new belief(_w, _h);
    List<dMatrix> YYlist = new List<dMatrix>();
    List<List<double>> Ylist = new List<List<double>>();
    inf.getYYandY(m, x, YYlist, Ylist);
    //copy the node scores (Y) and edge scores (YY) into the lattice
    for (int i = 0; i < _w; i++)
    {
        _logBel.belState[i] = new List<double>(Ylist[i]);
        if (i > 0)
            _logBel.belEdge[i] = new dMatrix(YYlist[i]);
    }
    _heuListList = new List<List<double>>();
    for (int i = 0; i < _w; i++)
        _heuListList.Add(new List<double>(new double[_h]));
    //run a backward Viterbi pass over the same scores
    Viterbi _bwdViterbi = new Viterbi(_w, _h);
    for (int i = 0; i < _w; i++)
        _bwdViterbi.setScores(i, Ylist[i], YYlist[i]);
    List<int> tags = new List<int>();
    _bwdViterbi.runViterbi(ref tags);
    //update the viterbiHeuristicMap
    for (int i = 0; i < _w; i++)
    {
        for (int j = 0; j < _h; j++)
        {
            double h = _bwdViterbi.getPathScore(i, j);
            setHeuMap(i, j, h);
        }
    }
    //get zGold: the log score of the gold-standard tag path
    ZGold = 0;
    for (int i = 0; i < x.Count; i++)
    {
        int s = x.getTags(i);
        ZGold += Ylist[i][s];
        if (i > 0)
        {
            int sPre = x.getTags(i - 1);
            ZGold += YYlist[i][sPre, s];
        }
    }
}
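//Note (an interpretive comment, not in the original code): given the name
//_bwdViterbi, getPathScore(i, j) is assumed to return the best achievable
//score of a complete path constrained to be in tag j at position i, which
//makes the heuristic map admissible for best-first (A*-style) search over
//this lattice. A sketch of the backward recursion such a score rests on:
//  bwd(i, j) = Y[i][j] + max_k ( YY[i+1][j, k] + bwd(i+1, k) )
//with bwd(w-1, j) = Y[w-1][j] at the last position.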
//return the gradient of -log{P(y*|x,w)}, i.e., E_{P(y|x)}(F(x,y)) - F(x,y*)
virtual public double getGrad(List<double> vecGrad, model m, dataSeq x, baseHashSet<int> idSet)
{
    if (idSet != null)
        idSet.Clear();
    int nTag = m.NTag;
    //compute beliefs: unmasked for the model expectation, masked for the gold path
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, false);
    _inf.getBeliefs(belMasked, m, x, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;

    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        for (int j = 0; j < fList.Count; j++)
        {
            featureTemp im = fList[j];
            int id = im.id;
            double v = im.val;
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(id, s);
                if (idSet != null)
                    idSet.Add(f);
                vecGrad[f] += bel.belState[i][s] * v;
                vecGrad[f] -= belMasked.belState[i][s] * v;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                if (idSet != null)
                    idSet.Add(f);
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }
    return(Z - ZGold);//-log{P(y*|x,w)}
}
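//Worked form of the gradient accumulated above (restating the header comment):
//  d(-log P(y*|x,w))/dw_f = E_{P(y|x,w)}[F_f(x,y)] - F_f(x,y*)
//bel supplies the expectation term; belMasked is computed over the lattice
//restricted to the gold labels, so subtracting its beliefs realizes -F(x,y*),
//and the returned Z - ZGold is the objective value -log{P(y*|x,w)} itself.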
//the scalar version
virtual public double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet)
{
    idSet.Clear();
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, scalar, false);
    _inf.getBeliefs(belMasked, m, x, scalar, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;

    List<featureTemp> fList;
    //Loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                idSet.Add(f);
                vecGrad[f] += bel.belState[i][s] * im.val;
                vecGrad[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //Loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        for (int s = 0; s < nTag; s++)
        {
            for (int sPre = 0; sPre < nTag; sPre++)
            {
                int f = _fGene.getEdgeFeatID(sPre, s);
                idSet.Add(f);
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }
    return(Z - ZGold);//-log{P(y*|x,w)}
}
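//A minimal sketch (an added illustration, not part of the original code) of a
//caller consuming idSet for a sparse stochastic update: only features active
//on this sequence are touched. The names w, learnRate, and the method name
//are hypothetical; baseHashSet<int> is assumed to be enumerable.
public double sparseUpdateSketch(List<double> w, List<double> grad, double scalar, double learnRate, model m, dataSeq x)
{
    baseHashSet<int> idSet = new baseHashSet<int>();
    double err = getGradCRF(grad, scalar, m, x, idSet);
    foreach (int f in idSet)
    {
        w[f] -= learnRate * grad[f];//update only the weights touched by x
        grad[f] = 0;//reset touched entries for the next sample
    }
    return err;
}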
public void getBeliefs(belief bel, model m, dataSeq x, bool mask)
{
    int nNodes = x.Count;
    int nStates = m.NTag;
    dMatrix YY = new dMatrix(nStates, nStates);
    double[] dAry = new double[nStates];
    List<double> Y = new List<double>(dAry);
    List<double> alpha_Y = new List<double>(dAry);
    List<double> newAlpha_Y = new List<double>(dAry);
    List<double> tmp_Y = new List<double>(dAry);
    //compute beta values in a backward scan, stored in bel.belState
    for (int i = nNodes - 1; i > 0; i--)
    {
        getLogYY(m, x, i, ref YY, ref Y, false, mask);
        listTool.listSet(ref tmp_Y, bel.belState[i]);
        listTool.listAdd(ref tmp_Y, Y);
        logMultiply(YY, tmp_Y, bel.belState[i - 1]);
    }
    //compute alpha values in a forward scan
    for (int i = 0; i < nNodes; i++)
    {
        getLogYY(m, x, i, ref YY, ref Y, false, mask);
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, alpha_Y);
            YY.transpose();
            logMultiply(YY, tmp_Y, newAlpha_Y);
            listTool.listAdd(ref newAlpha_Y, Y);
        }
        else
        {
            listTool.listSet(ref newAlpha_Y, Y);
        }
        //set marginal scores on edges: belief = alpha + YY + beta + Y
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, Y);
            listTool.listAdd(ref tmp_Y, bel.belState[i]);
            YY.transpose();//transpose back to the original orientation
            bel.belEdge[i].set(YY);
            for (int yPre = 0; yPre < nStates; yPre++)
                for (int y = 0; y < nStates; y++)
                    bel.belEdge[i][yPre, y] += tmp_Y[y] + alpha_Y[yPre];
        }
        //set marginal scores on nodes: belief = alpha + beta
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, newAlpha_Y);
        listTool.listSet(ref alpha_Y, newAlpha_Y);
    }
    //normalize by Z and exponentiate to turn log scores into probabilities
    double Z = logSum(alpha_Y);
    for (int i = 0; i < nNodes; i++)
    {
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, -Z);
        listTool.listExp(ref tmp);
    }
    for (int i = 1; i < nNodes; i++)
    {
        bel.belEdge[i].add(-Z);
        bel.belEdge[i].eltExp();
    }
    bel.Z = Z;
}
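//logSum and logMultiply are used above but defined elsewhere; the sketches
//below (added, hypothetical names with a "Sketch" suffix) show the numerics
//they are assumed to implement: log-sum-exp with max-shifting for stability,
//and a log-space matrix-vector product where + plays the role of * and
//logSum the role of +. Assumes `using System;` and the dMatrix [row, col]
//indexer already used in this file.
static double logSumSketch(List<double> a)
{
    double mx = double.NegativeInfinity;
    foreach (double v in a)
        if (v > mx) mx = v;
    double sum = 0;
    foreach (double v in a)
        sum += Math.Exp(v - mx);//shift by the max so Exp cannot overflow
    return mx + Math.Log(sum);
}

static void logMultiplySketch(dMatrix A, List<double> b, List<double> result)
{
    //result[r] = logSum_c ( A[r, c] + b[c] )
    List<double> row = new List<double>(new double[b.Count]);
    for (int r = 0; r < b.Count; r++)
    {
        for (int c = 0; c < b.Count; c++)
            row[c] = A[r, c] + b[c];
        result[r] = logSumSketch(row);
    }
}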
//get beliefs (marginal probabilities), using precomputed YY and Y scores
//instead of rebuilding them with getLogYY
public void getBeliefs(belief bel, model m, dataSeq x, List<dMatrix> YYlist, List<List<double>> Ylist)
{
    int nNodes = x.Count;
    int nTag = m.NTag;
    double[] dAry = new double[nTag];
    List<double> alpha_Y = new List<double>(dAry);
    //alpha: forward score from the left up to and including the current node
    List<double> newAlpha_Y = new List<double>(dAry);
    List<double> tmp_Y = new List<double>(dAry);
    //compute beta values in a backward scan, stored in bel.belState
    for (int i = nNodes - 1; i > 0; i--)
    {
        dMatrix YY = YYlist[i];//the Mi matrix
        List<double> Y = Ylist[i];
        listTool.listSet(ref tmp_Y, bel.belState[i]);//meaningful from the second iteration onward
        listTool.listAdd(ref tmp_Y, Y);
        logMultiply(YY, tmp_Y, bel.belState[i - 1]);
    }
    //compute alpha values in a forward scan
    for (int i = 0; i < nNodes; i++)
    {
        dMatrix YY = null;
        if (i > 0)
            YY = new dMatrix(YYlist[i]);//work on a copy: transpose() below would otherwise corrupt YYlist
        List<double> Y = Ylist[i];
        if (i > 0)
        {
            listTool.listSet(ref tmp_Y, alpha_Y);//meaningful from the second iteration onward
            YY.transpose();
            logMultiply(YY, tmp_Y, newAlpha_Y);
            listTool.listAdd(ref newAlpha_Y, Y);
        }
        else
        {
            listTool.listSet(ref newAlpha_Y, Y);
        }
        //setting marginal probability on edges
        if (i > 0)
        {
            //beta + Y
            listTool.listSet(ref tmp_Y, Y);
            listTool.listAdd(ref tmp_Y, bel.belState[i]);
            //YY, transposed back to the original orientation
            YY.transpose();
            bel.belEdge[i].set(YY);
            //belief = alpha + YY + beta + Y
            for (int yPre = 0; yPre < nTag; yPre++)
                for (int y = 0; y < nTag; y++)
                    bel.belEdge[i][yPre, y] += tmp_Y[y] + alpha_Y[yPre];
        }
        //setting marginal probability on nodes
        List<double> tmp = bel.belState[i];//beta
        listTool.listAdd(ref tmp, newAlpha_Y);//belief = alpha + beta
        listTool.listSet(ref alpha_Y, newAlpha_Y);
    }
    //normalize by Z and exponentiate to turn log scores into probabilities
    double Z = logSum(alpha_Y);
    for (int i = 0; i < nNodes; i++)
    {
        List<double> tmp = bel.belState[i];
        listTool.listAdd(ref tmp, -Z);
        listTool.listExp(ref tmp);
    }
    for (int i = 1; i < nNodes; i++)
    {
        bel.belEdge[i].add(-Z);
        bel.belEdge[i].eltExp();
    }
    bel.Z = Z;//the log partition function (normalizer)
}
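//The recursions implemented by the two scans above, in log space (Y are node
//scores, YY edge scores; every sum over tags is a log-sum-exp via logMultiply):
//  alpha_i(y) = Y_i(y) + logSum_{y'} ( alpha_{i-1}(y') + YY_i(y', y) )
//  beta_i(y)  = logSum_{y'} ( YY_{i+1}(y, y') + Y_{i+1}(y') + beta_{i+1}(y') )
//  Z = logSum_y alpha_{n-1}(y)
//  P(y_i = y | x)           = exp( alpha_i(y) + beta_i(y) - Z )
//  P(y_{i-1}=y', y_i=y | x) = exp( alpha_{i-1}(y') + YY_i(y', y) + Y_i(y) + beta_i(y) - Z )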
override public double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet)
{
    if (idSet != null)
        idSet.Clear();
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    //store the YY and Y scores (the masked versions restrict to the gold labels)
    List<dMatrix> YYlist = new List<dMatrix>(), maskYYlist = new List<dMatrix>();
    List<List<double>> Ylist = new List<List<double>>(), maskYlist = new List<List<double>>();
    _inf.getYYandY(m, x, YYlist, Ylist, maskYYlist, maskYlist);
    _inf.getBeliefs(bel, m, x, YYlist, Ylist);
    _inf.getBeliefs(belMasked, m, x, maskYYlist, maskYlist);
    double ZGold = belMasked.Z;
    double Z = bel.Z;

    List<featureTemp> fList;
    //Loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (idSet != null)
                    idSet.Add(f);
                gradList[f] += bel.belState[i][s] * im.val;
                gradList[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //Loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        //non-rich (traditional) edge features: one weight per tag transition
        if (Global.useTraditionalEdge)
        {
            for (int s = 0; s < nTag; s++)
            {
                for (int sPre = 0; sPre < nTag; sPre++)
                {
                    int f = _fGene.getEdgeFeatID(sPre, s);
                    if (idSet != null)
                        idSet.Add(f);
                    gradList[f] += bel.belEdge[i][sPre, s];
                    gradList[f] -= belMasked.belEdge[i][sPre, s];
                }
            }
        }
        //rich edge features: observation-dependent transition weights
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            int id = im.id;
            if (id < _fGene.getNRichFeatTemp())
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        int f = _fGene.getEdgeFeatID(id, sPre, s);
                        if (idSet != null)
                            idSet.Add(f);
                        gradList[f] += bel.belEdge[i][sPre, s] * im.val;
                        gradList[f] -= belMasked.belEdge[i][sPre, s] * im.val;
                    }
                }
            }
        }
    }
    return(Z - ZGold);//-log{P(y*|x,w)}
}
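//A minimal sketch (an added illustration, not part of the original code) of
//driving this gradient from a batch trainer; the names trainData, w, rate,
//and the method name are hypothetical. Passing null for idSet is handled
//above and simply skips the sparse-id bookkeeping.
public double batchGradSketch(List<double> grad, List<double> w, double rate, model m, List<dataSeq> trainData)
{
    double error = 0;
    foreach (dataSeq x in trainData)
        error += getGradCRF(grad, m, x, null);//dense accumulation over the batch
    for (int f = 0; f < w.Count; f++)
    {
        w[f] -= rate * grad[f] / trainData.Count;//averaged gradient step
        grad[f] = 0;
    }
    return error;//sum of -log{P(y*|x,w)} over the batch
}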