//the scalar version: accumulates the CRF gradient for one sequence into
//vecGrad, records every touched feature index in idSet, and returns
//Z - ZGold, i.e. -log{P(y*|x,w)}.
virtual public double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet)
{
    idSet.Clear();
    int nTag = m.NTag;

    //beliefs under the full model vs. the gold-masked model
    belief belFull = new belief(x.Count, nTag);
    belief belGold = new belief(x.Count, nTag);
    _inf.getBeliefs(belFull, m, x, scalar, false);
    _inf.getBeliefs(belGold, m, x, scalar, true);
    double ZGold = belGold.Z;
    double Z = belFull.Z;

    //node features: gradient gets model expectation minus gold expectation
    for (int pos = 0; pos < x.Count; pos++)
    {
        List<featureTemp> feats = _fGene.getFeatureTemp(x, pos);
        foreach (featureTemp ft in feats)
        {
            for (int tag = 0; tag < nTag; tag++)
            {
                int featID = _fGene.getNodeFeatID(ft.id, tag);
                idSet.Add(featID);
                vecGrad[featID] += belFull.belState[pos][tag] * ft.val;
                vecGrad[featID] -= belGold.belState[pos][tag] * ft.val;
            }
        }
    }

    //edge features over each consecutive position pair
    for (int pos = 1; pos < x.Count; pos++)
    {
        for (int tag = 0; tag < nTag; tag++)
        {
            for (int preTag = 0; preTag < nTag; preTag++)
            {
                int featID = _fGene.getEdgeFeatID(preTag, tag);
                idSet.Add(featID);
                vecGrad[featID] += belFull.belEdge[pos][preTag, tag];
                vecGrad[featID] -= belGold.belEdge[pos][preTag, tag];
            }
        }
    }

    return Z - ZGold; //-log{P(y*|x,w)}
}
//Computes chunk-level F1/precision/recall (in percent) between a gold and a
//predicted tag sequence. Returns [f1, precision, recall] and appends the raw
//counts [gold_chunk, res_chunk, correct_chunk] to infoList.
//Fix: all three ratios are guarded against zero denominators — the original
//produced NaN/Infinity when either tag list yielded no chunks.
public static List<double> getFscore(List<string> goldTagList, List<string> resTagList, List<double> infoList)
{
    List<double> scoreList = new List<double>();
    if (resTagList.Count != goldTagList.Count)
    {
        throw new Exception("error");
    }
    //convert original tags to 3 tags: B(x), I, O
    getNewTagList(Global.chunkTagMap, ref goldTagList);
    getNewTagList(Global.chunkTagMap, ref resTagList);
    List<string> goldChunkList = getChunks(goldTagList);
    List<string> resChunkList = getChunks(resTagList);

    int gold_chunk = 0, res_chunk = 0, correct_chunk = 0;
    for (int i = 0; i < goldChunkList.Count; i++)
    {
        string res = resChunkList[i];
        string gold = goldChunkList[i];
        string[] resChunkAry = res.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
        string[] goldChunkAry = gold.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
        gold_chunk += goldChunkAry.Length;
        res_chunk += resChunkAry.Length;
        //count predicted chunks that also occur in the gold set
        baseHashSet<string> goldChunkSet = new baseHashSet<string>();
        foreach (string im in goldChunkAry)
        {
            goldChunkSet.Add(im);
        }
        foreach (string im in resChunkAry)
        {
            if (goldChunkSet.Contains(im))
            {
                correct_chunk++;
            }
        }
    }

    //guard against zero denominators: report 0 instead of NaN/Infinity
    double pre = res_chunk == 0 ? 0 : (double)correct_chunk / (double)res_chunk * 100;
    double rec = gold_chunk == 0 ? 0 : (double)correct_chunk / (double)gold_chunk * 100;
    double f1 = (pre + rec) == 0 ? 0 : 2 * pre * rec / (pre + rec);

    scoreList.Add(f1);
    scoreList.Add(pre);
    scoreList.Add(rec);
    infoList.Add(gold_chunk);
    infoList.Add(res_chunk);
    infoList.Add(correct_chunk);
    return scoreList;
}
//the mini-batch version: sums getGradCRF over every sequence in X and,
//when idset is non-null, collects the union of all touched feature indices.
public double getGrad_SGD_miniBatch(List<double> g, model m, List<dataSeq> X, baseHashSet<int> idset)
{
    if (idset != null)
    {
        idset.Clear();
    }

    double error = 0;
    foreach (dataSeq seq in X)
    {
        //per-sequence index set, merged into the caller's set afterwards
        baseHashSet<int> localIds = new baseHashSet<int>();
        error += getGradCRF(g, m, seq, localIds);
        if (idset != null)
        {
            foreach (int id in localIds)
            {
                idset.Add(id);
            }
        }
    }
    return error;
}
// Rich-edge version of the CRF gradient: accumulates (model expectation -
// gold expectation) for one sequence into gradList, records every touched
// feature index in idSet (when non-null), and returns Z - ZGold,
// i.e. -log{P(y*|x,w)}.
override public double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet)
{
    if (idSet != null)
    {
        idSet.Clear();
    }
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    //store the YY and Y
    List<dMatrix> YYlist = new List<dMatrix>(), maskYYlist = new List<dMatrix>();
    List<List<double>> Ylist = new List<List<double>>(), maskYlist = new List<List<double>>();
    _inf.getYYandY(m, x, YYlist, Ylist, maskYYlist, maskYlist);
    _inf.getBeliefs(bel, m, x, YYlist, Ylist);
    _inf.getBeliefs(belMasked, m, x, maskYYlist, maskYlist);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //Loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++)
    {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            for (int s = 0; s < nTag; s++)
            {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (idSet != null)
                {
                    idSet.Add(f);
                }
                // model-expected minus gold-expected node marginal
                gradList[f] += bel.belState[i][s] * im.val;
                gradList[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //Loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++)
    {
        //non-rich: one weight per (prevTag, tag) pair, shared across positions
        if (Global.useTraditionalEdge)
        {
            for (int s = 0; s < nTag; s++)
            {
                for (int sPre = 0; sPre < nTag; sPre++)
                {
                    int f = _fGene.getEdgeFeatID(sPre, s);
                    if (idSet != null)
                    {
                        idSet.Add(f);
                    }
                    gradList[f] += bel.belEdge[i][sPre, s];
                    gradList[f] -= belMasked.belEdge[i][sPre, s];
                }
            }
        }
        //rich: edge features additionally conditioned on observation templates
        //(only the first getNRichFeatTemp() templates participate)
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList)
        {
            int id = im.id;
            if (id < _fGene.getNRichFeatTemp())
            {
                for (int s = 0; s < nTag; s++)
                {
                    for (int sPre = 0; sPre < nTag; sPre++)
                    {
                        // note the 3-arg overload: feature id keys the rich edge weight
                        int f = _fGene.getEdgeFeatID(id, sPre, s);
                        if (idSet != null)
                        {
                            idSet.Add(f);
                        }
                        gradList[f] += bel.belEdge[i][sPre, s] * im.val;
                        gradList[f] -= belMasked.belEdge[i][sPre, s] * im.val;
                    }
                }
            }
        }
    }
    return (Z - ZGold); //-log{P(y*|x,w)}
}
//Scans a training feature file, builds the feature-frequency map and tag set,
//then writes featureIndex.txt and tagIndex.txt under Global.modelDir.
//For group-lasso ("GL") mode it also records feature-template group
//boundaries in Global.groupStart/groupEnd.
//Fix: every StreamReader/StreamWriter is now wrapped in a using block so the
//handles are released even when an exception is thrown mid-way (the original
//only called Close() on the happy path).
public void getMaps(string file)
{
    if (!File.Exists(file))
    {
        Console.WriteLine("file {0} no exist!", file);
        return;
    }
    Console.WriteLine("file {0} converting...", file);

    baseHashMap<string, int> featureFreqMap = new baseHashMap<string, int>();
    baseHashSet<string> tagSet = new baseHashSet<string>();

    //get feature-freq info and tagset
    int nFeatTemp = 0;
    using (StreamReader sr = new StreamReader(file))
    {
        while (!sr.EndOfStream)
        {
            string line = sr.ReadLine();
            line = line.Replace("\t", " ");
            line = line.Replace("\r", "");
            if (line == "")
            {
                continue;
            }
            string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
            nFeatTemp = ary.Length - 2; //columns minus the index and the tag
            for (int i = 1; i < ary.Length - 1; i++)
            {
                if (ary[i] == "/") //no feature here
                {
                    continue;
                }
                if (Global.weightRegMode == "GL")
                {
                    if (Global.GL_init == false && Global.groupTrim[i - 1]) //this feature is removed in GL 1st step
                    {
                        continue;
                    }
                }
                string[] ary2 = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries); //for real-value features
                string feature = i.ToString() + "." + ary2[0];
                featureFreqMap[feature]++;
            }
            string tag = ary[ary.Length - 1];
            tagSet.Add(tag);
        }
    }

    //sort features
    List<string> sortList = new List<string>();
    foreach (baseHashMap<string, int>.KeyValuePair kv in featureFreqMap)
    {
        sortList.Add(kv.Key + " " + kv.Value);
    }
    if (Global.weightRegMode == "GL") //sort based on feature templates
    {
        sortList.Sort(listSortFunc.compareKV_key);
        //sortList.Reverse();
        using (StreamWriter sw = new StreamWriter("featureTemp_sorted.txt"))
        {
            foreach (string f in sortList)
            {
                sw.WriteLine(f);
            }
        }
        //record [start, end) boundaries of each template's feature group
        Global.groupStart = new List<int>();
        Global.groupEnd = new List<int>();
        Global.groupStart.Add(0);
        for (int k = 1; k < sortList.Count; k++)
        {
            string[] thisAry = sortList[k].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
            string[] preAry = sortList[k - 1].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
            string str = thisAry[0], preStr = preAry[0];
            if (str != preStr)
            {
                Global.groupStart.Add(k);
                Global.groupEnd.Add(k);
            }
        }
        Global.groupEnd.Add(sortList.Count);
    }
    else //sort based on feature frequency
    {
        sortList.Sort(listSortFunc.compareKV_value); //sort feature based on freq, for 1)compress .txt file 2)better edge features
        sortList.Reverse();
    }

    if (Global.weightRegMode == "GL" && Global.GL_init)
    {
        if (nFeatTemp != Global.groupStart.Count)
        {
            throw new Exception("inconsistent # of features per line, check the feature file for consistency!");
        }
    }

    //feature index should begin from 0
    using (StreamWriter swFeat = new StreamWriter(Global.modelDir + "/featureIndex.txt"))
    {
        for (int i = 0; i < sortList.Count; i++)
        {
            string[] ary = sortList[i].Split(Global.blankAry);
            featureIndexMap[ary[0]] = i;
            swFeat.WriteLine("{0} {1}", ary[0].Trim(), i);
        }
    }

    //label index should begin from 0
    using (StreamWriter swTag = new StreamWriter(Global.modelDir + "/tagIndex.txt"))
    {
        List<string> tagSortList = new List<string>();
        foreach (string tag in tagSet)
        {
            tagSortList.Add(tag);
        }
        tagSortList.Sort(); //sort tags
        for (int i = 0; i < tagSortList.Count; i++)
        {
            tagIndexMap[tagSortList[i]] = i;
            swTag.WriteLine("{0} {1}", tagSortList[i], i);
        }
    }
}