//Return the gradient of -log{P(y*|x,w)}, i.e., E_{P(y|x)}[F(x,y)] - F(x,y*)
virtual public double getGrad(List<double> vecGrad, model m, dataSeq x, baseHashSet<int> idSet) {
    if (idSet != null) { idSet.Clear(); }
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, false);
    _inf.getBeliefs(belMasked, m, x, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++) {
        fList = _fGene.getFeatureTemp(x, i);
        for (int j = 0; j < fList.Count; j++) {
            featureTemp im = fList[j];
            int id = im.id;
            double v = im.val;
            for (int s = 0; s < nTag; s++) {
                int f = _fGene.getNodeFeatID(id, s);
                if (idSet != null) { idSet.Add(f); }
                vecGrad[f] += bel.belState[i][s] * v;
                vecGrad[f] -= belMasked.belState[i][s] * v;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++) {
        for (int s = 0; s < nTag; s++) {
            for (int sPre = 0; sPre < nTag; sPre++) {
                int f = _fGene.getEdgeFeatID(sPre, s);
                if (idSet != null) { idSet.Add(f); }
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }
    return Z - ZGold;
}
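// A sketch of the math the method above implements: for a linear-chain CRF with
// P(y|x,w) = exp{w.F(x,y)} / Z(x,w), the gradient of the per-sequence objective -log P(y*|x,w)
// with respect to each weight w_f is
//     d/dw_f [-log P(y*|x,w)] = E_{P(y|x,w)}[F_f(x,y)] - F_f(x,y*),
// where F_f counts how often feature f fires on (x,y). The "bel" beliefs give the unconstrained
// marginals (the expectation term) and the "belMasked" beliefs are constrained to the gold tags y*,
// so the belState/belEdge differences accumulated into vecGrad realize exactly this expression.
// Assuming both Z values are kept in log space, the returned Z - ZGold is -log P(y*|x,w) itself.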
public void weightUpdate(List<double> w, List<double> grad, baseHashSet<int> idSet, double rs) {
    foreach (int i in idSet) {
        //subtract the gradient to move toward the minimum (grad is the gradient of -log-likelihood)
        w[i] -= rs * grad[i];
        //reset
        grad[i] = 0;
    }
}
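// Minimal usage sketch (assuming a gradient helper _grad, a model m, a sequence x, a gradient
// buffer vecGrad and a learning rate r_k are in scope, as in sgd_multi_fast later in this listing):
// compute the sparse gradient of one sequence, then apply it only to the weights that sequence touched.
//     baseHashSet<int> fset = new baseHashSet<int>();
//     double err = _grad.getGrad_SGD(vecGrad, m, x, fset);   //fills vecGrad, records touched ids in fset
//     weightUpdate(m.W, vecGrad, fset, r_k);                 //SGD step, then resets vecGrad at those ids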
public static List<double> getFscore(List<string> goldTagList, List<string> resTagList, List<double> infoList) {
    List<double> scoreList = new List<double>();
    if (resTagList.Count != goldTagList.Count) {
        throw new Exception("error: gold tag list and result tag list have different lengths");
    }
    //convert original tags to 3 tags: B(x), I, O
    getNewTagList(Global.chunkTagMap, ref goldTagList);
    getNewTagList(Global.chunkTagMap, ref resTagList);
    List<string> goldChunkList = getChunks(goldTagList);
    List<string> resChunkList = getChunks(resTagList);
    int gold_chunk = 0, res_chunk = 0, correct_chunk = 0;
    for (int i = 0; i < goldChunkList.Count; i++) {
        string res = resChunkList[i];
        string gold = goldChunkList[i];
        string[] resChunkAry = res.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
        string[] goldChunkAry = gold.Split(Global.commaAry, StringSplitOptions.RemoveEmptyEntries);
        gold_chunk += goldChunkAry.Length;
        res_chunk += resChunkAry.Length;
        baseHashSet<string> goldChunkSet = new baseHashSet<string>();
        foreach (string im in goldChunkAry) {
            goldChunkSet.Add(im);
        }
        foreach (string im in resChunkAry) {
            if (goldChunkSet.Contains(im)) {
                correct_chunk++;
            }
        }
    }
    double pre = (double)correct_chunk / (double)res_chunk * 100;
    double rec = (double)correct_chunk / (double)gold_chunk * 100;
    double f1 = 2 * pre * rec / (pre + rec);
    scoreList.Add(f1);
    scoreList.Add(pre);
    scoreList.Add(rec);
    infoList.Add(gold_chunk);
    infoList.Add(res_chunk);
    infoList.Add(correct_chunk);
    return scoreList;
}
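// The chunk-level scoring above, written out as a sketch of the math: with G gold chunks,
// R predicted (result) chunks and C correctly matched chunks,
//     precision P = 100 * C / R,   recall R' = 100 * C / G,   F1 = 2 * P * R' / (P + R').
// For example, G = 10, R = 8, C = 6 gives P = 75, R' = 60 and F1 = 2*75*60/135 = 66.7.
// Note that when nothing is predicted (R = 0) or nothing matches (P + R' = 0) the divisions above
// produce NaN/Infinity in C# rather than throwing, so callers may want to guard those cases.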
public double sgd_lazyReg() {
    List<double> w = _model.W;
    int fsize = w.Count;
    int xsize = _X.Count;
    double[] ary = new double[fsize];
    List<double> grad = new List<double>(ary);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);
    double error = 0;
    double r_k = 0;
    for (int t = 0; t < xsize; t++) {
        int ii = ri[t];
        dataSeq x = _X[ii];
        baseHashSet<int> fset = new baseHashSet<int>();
        double err = _grad.getGrad_SGD(grad, _model, x, fset);
        error += err;
        //decaying rate: r_k = r_0 * beta^(k/N)
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
        if (Global.countWithIter % (xsize / 4) == 0) {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        foreach (int i in fset) {
            w[i] -= r_k * grad[i];
            //reset
            grad[i] = 0;
        }
        Global.countWithIter++;
    }
    //lazy L2 regularization: applied once at the end of the pass
    if (Global.reg != 0) {
        for (int i = 0; i < fsize; i++) {
            double grad_i = w[i] / (Global.reg * Global.reg);
            w[i] -= r_k * grad_i;
        }
        double sum = listTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    return error;
}
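// Regularization note (a sketch of the math behind the block above): the objective is assumed to be
// -log P(y*|x,w) + ||w||^2 / (2*sigma^2) with sigma = Global.reg, so the penalty contributes
// w_i / sigma^2 to the gradient of each weight w_i and ||w||^2 / (2*sigma^2) to the reported error.
// In this "lazy" variant the penalty term is applied only once per pass over the data (after the loop)
// instead of at every sample, which keeps the per-sample updates sparse; sgd_exactReg later in this
// listing applies the same penalty at every step via the scalar trick.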
public double getGrad_SGD(List<double> g, model m, dataSeq x, baseHashSet<int> idset) {
    if (idset != null) { idset.Clear(); }
    if (x == null) { return 0; }
    return getGradCRF(g, m, x, idset);
}
//the mini-batch version
public double getGrad_SGD_miniBatch(List<double> g, model m, List<dataSeq> X, baseHashSet<int> idset) {
    if (idset != null) { idset.Clear(); }
    double error = 0;
    foreach (dataSeq x in X) {
        baseHashSet<int> idset2 = new baseHashSet<int>();
        error += getGradCRF(g, m, x, idset2);
        if (idset != null) {
            foreach (int i in idset2) {
                idset.Add(i);
            }
        }
    }
    return error;
}
public void getMaps(string file) {
    if (!File.Exists(file)) {
        Console.WriteLine("file {0} does not exist!", file);
        return;
    }
    Console.WriteLine("converting file {0}...", file);
    StreamReader sr = new StreamReader(file);
    baseHashMap<string, int> featureFreqMap = new baseHashMap<string, int>();
    baseHashSet<string> tagSet = new baseHashSet<string>();
    //get feature-frequency info and the tag set
    int nFeatTemp = 0;
    while (!sr.EndOfStream) {
        string line = sr.ReadLine();
        line = line.Replace("\t", " ");
        line = line.Replace("\r", "");
        if (line == "") {
            continue;
        }
        string[] ary = line.Split(Global.blankAry, StringSplitOptions.RemoveEmptyEntries);
        nFeatTemp = ary.Length - 2;
        for (int i = 1; i < ary.Length - 1; i++) {
            if (ary[i] == "/") {
                //no feature here
                continue;
            }
            string[] ary2 = ary[i].Split(Global.slashAry, StringSplitOptions.RemoveEmptyEntries); //for real-value features
            string feature = i.ToString() + "." + ary2[0];
            if (featureFreqMap.ContainsKey(feature) == false) {
                featureFreqMap[feature] = 1;
            }
            else {
                featureFreqMap[feature]++;
            }
        }
        string tag = ary[ary.Length - 1];
        tagSet.Add(tag);
    }
    //sort features
    List<string> sortList = new List<string>();
    foreach (baseHashMap<string, int>.KeyValuePair kv in featureFreqMap) {
        sortList.Add(kv.Key + " " + kv.Value);
    }
    if (Global.regMode == "GL") {
        //sort based on feature templates
        sortList.Sort(listSortFunc.compareKV_key);
        //sortList.Reverse();
        Global.groupStart = new List<int>();
        Global.groupEnd = new List<int>();
        Global.groupStart.Add(0);
        for (int k = 1; k < sortList.Count; k++) {
            string[] thisAry = sortList[k].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
            string[] preAry = sortList[k - 1].Split(Global.dotAry, StringSplitOptions.RemoveEmptyEntries);
            string str = thisAry[0], preStr = preAry[0];
            if (str != preStr) {
                Global.groupStart.Add(k);
                Global.groupEnd.Add(k);
            }
        }
        Global.groupEnd.Add(sortList.Count);
    }
    else {
        //sort based on feature frequency
        sortList.Sort(listSortFunc.compareKV_value); //sort features based on freq, for 1) compressing the .txt file and 2) better edge features
        sortList.Reverse();
    }
    if (Global.regMode == "GL") {
        if (nFeatTemp != Global.groupStart.Count) {
            throw new Exception("inconsistent # of features per line, check the feature file for consistency!");
        }
    }
    //feature index should begin from 0
    StreamWriter swFeat = new StreamWriter("featureIndex.txt");
    for (int i = 0; i < sortList.Count; i++) {
        string[] ary = sortList[i].Split(Global.blankAry);
        featureIndexMap[ary[0]] = i;
        swFeat.WriteLine("{0} {1}", ary[0], i);
    }
    swFeat.Close();
    //label index should begin from 0
    StreamWriter swTag = new StreamWriter("tagIndex.txt");
    List<string> tagSortList = new List<string>();
    foreach (string tag in tagSet) {
        tagSortList.Add(tag);
    }
    tagSortList.Sort(); //sort tags
    for (int i = 0; i < tagSortList.Count; i++) {
        tagIndexMap[tagSortList[i]] = i;
        swTag.WriteLine("{0} {1}", tagSortList[i], i);
    }
    swTag.Close();
    sr.Close();
}
//ADF training
public double adf() {
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    List<double> grad = new List<double>(new double[fsize]);
    double error = 0;
    List<int> featureCountList = new List<int>(new int[fsize]);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize); //random shuffle of training samples
    Global.interval = xsize / Global.nUpdate;
    int nSample = 0; //#sample in an update interval
    for (int t = 0; t < xsize; t += Global.miniBatch) {
        List<dataSeq> XX = new List<dataSeq>();
        bool end = false;
        for (int k = t; k < t + Global.miniBatch; k++) {
            int i = ri[k];
            dataSeq x = _X[i];
            XX.Add(x);
            if (k == xsize - 1) {
                end = true;
                break;
            }
        }
        int mbSize = XX.Count;
        nSample += mbSize;
        baseHashSet<int> fSet = new baseHashSet<int>();
        double err = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fSet);
        error += err;
        foreach (int i in fSet) {
            featureCountList[i]++;
        }
        bool check = false;
        for (int k = t; k < t + Global.miniBatch; k++) {
            if (t != 0 && k % Global.interval == 0) {
                check = true;
            }
        }
        //update decay rates
        if (check || end) {
            for (int i = 0; i < fsize; i++) {
                int v = featureCountList[i];
                double u = (double)v / (double)nSample;
                double eta = Global.upper - (Global.upper - Global.lower) * u;
                Global.decayList[i] *= eta;
            }
            //reset
            for (int i = 0; i < featureCountList.Count; i++) {
                featureCountList[i] = 0;
            }
        }
        //update weights
        foreach (int i in fSet) {
            w[i] -= (float)(Global.decayList[i] * grad[i]);
            //reset
            grad[i] = 0;
        }
        //reg
        if (check || end) {
            if (Global.reg != 0) {
                for (int i = 0; i < fsize; i++) {
                    double grad_i = w[i] / (Global.reg * Global.reg) * ((double)nSample / (double)xsize);
                    w[i] -= (float)(Global.decayList[i] * grad_i);
                }
            }
            //reset
            nSample = 0;
        }
        Global.countWithIter += mbSize;
    }
    if (Global.reg != 0) {
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    return error;
}
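// ADF decay-rate update in the block above, written out as a sketch: for each feature i, let
// u_i = featureCountList[i] / nSample, roughly the fraction of recent samples whose gradient
// touched feature i during the interval. Its per-feature learning rate is then multiplied by
//     eta_i = upper - (upper - lower) * u_i,   with bounds taken from Global.upper and Global.lower,
// so frequently-updated features have their rates decayed faster (eta_i closer to lower), while
// rarely-updated features keep larger rates (eta_i closer to upper).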
public double sgd_exactReg() {
    double scalar = 1, scalarOld = 1;
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    double newReg = Global.reg * Math.Sqrt(xsize);
    double oldReg = Global.reg;
    Global.reg = newReg;
    double[] tmpAry = new double[fsize];
    List<double> grad = new List<double>(tmpAry);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);
    double error = 0;
    double r_k = 0;
    for (int t = 0; t < xsize; t++) {
        int ii = ri[t];
        dataSeq x = _X[ii];
        baseHashSet<int> fset = new baseHashSet<int>();
        double err = _grad.getGrad_SGD(grad, scalar, _model, x, fset);
        error += err;
        //decaying rate: r_k = r_0 * beta^(k/N), with 0 < r_0 <= 1, 0 < beta < 1
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
        if (Global.countWithIter % (xsize / 4) == 0) {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        //reg
        if (t % Global.scalarResetStep == 0) {
            //reset
            for (int i = 0; i < fsize; i++) {
                w[i] *= (float)scalar;
            }
            scalar = scalarOld = 1;
        }
        else {
            scalarOld = scalar;
            scalar *= 1 - r_k / (Global.reg * Global.reg);
        }
        foreach (int i in fset) {
            double realWeight = w[i] * scalarOld;
            double grad_i = grad[i] + realWeight / (Global.reg * Global.reg);
            realWeight = realWeight - r_k * grad_i;
            w[i] = (float)(realWeight / scalar);
            //reset
            grad[i] = 0;
        }
        Global.countWithIter++;
    }
    //recover the real weights
    for (int i = 0; i < fsize; i++) {
        w[i] *= (float)scalar;
    }
    if (Global.reg != 0.0) {
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    Global.reg = oldReg;
    return error;
}
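// A sketch of the idea behind the "scalar" bookkeeping above (the lazy-scaling trick for exact L2
// regularization): the real weight vector is represented as scalar * w, so the multiplicative weight
// decay of an exact L2 step, w_real <- w_real * (1 - r_k / sigma^2), is applied to all weights at once
// by updating only the scalar. Only the few weights with non-zero gradient on the current sample are
// rewritten explicitly (using scalarOld to recover their real values first), and the scalar is folded
// back into w every Global.scalarResetStep steps to avoid numerical underflow of the stored weights.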
//SGD with lazy reg
public double sgd_lazyReg() {
    float[] w = _model.W;
    int fsize = w.Length;
    int xsize = _X.Count;
    double[] ary = new double[fsize];
    List<double> grad = new List<double>(ary);
    List<int> ri = randomTool<int>.getShuffledIndexList(xsize);
    double error = 0;
    double r_k = 0;
    for (int t = 0; t < xsize; t += Global.miniBatch) {
        List<dataSeq> XX = new List<dataSeq>();
        for (int k = t; k < t + Global.miniBatch; k++) {
            int i = ri[k];
            dataSeq x = _X[i];
            XX.Add(x);
            if (k == xsize - 1) {
                break;
            }
        }
        int mbSize = XX.Count;
        baseHashSet<int> fset = new baseHashSet<int>();
        double err = _grad.getGrad_SGD_miniBatch(grad, _model, XX, fset);
        error += err;
        //decaying rate: r_k = r_0 * beta^(k/N), with 0 < r_0 <= 1, 0 < beta < 1
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)xsize);
        if (Global.countWithIter % (xsize / 4) == 0) {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        foreach (int i in fset) {
            //grad[i] is the gradient of -log(obj), so subtract it to move toward the minimum
            w[i] -= (float)(r_k * grad[i]);
            //reset
            grad[i] = 0;
        }
        Global.countWithIter += mbSize;
    }
    if (Global.reg != 0) {
        for (int i = 0; i < fsize; i++) {
            double grad_i = w[i] / (Global.reg * Global.reg);
            w[i] -= (float)(r_k * grad_i);
        }
        double sum = arrayTool.squareSum(w);
        error += sum / (2.0 * Global.reg * Global.reg);
    }
    Global.diff = convergeTest(error);
    return error;
}
//the scalar version
virtual public double getGradCRF(List<double> vecGrad, double scalar, model m, dataSeq x, baseHashSet<int> idSet) {
    idSet.Clear();
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    _inf.getBeliefs(bel, m, x, scalar, false);
    _inf.getBeliefs(belMasked, m, x, scalar, true);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++) {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList) {
            for (int s = 0; s < nTag; s++) {
                int f = _fGene.getNodeFeatID(im.id, s);
                idSet.Add(f);
                vecGrad[f] += bel.belState[i][s] * im.val;
                vecGrad[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++) {
        for (int s = 0; s < nTag; s++) {
            for (int sPre = 0; sPre < nTag; sPre++) {
                int f = _fGene.getEdgeFeatID(sPre, s);
                idSet.Add(f);
                vecGrad[f] += bel.belEdge[i][sPre, s];
                vecGrad[f] -= belMasked.belEdge[i][sPre, s];
            }
        }
    }
    return Z - ZGold; //-log{P(y*|x,w)}
}
//the scalar version
public double getGrad_SGD(List<double> g, double scalar, model m, dataSeq x, baseHashSet<int> idset) {
    return getGradCRF(g, scalar, m, x, idset);
}
public static void loadData_multi(List<dataSet> XList, dataSet X, List<dataSet> XXList) {
    XList.Clear();
    XXList.Clear();
    //load train data
    baseHashSet<int> checkSet = new baseHashSet<int>();
    for (int i = 0; i < Global.nTask; i++) {
        string dat_i = i.ToString() + Global.fFeatureTrain;
        string tag_i = i.ToString() + Global.fGoldTrain;
        dataSet Xi = new dataSet(dat_i, tag_i);
        dataSizeScale(Xi);
        checkSet.Add(Xi.NFeatureTemp);
        XList.Add(Xi);
    }
    if (checkSet.Count > 1) {
        throw new Exception("inconsistent features among the multiple tasks!");
    }
    //make nTag consistent among different tasks
    int maxNTag = 0;
    foreach (dataSet Xi in XList) {
        if (maxNTag < Xi.NTag) {
            maxNTag = Xi.NTag;
        }
    }
    for (int i = 0; i < Global.nTask; i++) {
        XList[i].NTag = maxNTag;
    }
    //add to merged data
    X.NTag = XList[0].NTag;
    X.NFeatureTemp = XList[0].NFeatureTemp;
    foreach (dataSet Xi in XList) {
        foreach (dataSeq im in Xi) {
            X.Add(im);
        }
    }
    Global.swLog.WriteLine("data sizes (1, ..., T):");
    for (int i = 0; i < Global.nTask; i++) {
        dataSet Xi = XList[i];
        Global.swLog.WriteLine(" " + Xi.Count.ToString());
    }
    Global.swLog.WriteLine();
    //load test data
    for (int i = 0; i < Global.nTask; i++) {
        string dat_i = i.ToString() + Global.fFeatureTest;
        string tag_i = i.ToString() + Global.fGoldTest;
        dataSet Xtest = new dataSet(dat_i, tag_i);
        XXList.Add(Xtest);
    }
    for (int i = 0; i < Global.nTask; i++) {
        XXList[i].NTag = maxNTag;
    }
}
override public double getGradCRF(List<double> gradList, model m, dataSeq x, baseHashSet<int> idSet) {
    if (idSet != null) { idSet.Clear(); }
    int nTag = m.NTag;
    //compute beliefs
    belief bel = new belief(x.Count, nTag);
    belief belMasked = new belief(x.Count, nTag);
    //store the YY and Y
    List<dMatrix> YYlist = new List<dMatrix>(), maskYYlist = new List<dMatrix>();
    List<List<double>> Ylist = new List<List<double>>(), maskYlist = new List<List<double>>();
    _inf.getYYandY(m, x, YYlist, Ylist, maskYYlist, maskYlist);
    _inf.getBeliefs(bel, m, x, YYlist, Ylist);
    _inf.getBeliefs(belMasked, m, x, maskYYlist, maskYlist);
    double ZGold = belMasked.Z;
    double Z = bel.Z;
    List<featureTemp> fList;
    //loop over nodes to compute features and update the gradient
    for (int i = 0; i < x.Count; i++) {
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList) {
            for (int s = 0; s < nTag; s++) {
                int f = _fGene.getNodeFeatID(im.id, s);
                if (idSet != null) { idSet.Add(f); }
                gradList[f] += bel.belState[i][s] * im.val;
                gradList[f] -= belMasked.belState[i][s] * im.val;
            }
        }
    }
    //loop over edges to compute features and update the gradient
    for (int i = 1; i < x.Count; i++) {
        //non-rich (traditional) edge features
        if (Global.useTraditionalEdge) {
            for (int s = 0; s < nTag; s++) {
                for (int sPre = 0; sPre < nTag; sPre++) {
                    int f = _fGene.getEdgeFeatID(sPre, s);
                    if (idSet != null) { idSet.Add(f); }
                    gradList[f] += bel.belEdge[i][sPre, s];
                    gradList[f] -= belMasked.belEdge[i][sPre, s];
                }
            }
        }
        //rich edge features
        fList = _fGene.getFeatureTemp(x, i);
        foreach (featureTemp im in fList) {
            int id = im.id;
            if (id < _fGene.getNRichFeatTemp()) {
                for (int s = 0; s < nTag; s++) {
                    for (int sPre = 0; sPre < nTag; sPre++) {
                        int f = _fGene.getEdgeFeatID(id, sPre, s);
                        if (idSet != null) { idSet.Add(f); }
                        gradList[f] += bel.belEdge[i][sPre, s] * im.val;
                        gradList[f] -= belMasked.belEdge[i][sPre, s] * im.val;
                    }
                }
            }
        }
    }
    return Z - ZGold; //-log{P(y*|x,w)}
}
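// A sketch of how the edge part of the gradient above decomposes: the traditional edge features
// f(sPre, s) contribute the plain edge beliefs, while the "rich" observation-dependent edge features
// f(id, sPre, s) contribute the edge beliefs scaled by the template value im.val. Only templates with
// id < getNRichFeatTemp() receive rich edge features, matching the ft_richEdge pruning used in the
// feature generator later in this listing.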
//for training
public featureGenerator(dataSet X) {
    _nFeatureTemp = X.NFeature;
    int ft_richEdge = (int)(X.NFeature * Global.edgeReduce);
    _nTag = X.NTag;
    _nEdge = _nTag * _nTag;
    Global.swLog.WriteLine("feature templates: {0}", _nFeatureTemp);
    //build feature mapping etc. information
    //baseHashMap<string, int> strIntMap = new baseHashMap<string, int>(_nFeatureTemp * _nTag, 0.65, 2);
    baseHashSet<int>[] setAry = new baseHashSet<int>[_nFeatureTemp];
    for (int i = 0; i < setAry.Length; i++) {
        setAry[i] = new baseHashSet<int>();
    }
    List<nodeFeature>[] idNodeFeatures = new List<nodeFeature>[_nFeatureTemp];
    List<edgeFeature>[] idEdgeFeatures = new List<edgeFeature>[_nFeatureTemp];
    List<edgeFeature>[] idEdgeFeatures2 = new List<edgeFeature>[_nFeatureTemp];
    for (int i = 0; i < _nFeatureTemp; i++) {
        idNodeFeatures[i] = new List<nodeFeature>();
        idEdgeFeatures[i] = new List<edgeFeature>();
        idEdgeFeatures2[i] = new List<edgeFeature>();
    }
    int fIndex = _nEdge; //feature ids start from this value
    int factor = 10000, factor2 = 100000;
    if (Global.negFeatureMode == "node") {
        //negative features for node features
        for (int id = 0; id < _nFeatureTemp; id++) {
            for (int tag = 0; tag < _nTag; tag++) {
                //node feature
                int mark = tag;
                if (!setAry[id].Contains(mark)) {
                    int fid = fIndex;
                    setAry[id].Add(mark);
                    fIndex++;
                    nodeFeature feat = new nodeFeature(tag, fid);
                    idNodeFeatures[id].Add(feat);
                }
            }
        }
    }
    else if (Global.negFeatureMode == "edge") {
        //negative features for node & edge features
        //s2 case
        for (int id = 0; id < _nFeatureTemp; id++) {
            for (int tag = 0; tag < _nTag; tag++) {
                //node feature
                int mark = tag;
                if (!setAry[id].Contains(mark)) {
                    int fid = fIndex;
                    setAry[id].Add(mark);
                    fIndex++;
                    nodeFeature feat = new nodeFeature(tag, fid);
                    idNodeFeatures[id].Add(feat);
                }
            }
        }
        //negative rich edge features
        for (int id = 0; id < _nFeatureTemp; id++) {
            //rich edge here, non-rich edge feature is already coded before
            if (id < ft_richEdge) {
                //pruning rich edge features, id relates to frequency of features
                for (int random = 0; random < Global.nNegEdgeFeat; random++) {
                    int tag = randomTool.getOneRandom_int(0, _nTag), preTag = randomTool.getOneRandom_int(0, _nTag);
                    int mark = tag * factor + preTag;
                    if (!setAry[id].Contains(mark)) {
                        int fid = fIndex;
                        setAry[id].Add(mark);
                        fIndex++;
                        edgeFeature feat = new edgeFeature(tag, preTag, fid);
                        idEdgeFeatures[id].Add(feat);
                    }
                }
                //rich2
                if (Global.richFeat2) {
                    for (int random = 0; random < Global.nNegEdgeFeat; random++) {
                        int tag = randomTool.getOneRandom_int(0, _nTag), preTag = randomTool.getOneRandom_int(0, _nTag);
                        int mark = tag * factor2 + preTag;
                        if (!setAry[id].Contains(mark)) {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;
                            edgeFeature feat = new edgeFeature(tag, preTag, fid);
                            idEdgeFeatures2[id].Add(feat);
                        }
                    }
                }
            }
        }
    }
    else if (Global.negFeatureMode == "full") {
        //full negative features for node features & edge features
        //s2 case
        for (int id = 0; id < _nFeatureTemp; id++) {
            for (int tag = 0; tag < _nTag; tag++) {
                //node feature
                int mark = tag;
                if (!setAry[id].Contains(mark)) {
                    int fid = fIndex;
                    setAry[id].Add(mark);
                    fIndex++;
                    nodeFeature feat = new nodeFeature(tag, fid);
                    idNodeFeatures[id].Add(feat);
                }
            }
        }
        //negative rich edge features
        for (int id = 0; id < _nFeatureTemp; id++) {
            //rich edge here, non-rich edge feature is already coded before
            if (id < ft_richEdge) {
                //pruning rich edge features, id relates to frequency of features
                for (int tag = 0; tag < _nTag; tag++) {
                    for (int preTag = 0; preTag < _nTag; preTag++) {
                        int mark = tag * factor + preTag;
                        if (!setAry[id].Contains(mark)) {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;
                            edgeFeature feat = new edgeFeature(tag, preTag, fid);
                            idEdgeFeatures[id].Add(feat);
                        }
                    }
                }
                //rich2
                if (Global.richFeat2) {
                    for (int tag = 0; tag < _nTag; tag++) {
                        for (int preTag = 0; preTag < _nTag; preTag++) {
                            int mark = tag * factor2 + preTag;
                            if (!setAry[id].Contains(mark)) {
                                int fid = fIndex;
                                setAry[id].Add(mark);
                                fIndex++;
                                edgeFeature feat = new edgeFeature(tag, preTag, fid);
                                idEdgeFeatures2[id].Add(feat);
                            }
                        }
                    }
                }
            }
        }
    }
    //true features
    foreach (dataSeq x in X) {
        for (int i = 0; i < x.Count; i++) {
            List<featureTemp> fList = getFeatureTemp(x, i);
            int tag = x.getTags(i);
            foreach (featureTemp im in fList) {
                int id = im.id;
                //node feature
                int mark = tag;
                if (!setAry[id].Contains(mark)) {
                    int fid = fIndex;
                    setAry[id].Add(mark);
                    fIndex++;
                    nodeFeature feat = new nodeFeature(tag, fid);
                    idNodeFeatures[id].Add(feat);
                }
                //rich edge here, non-rich edge feature is already coded before
                if (i > 0 && id < ft_richEdge) {
                    //pruning rich edge features, id relates to frequency of features
                    int preTag = x.getTags(i - 1);
                    mark = tag * factor + preTag;
                    if (!setAry[id].Contains(mark)) {
                        int fid = fIndex;
                        setAry[id].Add(mark);
                        fIndex++;
                        edgeFeature feat = new edgeFeature(tag, preTag, fid);
                        idEdgeFeatures[id].Add(feat);
                    }
                }
                //rich2 feature
                if (Global.richFeat2) {
                    if (i < x.Count - 1 && id < ft_richEdge) {
                        //pruning rich edge features, id relates to frequency of features
                        int postTag = x.getTags(i + 1);
                        mark = tag * factor2 + postTag;
                        if (!setAry[id].Contains(mark)) {
                            int fid = fIndex;
                            setAry[id].Add(mark);
                            fIndex++;
                            edgeFeature feat = new edgeFeature(postTag, tag, fid);
                            idEdgeFeatures2[id].Add(feat);
                        }
                    }
                }
            }
        }
    }
    //build globals
    Global.idNodeFeatures = new nodeFeature[_nFeatureTemp][];
    Global.idEdgeFeatures = new edgeFeature[_nFeatureTemp][];
    Global.idEdgeFeatures2 = new edgeFeature[_nFeatureTemp][];
    for (int i = 0; i < _nFeatureTemp; i++) {
        Global.idNodeFeatures[i] = idNodeFeatures[i].ToArray();
        Global.idEdgeFeatures[i] = idEdgeFeatures[i].ToArray();
        Global.idEdgeFeatures2[i] = idEdgeFeatures2[i].ToArray();
    }
    _nCompleteFeature = fIndex;
    Global.swLog.WriteLine("feature templates & rich-edge feature templates: {0}, {1}", _nFeatureTemp, ft_richEdge);
    //Global.swLog.WriteLine("nNodeFeature, nEdgeFeature1, nEdgeFeature2: {0}, {1}, {2}", nNodeFeature, nEdgeFeature1, nEdgeFeature2);
    Global.swLog.WriteLine("complete features: {0}", _nCompleteFeature);
    Global.swLog.WriteLine();
    Global.swLog.Flush();
    //release references before collection so the temporary structures can be reclaimed
    setAry = null;
    idNodeFeatures = null;
    idEdgeFeatures = null;
    idEdgeFeatures2 = null;
    GC.Collect();
}
//fast multi-task learning via approximation
public double sgd_multi_fast() {
    int fsize = (_modelList[0]).W.Count;
    List<int> sizeList = new List<int>();
    int maxSize = 0;
    for (int i = 0; i < _newXList.Count; i++) {
        dataSet Xi = _newXList[i];
        int size = Xi.Count;
        sizeList.Add(size);
        if (maxSize < size) {
            maxSize = size;
        }
    }
    double error = 0;
    double r_k = 0;
    List<double> vecGrad = new List<double>(new double[fsize]);
    List<List<int>> riList = new List<List<int>>();
    for (int i = 0; i < _newXList.Count; i++) {
        int size = sizeList[i];
        List<int> ri = randomTool<int>.getShuffledIndexList(size);
        riList.Add(ri);
    }
    for (int t = 0; t < maxSize; t++) {
        r_k = Global.rate0 * Math.Pow(Global.decayFactor, (double)Global.countWithIter / (double)maxSize);
        if (Global.countWithIter % (maxSize / 4) == 0) {
            Global.swLog.WriteLine("iter{0} decay_rate={1}", Global.glbIter, r_k.ToString("e2"));
        }
        List<dataSeq> X = new List<dataSeq>();
        for (int i = 0; i < _newXList.Count; i++) {
            dataSet Xi = _newXList[i];
            List<int> ri = riList[i];
            int size = sizeList[i];
            int idx = ri[t % size];
            dataSeq x = Xi[idx];
            X.Add(x);
        }
        baseHashSet<int> fset = new baseHashSet<int>();
        for (int i = 0; i < Global.nTask; i++) {
            for (int j = 0; j < Global.nTask; j++) {
                if (i == j) {
                    model m = _modelList[i];
                    List<double> w = m.W;
                    dataSeq x = X[j];
                    double err = _grad.getGrad_SGD(vecGrad, m, x, fset);
                    weightUpdate(w, vecGrad, fset, r_k);
                    error += err;
                }
                else if (t % Global.sampleFactor == 0) {
                    //probabilistic sampling for faster speed
                    model m = _modelList[i];
                    List<double> w = m.W;
                    dataSeq x = X[j];
                    double err = _grad.getGrad_SGD(vecGrad, m, x, fset);
                    double simi = _simiBiAry[i, j];
                    weightUpdate(w, vecGrad, fset, r_k * simi * Global.sampleFactor);
                }
            }
        }
        Global.countWithIter++;
    }
    //reg
    for (int i = 0; i < Global.nTask; i++) {
        error += reg(_modelList[i], fsize, r_k);
    }
    //update the similarity biAry
    if (Global.glbIter == Global.simiUpdateIter) {
        if (Global.simiMode == "cov") {
            updateSimi_covariance(_modelList);
        }
        else if (Global.simiMode == "poly") {
            updateSimi_polynomial(_modelList);
        }
        else if (Global.simiMode == "rbf") {
            updateSimi_RBF(_modelList);
        }
        Console.WriteLine("updated simi-matrix!");
    }
    return error;
}
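// The multi-task update in the loop above, written out as a sketch: each task i takes a normal SGD
// step on its own sample, w_i <- w_i - r_k * grad_i(x_i), and additionally a transfer step for every
// other task j, w_i <- w_i - r_k * simi(i, j) * grad_i(x_j), where grad_i(x_j) is the gradient of
// model i on task j's sample and simi(i, j) is the entry of the task-similarity matrix _simiBiAry.
// To save time, the cross-task steps are taken only once every Global.sampleFactor iterations, and
// their step size is multiplied by sampleFactor so the expected total update stays roughly the same.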