static void crossValidation()
{
    //load data
    Console.WriteLine("reading cross validation data...");
    Global.swLog.WriteLine("reading cross validation data...");
    List<dataSet> XList = new List<dataSet>();
    List<dataSet> XXList = new List<dataSet>();
    loadDataForCV(XList, XXList);

    for (int i = 0; i < Global.nCV; i++)
    {
        Global.swLog.WriteLine("\n#validation={0}", i + 1);
        Console.WriteLine("\n#validation={0}", i + 1);
        if (Global.rawResWrite)
            Global.swResRaw.WriteLine("% #validation={0}", i + 1);

        dataSet Xi = XList[i];
        toolbox tb = new toolbox(Xi);
        baseTrain(XXList[i], tb);
        resSummarize.write();
        if (Global.rawResWrite)
            Global.swResRaw.WriteLine();
    }
}
static double train(dataSet X = null, dataSet XX = null)
{
    //load data
    if (X == null && XX == null)
    {
        Console.WriteLine("\nreading training & test data...");
        Global.swLog.WriteLine("\nreading training & test data...");
        X = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
        XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
        dataSizeScale(X);
        Console.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
        Global.swLog.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
    }

    double score = 0;
    toolbox tb = new toolbox(X, true);
    score = baseTrain(XX, tb);
    resSummarize.write();

    //save model
    if (Global.save == 1)
        tb.Model.save(Global.fModel);
    return score;
}
static double test()
{
    Console.WriteLine("reading test data...");
    Global.swLog.WriteLine("reading test data...");
    dataSet XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
    Console.WriteLine("Done! test data size: {0}", XX.Count);
    Global.swLog.WriteLine("Done! test data size: {0}", XX.Count);

    //load model & feature files for testing
    toolbox tb = new toolbox(XX, false);
    Stopwatch timer = new Stopwatch();
    timer.Start();
    List<double> scoreList = tb.test(XX, 0);
    timer.Stop();
    double time = timer.ElapsedMilliseconds / 1000.0;
    Global.timeList.Add(time);

    double score = scoreList[0];
    Global.scoreListList.Add(scoreList);
    resSummarize.write();
    return score;
}
//this function can be called by train(), cv(), & richEdge.train()
public static double baseTrain(dataSet XTest, toolbox tb)
{
    Global.reinitGlobal();
    double score = 0;
    for (int i = 0; i < Global.ttlIter; i++)
    {
        Global.glbIter++;
        Stopwatch timer = new Stopwatch();
        timer.Start();
        double err = tb.train();
        timer.Stop();
        double time = timer.ElapsedMilliseconds / 1000.0;

        Global.timeList.Add(time);
        Global.errList.Add(err);
        Global.diffList.Add(Global.diff);

        List<double> scoreList = tb.test(XTest, i);
        score = scoreList[0];
        Global.scoreListList.Add(scoreList);

        Global.swLog.WriteLine("iter{0} diff={1} train-time(sec)={2} {3}={4}%", Global.glbIter, Global.diff.ToString("e2"), time.ToString("f2"), Global.metric, score.ToString("f2"));
        Global.swLog.WriteLine("------------------------------------------------");
        Global.swLog.Flush();
        Console.WriteLine("iter{0} diff={1} train-time(sec)={2} {3}={4}%", Global.glbIter, Global.diff.ToString("e2"), time.ToString("f2"), Global.metric, score.ToString("f2"));

        //if (Global.diff < Global.convergeTol)
        //    break;
    }
    return score;
}
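// For reference, the minimal calling pattern behind the comment above, mirroring
// train(): build a toolbox on the training set, then let baseTrain() run the
// iterate/evaluate loop. This is a usage sketch, not additional toolkit code;
// the second toolbox argument follows train()'s usage.
static double trainSketch()
{
    dataSet X = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
    dataSet XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
    toolbox tb = new toolbox(X, true);
    return baseTrain(XX, tb);   // returns the test score from the last iteration
}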
public optimLBFGS(toolbox tb, List<double> wInit, int memo, double l1weight, double maxIter)
{
    //validate before the memo-sized allocation below (the original checked after use)
    if (memo <= 0)
        throw new Exception("memo must be an integer greater than zero.");

    _model = tb.Model;
    _modelList = tb.ModelList;
    _X = tb.X;
    _XList = tb.XList;
    _inf = tb.Inf;
    _fGene = tb.FGene;
    _grad = tb.Grad;

    double[] tmpAry = new double[wInit.Count];
    _w = new List<double>(wInit);
    _gradList = new List<double>(tmpAry);
    _newW = new List<double>(wInit);
    _newGradList = new List<double>(tmpAry);
    _dir = new List<double>(tmpAry);
    _steepestDescDir = new List<double>(_newGradList);
    double[] tmpAry2 = new double[memo];
    _alphas = new List<double>(tmpAry2);

    _iter = 0;
    _memo = memo;
    _dim = wInit.Count;
    _l1weight = l1weight;
    _maxIter = maxIter;

    _value = evalL1();
    listTool.listSet(ref _gradList, _newGradList);
}
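// The _memo-sized _alphas buffer, _steepestDescDir, and the l1weight/evalL1()
// pair are the usual scaffolding of (OWL-QN-style) limited-memory BFGS. As a
// point of reference, here is a minimal self-contained sketch of the classic
// two-loop recursion that such an alpha buffer caches. All names here
// (TwoLoopDirection, sList, yList) are illustrative, NOT this toolkit's API.
// Requires: using System.Collections.Generic;
static double[] TwoLoopDirection(double[] grad, List<double[]> sList, List<double[]> yList)
{
    //given the last m curvature pairs s_i = w_{i+1}-w_i, y_i = g_{i+1}-g_i
    //(index 0 = oldest, m-1 = newest), approximate H^{-1} * grad
    int m = sList.Count;
    double[] q = (double[])grad.Clone();
    double[] alphas = new double[m];   //plays the role of _alphas

    //first loop: newest pair to oldest
    for (int i = m - 1; i >= 0; i--)
    {
        double rho = 1.0 / Dot(yList[i], sList[i]);
        alphas[i] = rho * Dot(sList[i], q);
        for (int j = 0; j < q.Length; j++)
            q[j] -= alphas[i] * yList[i][j];
    }

    //scale by gamma = (s.y)/(y.y) of the newest pair
    if (m > 0)
    {
        double gamma = Dot(sList[m - 1], yList[m - 1]) / Dot(yList[m - 1], yList[m - 1]);
        for (int j = 0; j < q.Length; j++)
            q[j] *= gamma;
    }

    //second loop: oldest pair to newest
    for (int i = 0; i < m; i++)
    {
        double rho = 1.0 / Dot(yList[i], sList[i]);
        double beta = rho * Dot(yList[i], q);
        for (int j = 0; j < q.Length; j++)
            q[j] += (alphas[i] - beta) * sList[i][j];
    }
    return q;   //the descent direction for minimization is -q
}

static double Dot(double[] a, double[] b)
{
    double s = 0;
    for (int i = 0; i < a.Length; i++)
        s += a[i] * b[i];
    return s;
}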
public optimSAPO(toolbox tb)
{
    _model = tb.Model;
    _X = tb.X;
    _inf = tb.Inf;
    _fGene = tb.FGene;

    //reinit globals
    Global.reinitGlobal();
}
static void crossValidation()
{
    //load data
    Console.WriteLine("reading cross validation data...");
    Global.swLog.WriteLine("reading cross validation data...");
    List<dataSet> XList = new List<dataSet>();
    List<dataSet> XXList = new List<dataSet>();
    loadDataForCV(XList, XXList);

    //start cross validation
    foreach (double r in Global.regList)//do CV for each different regularizer r (sigma)
    {
        Global.swLog.WriteLine("\ncross validation. r={0}", r);
        Console.WriteLine("\ncross validation. r={0}", r);
        if (Global.rawResWrite)
            Global.swResRaw.WriteLine("% cross validation. r={0}", r);

        for (int i = 0; i < Global.nCV; i++)
        {
            Global.swLog.WriteLine("\n#validation={0}", i + 1);
            Console.WriteLine("\n#validation={0}", i + 1);
            if (Global.rawResWrite)
                Global.swResRaw.WriteLine("% #validation={0}", i + 1);

            Global.reg = r;
            dataSet Xi = XList[i];
            if (Global.runMode.Contains("rich"))
            {
                toolboxRich tb = new toolboxRich(Xi);
                basicTrain(XXList[i], tb);
            }
            else
            {
                toolbox tb = new toolbox(Xi);
                basicTrain(XXList[i], tb);
            }
            resSummarize.write();
            if (Global.rawResWrite)
                Global.swResRaw.WriteLine();
        }
        if (Global.rawResWrite)
            Global.swResRaw.WriteLine();
    }
}
public optimStochastic(toolbox tb)
{
    _model = tb.Model;
    _X = tb.X;
    _inf = tb.Inf;
    _fGene = tb.FGene;
    _grad = tb.Grad;

    //init: one learning rate per feature, all starting from rate0
    int fsize = _model.W.Length;
    Global.decayList = new List<double>(new double[fsize]);
    listTool.listSet(ref Global.decayList, Global.rate0);
}
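// Given the per-feature decayList seeded with rate0 above, the update this
// constructor sets up is presumably a stochastic gradient step with an
// individually decaying rate per weight. A minimal sketch of that scheme;
// the update sign, the decay rule, and all names here are assumptions for
// illustration, not this toolkit's code.
static void SgdStep(double[] w, double[] grad, double[] rates, double decay)
{
    for (int i = 0; i < w.Length; i++)
    {
        if (grad[i] == 0) continue;    //untouched features keep their rate
        w[i] -= rates[i] * grad[i];    //gradient step on this weight
        rates[i] *= decay;             //e.g. a factor slightly below 1.0
    }
}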
public optimPercMIRA(toolbox tb)
{
    _model = tb.Model;
    _X = tb.X;
    _inf = tb.Inf;
    _fGene = tb.FGene;
    _sumW = new float[_model.W.Length];
    _tmpW = new float[_model.W.Length];
    _recoverFlag = false;

    //reinit globals
    Global.reinitGlobal();
}
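// _sumW/_tmpW are the usual scaffolding for parameter averaging in
// perceptron/MIRA training. A minimal sketch of averaged-perceptron
// bookkeeping under that assumption; names and structure are illustrative,
// not this class's members. Requires: using System;
static float[] TrainAveraged(float[] w, int nUpdates, Action<float[]> doOneUpdate)
{
    float[] sumW = new float[w.Length];    //plays the assumed role of _sumW
    for (int t = 0; t < nUpdates; t++)
    {
        doOneUpdate(w);                    //one perceptron/MIRA update on w
        for (int i = 0; i < w.Length; i++)
            sumW[i] += w[i];               //running sum of weight vectors
    }
    for (int i = 0; i < sumW.Length; i++)
        sumW[i] /= nUpdates;               //averaged weights reduce variance
    return sumW;
}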
public optimSGD(toolbox tb)
{
    _model = tb.Model;
    _modelList = tb.ModelList;
    _X = tb.X;
    _XList = tb.XList;
    _inf = tb.Inf;
    _fGene = tb.FGene;
    _grad = tb.Grad;

    //init
    if (Global.runMode.Contains("mt"))
        initForMulti();
}
//merged learning in multi-task framework
public static void train_multi_merge(List<dataSet> XXList, toolbox tb)
{
    Global.reinitGlobal();
    if (Global.optim.Contains("bfgs"))
    {
        Global.bfgsTb = tb;
        Global.bfgsXXList = XXList;
        Global.bfgsTestMode = "mt.merge";
    }

    for (double i = 0; i < Global.ttlIter; i++)
    {
        Global.glbIter++;
        Stopwatch timer = new Stopwatch();
        timer.Start();
        double error = tb.train_single();
        timer.Stop();
        double time = timer.ElapsedMilliseconds / 1000.0;
        Global.swLog.WriteLine("Training used time (second): " + time.ToString());

        //evaluate
        if (!Global.optim.Contains("bfgs"))//test is already done in bfgs training
        {
            List<double> scoreList = tb.test_multi_merge(XXList, i, Global.swOutputList);
            for (int k = 0; k < Global.nTask; k++)
                Global.scoreTaskList_multi[k].Add(scoreList[k]);
            Global.timeList_multi[Global.glbIter - 1] += time;
            Global.errorList_multi[Global.glbIter - 1] += error;
        }

        if (Global.diff < Global.convergeTol)
            break;
    }

    //save model
    if (Global.save == 1)
        tb.Model.save(Global.modelDir + Global.fModel);
}
static double train()
{
    //load data
    Console.WriteLine("\nreading training & test data...");
    Global.swLog.WriteLine("\nreading training & test data...");
    dataSet X, XX;
    if (Global.runMode.Contains("tune"))//"tune"-related code lives here because train() can be a sub-function of tune()
    {
        dataSet origX = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
        X = new dataSet();
        XX = new dataSet();
        dataSplit(origX, Global.tuneSplit, X, XX);
    }
    else
    {
        X = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
        XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
        dataSizeScale(X);
    }
    Console.WriteLine("done! train/test data sizes: {0}/{1}", X.Count, XX.Count);
    Global.swLog.WriteLine("done! train/test data sizes: {0}/{1}", X.Count, XX.Count);

    double score = 0;
    //start training
    foreach (double r in Global.regList)//train on different r (sigma)
    {
        Global.reg = r;
        Global.swLog.WriteLine("\nr: " + r.ToString());
        Console.WriteLine("\nr: " + r.ToString());
        if (Global.rawResWrite)
            Global.swResRaw.WriteLine("\n%r: " + r.ToString());

        toolbox tb = new toolbox(X, true);
        score = basicTrain(XX, tb);
        resSummarize.write();//summarize the results & output the summarized results
        if (Global.save == 1)
            tb.Model.save(Global.fModel);//save model as a .txt file
    }
    return score;
}
//multi-task learning
public static void train_multi_mtl(List<dataSet> XXList, toolbox tb)
{
    Global.reinitGlobal();
    for (double iter = 0; iter < Global.ttlIter; iter++)
    {
        Global.glbIter++;
        Stopwatch timer = new Stopwatch();
        timer.Start();
        double error = tb.train_multi();
        timer.Stop();
        double time = timer.ElapsedMilliseconds / 1000.0;
        Global.swLog.WriteLine("Training used time (second): " + time.ToString());

        //evaluate
        List<double> scoreList = tb.test_multi_mtl(XXList, iter, Global.swOutputList);
        for (int i = 0; i < Global.nTask; i++)
            Global.scoreTaskList_multi[i].Add(scoreList[i]);
        Global.timeList_multi[Global.glbIter - 1] += time;
        Global.errorList_multi[Global.glbIter - 1] += error;

        if (iter > 30 && Global.diff > 0 && Global.diff < Global.convergeTol)
            break;
    }

    //save model
    if (Global.save == 1)
    {
        for (int i = 0; i < Global.nTask; i++)
            tb.ModelList[i].save(Global.modelDir + i.ToString() + Global.fModel);
    }
}
public optimLBFGS(toolbox tb, float[] init, int m, double l1weight, double maxIter)
{
    //validate before the m-sized allocation below (the original checked after use)
    if (m <= 0)
        throw new Exception("m must be an integer greater than zero.");

    _model = tb.Model;
    _X = tb.X;
    _inf = tb.Inf;
    _fGene = tb.FGene;
    _grad = tb.Grad;

    double[] wInit = new double[init.Length];
    for (int i = 0; i < init.Length; i++)
        wInit[i] = (double)init[i];

    double[] tmpAry = new double[wInit.Length];
    _w = new List<double>(wInit);
    _gradList = new List<double>(tmpAry);
    _newW = new List<double>(wInit);
    _newGradList = new List<double>(tmpAry);
    _dir = new List<double>(tmpAry);
    _steepestDescDir = new List<double>(_newGradList);
    double[] tmpAry2 = new double[m];
    _alphas = new List<double>(tmpAry2);

    _iter = 0;
    _memo = m;
    _dim = wInit.Length;
    _l1weight = l1weight;
    _maxIter = maxIter;

    _value = evalL1();
    listTool.listSet(ref _gradList, _newGradList);
}
public inference(toolbox tb)
{
    _optim = tb.Optim;
    _fGene = tb.FGene;
}

public inference(toolbox tb)
{
    _optim = tb.Optim;
    _fGene = tb.FGene;
    _grad = tb.Grad;
}

public gradient(toolbox tb)
{
    _optim = tb.Optim;
    _inf = tb.Inf;
    _fGene = tb.FGene;
}
public inferRich(toolbox tb) : base(tb) { }
static double train(dataSet X = null, dataSet XX = null)
{
    //load data
    if (X == null && XX == null)
    {
        Console.WriteLine("\nreading training & test data...");
        Global.swLog.WriteLine("\nreading training & test data...");
        X = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
        XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
        dataSizeScale(X);

        //average sequence length of each set
        double trainLength = 0, testLength = 0;
        foreach (dataSeq x in X)
            trainLength += x.Count;
        trainLength /= (double)X.Count;
        foreach (dataSeq x in XX)
            testLength += x.Count;
        testLength /= (double)XX.Count;

        Console.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
        Global.swLog.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
        Global.swLog.WriteLine("sample length (train, test): {0} {1}", trainLength.ToString("f2"), testLength.ToString("f2"));
        Console.WriteLine("sample length (train, test): {0} {1}", trainLength.ToString("f2"), testLength.ToString("f2"));
        if (Global.structReg)
        {
            double trainAlpha = trainLength / Global.miniSize;
            Global.swLog.WriteLine("train-alpha in structReg: {0}", trainAlpha.ToString("f2"));
        }
        Global.swLog.Flush();
    }

    double score = 0;
    if (Global.structReg)
    {
        foreach (double sr in Global.srList)
        {
            Global.miniSize = sr;
            Global.swLog.WriteLine("\n%sr:{0}", sr);
            Console.WriteLine("\n%sr:{0}", sr);
            if (Global.rawResWrite)
                Global.swResRaw.WriteLine("\n%sr:{0}", sr);

            toolbox tb = new toolbox(X);
            score = baseTrain(XX, tb);
            resSummarize.write();

            //save model
            if (Global.save == 1)
                tb.Model.save(Global.fModel);
        }
    }
    else
    {
        toolbox tb = new toolbox(X);
        score = baseTrain(XX, tb);
        resSummarize.write();

        //save model
        if (Global.save == 1)
            tb.Model.save(Global.fModel);
    }
    return score;
}
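// Global.miniSize and srList drive structure regularization: the trainAlpha
// computed above (trainLength / miniSize) reads as the expected number of
// mini-samples per training sequence. A minimal sketch of such a
// decomposition, assuming a simple contiguous split; the toolkit's actual
// splitting strategy may well differ. Requires: using System;
// using System.Collections.Generic;
static List<List<T>> SplitIntoMiniSamples<T>(List<T> seq, int miniSize)
{
    var pieces = new List<List<T>>();
    for (int start = 0; start < seq.Count; start += miniSize)
    {
        int len = Math.Min(miniSize, seq.Count - start);
        pieces.Add(seq.GetRange(start, len));
    }
    return pieces;   //roughly seq.Count / miniSize pieces per sequence
}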
public gradRich(toolbox tb) : base(tb) { }
static void multiTask()
{
    //train
    if (Global.runMode.Contains("train"))
    {
        //load data
        List<dataSet> XList = new List<dataSet>();
        List<dataSet> XXList = new List<dataSet>();
        dataSet X = new dataSet();
        loadData_multi(XList, X, XXList);
        toolbox toolbox;

        //single-task training in multi-task framework: each task has its own independent train & test data
        if (Global.mt_singleTrain)
        {
            foreach (double r in Global.regList)//experiments for each different regularizer value
            {
                Global.swResRaw.WriteLine("\n%single-task! r: {0}", r);
                Console.WriteLine("\nsingle-task! r: {0}", r);
                for (int i = 0; i < Global.nTask; i++)
                {
                    Global.swLog.WriteLine("\nsingle-task! #task, r: " + (i + 1).ToString() + "," + r.ToString());
                    Console.WriteLine("\nsingle-task! #task, r: " + (i + 1).ToString() + "," + r.ToString());
                    Global.reg = r;
                    dataSet Xi = XList[i];
                    toolbox = new toolbox(Xi);
                    train_multi_single(XXList, toolbox, i);
                }
                resProcess.write_multi();
            }
            Global.swResRaw.WriteLine();
        }

        //merged training in multi-task framework: merge all training data to train a unified model
        if (Global.mt_mergeTrain)
        {
            foreach (double r in Global.regList)//experiments for each different regularizer value
            {
                Global.reg = r;
                Global.swLog.WriteLine("\nmerged-task! r: " + r.ToString());
                Console.WriteLine("\nmerged-task! r: " + r.ToString());
                Global.swResRaw.WriteLine("\n%merged-task! r: " + r.ToString());
                toolbox = new toolbox(X);
                train_multi_merge(XXList, toolbox);
                resProcess.write_multi();
            }
            Global.swResRaw.WriteLine();
        }

        //multi-task learning
        if (Global.mt_mtTrain)
        {
            foreach (double r in Global.regList)//experiments for each different regularizer value
            {
                Global.reg = r;
                foreach (double cFactor in Global.cFactors)//experiments for each different C value (see Eq. 18 & 19 of [Sun+ TKDE 2013] for the definition of C)
                {
                    Global.C = cFactor;
                    Global.swLog.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                    Global.swSimi.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                    Global.swResRaw.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                    Console.WriteLine("\nmulti-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                    toolbox = new toolbox(X, XList);
                    train_multi_mtl(XXList, toolbox);
                    resProcess.write_multi();
                }
            }
            Global.swResRaw.WriteLine();
        }
    }
    else if (Global.runMode.Contains("test1"))//normal test
    {
        //load data
        List<dataSet> XList = new List<dataSet>();
        List<dataSet> XXList = new List<dataSet>();
        dataSet X = new dataSet();
        loadData_multi(XList, X, XXList);

        //load model etc.
        toolbox tb = new toolbox(X, XList, false);
        if (Global.mt_mergeTrain)//multi_merge
        {
            List<double> scoreList = tb.test_multi_merge(XXList, 0, Global.swOutputList);
            for (int i = 0; i < Global.nTask; i++)
                Global.scoreTaskList_multi[i].Add(scoreList[i]);
            resProcess.write_multi();
        }
        else//multi_single or multi_mtl: they share the same testing schema
        {
            List<double> scoreList = tb.test_multi_mtl(XXList, 0, Global.swOutputList);
            for (int i = 0; i < Global.nTask; i++)
                Global.scoreTaskList_multi[i].Add(scoreList[i]);
            resProcess.write_multi();
        }
    }
    else if (Global.runMode.Contains("test2"))//for multi_mtl: test a new task by choosing the most similar model
    {
        //load data
        List<dataSet> XList = new List<dataSet>();
        List<dataSet> XXList = new List<dataSet>();
        dataSet X = new dataSet();
        loadData_multi(XList, X, XXList);

        //get vectors
        List<List<double>> vecList = new List<List<double>>();
        foreach (dataSet Xi in XList)
        {
            List<double> vec = getVecFromX(Xi);
            vecList.Add(vec);
        }

        //load model & test
        toolbox tb = new toolbox(X, XList, false);
        List<double> scoreList = tb.test2_multi_mtl(vecList, XXList, 0, Global.swOutputList);
        for (int i = 0; i < Global.nTask; i++)
            Global.scoreTaskList_multi[i].Add(scoreList[i]);
        resProcess.write_multi();
    }
    else if (Global.runMode.Contains("test3"))//for multi_mtl: test a new task via voted-test based on all models, i.e., the OMT-SBD method described in Section 4.4 of [Sun+ TKDE 2013]
    {
        //load data
        List<dataSet> XList = new List<dataSet>();
        List<dataSet> XXList = new List<dataSet>();
        dataSet X = new dataSet();
        loadData_multi(XList, X, XXList);

        //get vectors
        List<List<double>> vecList = new List<List<double>>();
        foreach (dataSet Xi in XList)
        {
            List<double> vec = getVecFromX(Xi);
            vecList.Add(vec);
        }

        //load model & test
        toolbox tb = new toolbox(X, XList, false);
        List<double> scoreList = tb.test3_multi_mtl(vecList, XXList, 0, Global.swOutputList);
        for (int i = 0; i < Global.nTask; i++)
            Global.scoreTaskList_multi[i].Add(scoreList[i]);
        resProcess.write_multi();
    }
    else
    {
        throw new Exception("error: unknown runMode for multiTask()");
    }
}
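// For test2, getVecFromX presumably maps a dataset to a task-representation
// vector, and the new task is routed to the model whose training-set vector
// is most similar. A minimal sketch of that selection step under a
// cosine-similarity assumption; the measure actually used is governed by
// Global.simiMode, which this sketch does not reproduce. Requires:
// using System; using System.Collections.Generic;
static int MostSimilarTask(List<double> newTaskVec, List<List<double>> vecList)
{
    int best = 0;
    double bestSim = double.MinValue;
    for (int k = 0; k < vecList.Count; k++)
    {
        double dot = 0, na = 0, nb = 0;
        for (int j = 0; j < newTaskVec.Count; j++)
        {
            dot += newTaskVec[j] * vecList[k][j];
            na += newTaskVec[j] * newTaskVec[j];
            nb += vecList[k][j] * vecList[k][j];
        }
        double sim = dot / (Math.Sqrt(na) * Math.Sqrt(nb) + 1e-12);
        if (sim > bestSim)
        {
            bestSim = sim;
            best = k;
        }
    }
    return best;   //index of the task model to reuse for the new task
}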