Example #1
        static void crossValidation()
        {
            //load data
            Console.WriteLine("reading cross validation data...");
            Global.swLog.WriteLine("reading cross validation data...");
            List <dataSet> XList  = new List <dataSet>();
            List <dataSet> XXList = new List <dataSet>();

            loadDataForCV(XList, XXList);

            for (int i = 0; i < Global.nCV; i++)
            {
                Global.swLog.WriteLine("\n#validation={0}", i + 1);
                Console.WriteLine("\n#validation={0}", i + 1);
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("% #validation={0}", i + 1);
                }
                dataSet Xi = XList[i];
                toolbox tb = new toolbox(Xi);
                baseTrain(XXList[i], tb);

                resSummarize.write();
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine();
                }
            }
        }
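loadDataForCV is not shown in this listing; judging by how XList and XXList are consumed above, it presumably fills XList with the Global.nCV training splits and XXList with the matching held-out splits. A minimal sketch of such a split over generic lists, assuming a round-robin fold assignment (cvSplit and its members are hypothetical names, not the toolkit's):

using System;
using System.Collections.Generic;

static class cvSplit
{
    //round-robin k-fold split: fills XList with the k training parts and
    //XXList with the k held-out parts, mirroring loadDataForCV's output shape
    public static void split<T>(List<T> data, int nFolds,
                                List<List<T>> XList, List<List<T>> XXList)
    {
        if (nFolds <= 1)
        {
            throw new ArgumentException("nFolds must be greater than one.");
        }
        for (int k = 0; k < nFolds; k++)
        {
            List<T> train   = new List<T>();
            List<T> heldOut = new List<T>();
            for (int i = 0; i < data.Count; i++)
            {
                if (i % nFolds == k)
                {
                    heldOut.Add(data[i]);
                }
                else
                {
                    train.Add(data[i]);
                }
            }
            XList.Add(train);
            XXList.Add(heldOut);
        }
    }
}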
Example #2
        static double train(dataSet X = null, dataSet XX = null)
        {
            //load data
            if (X == null && XX == null)
            {
                Console.WriteLine("\nreading training & test data...");
                Global.swLog.WriteLine("\nreading training & test data...");
                X  = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                dataSizeScale(X);
                Console.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
                Global.swLog.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
            }

            double  score = 0;
            toolbox tb    = new toolbox(X, true);

            score = baseTrain(XX, tb);
            resSummarize.write();
            //save model
            if (Global.save == 1)
            {
                tb.Model.save(Global.fModel);
            }

            return(score);
        }
Example #3
        static double test()
        {
            Console.WriteLine("reading test data...");
            Global.swLog.WriteLine("reading test data...");
            dataSet XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);

            Console.WriteLine("Done! test data size: {0}", XX.Count);
            Global.swLog.WriteLine("Done! test data size: {0}", XX.Count);
            //load model & feature files for testing
            toolbox tb = new toolbox(XX, false);

            Stopwatch timer = new Stopwatch();

            timer.Start();

            List <double> scoreList = tb.test(XX, 0);

            timer.Stop();
            double time = timer.ElapsedMilliseconds / 1000.0;

            Global.timeList.Add(time);
            double score = scoreList[0];

            Global.scoreListList.Add(scoreList);

            resSummarize.write();
            return(score);
        }
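Note: timer.ElapsedMilliseconds / 1000.0 truncates to whole milliseconds before dividing; Stopwatch also exposes timer.Elapsed.TotalSeconds, which yields the same quantity at full resolution. The pattern is kept as-is above because the rest of the toolkit logs time the same way.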
Example #4
        //this function can be called by train(), cv(), & richEdge.train()
        public static double baseTrain(dataSet XTest, toolbox tb)
        {
            Global.reinitGlobal();
            double score = 0;

            for (int i = 0; i < Global.ttlIter; i++)
            {
                Global.glbIter++;
                Stopwatch timer = new Stopwatch();
                timer.Start();

                double err = tb.train();

                timer.Stop();
                double time = timer.ElapsedMilliseconds / 1000.0;

                Global.timeList.Add(time);
                Global.errList.Add(err);
                Global.diffList.Add(Global.diff);

                List <double> scoreList = tb.test(XTest, i);
                score = scoreList[0];
                Global.scoreListList.Add(scoreList);

                Global.swLog.WriteLine("iter{0}  diff={1}  train-time(sec)={2}  {3}={4}%", Global.glbIter, Global.diff.ToString("e2"), time.ToString("f2"), Global.metric, score.ToString("f2"));
                Global.swLog.WriteLine("------------------------------------------------");
                Global.swLog.Flush();
                Console.WriteLine("iter{0}  diff={1}  train-time(sec)={2}  {3}={4}%", Global.glbIter, Global.diff.ToString("e2"), time.ToString("f2"), Global.metric, score.ToString("f2"));

                //if (Global.diff < Global.convergeTol)
                //    break;
            }
            return(score);
        }
Example #5
        public optimLBFGS(toolbox tb, List <double> wInit, int memo, double l1weight, double maxIter)
        {
            _model     = tb.Model;
            _modelList = tb.ModelList;
            _X         = tb.X;
            _XList     = tb.XList;
            _inf       = tb.Inf;
            _fGene     = tb.FGene;
            _grad      = tb.Grad;

            if (memo <= 0)
            {
                throw new Exception("memo must be an integer greater than zero.");
            }

            double[] tmpAry = new double[wInit.Count];
            _w               = new List <double>(wInit);
            _gradList        = new List <double>(tmpAry);
            _newW            = new List <double>(wInit);
            _newGradList     = new List <double>(tmpAry);
            _dir             = new List <double>(tmpAry);
            _steepestDescDir = new List <double>(_newGradList);
            double[] tmpAry2 = new double[memo];
            _alphas   = new List <double>(tmpAry2);
            _iter     = 0;
            _memo     = memo;
            _dim      = wInit.Count;
            _l1weight = l1weight;
            _maxIter  = maxIter;

            _value = evalL1();
            listTool.listSet(ref _gradList, _newGradList);
        }
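The constructor sizes _alphas to memo, which matches the standard L-BFGS two-loop recursion: the search direction is an approximate inverse-Hessian-vector product rebuilt each iteration from the last memo correction pairs s_j = w_{j+1} - w_j and y_j = grad_{j+1} - grad_j. The toolkit's actual update step is not part of this listing; for reference, a self-contained sketch of the textbook recursion on plain arrays (names and array layout are illustrative, assuming the usual System.Collections.Generic import):

//two-loop recursion: returns d ~= H*g, where H approximates the inverse
//Hessian built from the stored corrections in sList/yList (newest last)
static double[] twoLoop(double[] g, List<double[]> sList, List<double[]> yList)
{
    int m = sList.Count;
    double[] q     = (double[])g.Clone();
    double[] alpha = new double[m];
    double[] rho   = new double[m];
    for (int j = 0; j < m; j++)
    {
        rho[j] = 1.0 / dot(yList[j], sList[j]);
    }
    //first loop: newest to oldest
    for (int j = m - 1; j >= 0; j--)
    {
        alpha[j] = rho[j] * dot(sList[j], q);
        for (int i = 0; i < q.Length; i++)
        {
            q[i] -= alpha[j] * yList[j][i];
        }
    }
    //scale by gamma = (s.y)/(y.y) of the newest pair as the initial Hessian
    if (m > 0)
    {
        double gamma = dot(sList[m - 1], yList[m - 1]) / dot(yList[m - 1], yList[m - 1]);
        for (int i = 0; i < q.Length; i++)
        {
            q[i] *= gamma;
        }
    }
    //second loop: oldest to newest
    for (int j = 0; j < m; j++)
    {
        double beta = rho[j] * dot(yList[j], q);
        for (int i = 0; i < q.Length; i++)
        {
            q[i] += (alpha[j] - beta) * sList[j][i];
        }
    }
    return(q);
}

static double dot(double[] a, double[] b)
{
    double s = 0;
    for (int i = 0; i < a.Length; i++)
    {
        s += a[i] * b[i];
    }
    return(s);
}

The descent direction for minimization is the negation of the returned vector. Given the _l1weight, _steepestDescDir, and evalL1 members above, the toolkit's variant is presumably OWL-QN-style, wrapping this recursion with orthant projections to handle the non-differentiable L1 term.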
Example #6
        public optimSAPO(toolbox tb)
        {
            _model = tb.Model;
            _X     = tb.X;
            _inf   = tb.Inf;
            _fGene = tb.FGene;

            //reinit globals
            Global.reinitGlobal();
        }
Example #7
        static void crossValidation()
        {
            //load data
            Console.WriteLine("reading cross validation data...");
            Global.swLog.WriteLine("reading cross validation data...");
            List <dataSet> XList  = new List <dataSet>();
            List <dataSet> XXList = new List <dataSet>();

            loadDataForCV(XList, XXList);

            //start cross validation
            foreach (double r in Global.regList)//do CV for each different regularizer r (sigma)
            {
                Global.swLog.WriteLine("\ncross validation. r={0}", r);
                Console.WriteLine("\ncross validation. r={0}", r);
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("% cross validation. r={0}", r);
                }
                for (int i = 0; i < Global.nCV; i++)
                {
                    Global.swLog.WriteLine("\n#validation={0}", i + 1);
                    Console.WriteLine("\n#validation={0}", i + 1);
                    if (Global.rawResWrite)
                    {
                        Global.swResRaw.WriteLine("% #validation={0}", i + 1);
                    }
                    Global.reg = r;
                    dataSet Xi = XList[i];
                    if (Global.runMode.Contains("rich"))
                    {
                        toolboxRich tb = new toolboxRich(Xi);
                        basicTrain(XXList[i], tb);
                    }
                    else
                    {
                        toolbox tb = new toolbox(Xi);
                        basicTrain(XXList[i], tb);
                    }

                    resSummarize.write();
                    if (Global.rawResWrite)
                    {
                        Global.swResRaw.WriteLine();
                    }
                }
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine();
                }
            }
        }
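The loop above only logs per-fold results for each r; resSummarize.write() presumably does the aggregation. If the best regularizer had to be picked programmatically, the bookkeeping would look roughly like this (scoresByReg is a hypothetical structure, not the toolkit's):

//pick the regularizer with the highest mean validation score;
//scoresByReg maps each r to its per-fold scores (hypothetical bookkeeping)
static double bestRegularizer(Dictionary<double, List<double>> scoresByReg)
{
    double bestR    = double.NaN;
    double bestMean = double.NegativeInfinity;
    foreach (KeyValuePair<double, List<double>> kv in scoresByReg)
    {
        double mean = 0;
        foreach (double s in kv.Value)
        {
            mean += s;
        }
        mean /= kv.Value.Count;
        if (mean > bestMean)
        {
            bestMean = mean;
            bestR    = kv.Key;
        }
    }
    return(bestR);
}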
Example #8
        public optimStochastic(toolbox tb)
        {
            _model = tb.Model;
            _X     = tb.X;
            _inf   = tb.Inf;
            _fGene = tb.FGene;
            _grad  = tb.Grad;
            //init
            int fsize = _model.W.Length;

            Global.decayList = new List <double>(new double[fsize]);
            listTool.listSet(ref Global.decayList, Global.rate0);
        }
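Global.decayList allocates one rate per feature, initialized to Global.rate0, which suggests per-feature adaptive learning rates in the stochastic updates. How the rates evolve is not shown in this listing; one plausible shape of such an update, offered purely as a sketch (updateWeights, activeFeatures, and decayFactor are assumed names):

//hypothetical per-feature SGD step: each weight has its own rate,
//and a rate decays multiplicatively whenever its feature fires
static void updateWeights(float[] w, List<double> decayList,
                          int[] activeFeatures, double[] grad, double decayFactor)
{
    foreach (int f in activeFeatures)
    {
        w[f]         -= (float)(decayList[f] * grad[f]);
        decayList[f] *= decayFactor;//e.g., 0.96 per update (assumed)
    }
}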
Example #9
        public optimPercMIRA(toolbox tb)
        {
            _model = tb.Model;
            _X     = tb.X;
            _inf   = tb.Inf;
            _fGene = tb.FGene;

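            //_sumW presumably accumulates the weights across updates for parameter averaging (as in the averaged perceptron); _tmpW looks like scratch space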
            _sumW        = new float[_model.W.Length];
            _tmpW        = new float[_model.W.Length];
            _recoverFlag = false;

            //reinit globals
            Global.reinitGlobal();
        }
Example #10
        public optimSGD(toolbox tb)
        {
            _model     = tb.Model;
            _modelList = tb.ModelList;
            _X         = tb.X;
            _XList     = tb.XList;
            _inf       = tb.Inf;
            _fGene     = tb.FGene;
            _grad      = tb.Grad;

            //init
            if (Global.runMode.Contains("mt"))
            {
                initForMulti();
            }
        }
Example #11
        //merged learning in multi-task framework
        public static void train_multi_merge(List <dataSet> XXList, toolbox tb)
        {
            Global.reinitGlobal();

            if (Global.optim.Contains("bfgs"))
            {
                Global.bfgsTb       = tb;
                Global.bfgsXXList   = XXList;
                Global.bfgsTestMode = "mt.merge";
            }

            for (int i = 0; i < Global.ttlIter; i++)
            {
                Global.glbIter++;
                Stopwatch timer = new Stopwatch();
                timer.Start();

                double error = tb.train_single();

                timer.Stop();
                double time = timer.ElapsedMilliseconds / 1000.0;
                Global.swLog.WriteLine("Training used time (second): " + time.ToString());

                //evaluate
                if (!Global.optim.Contains("bfgs"))//test is already done in bfgs training
                {
                    List <double> scoreList = tb.test_multi_merge(XXList, i, Global.swOutputList);
                    for (int k = 0; k < Global.nTask; k++)
                    {
                        Global.scoreTaskList_multi[k].Add(scoreList[k]);
                    }
                    Global.timeList_multi[Global.glbIter - 1]  += time;
                    Global.errorList_multi[Global.glbIter - 1] += error;
                }
                if (Global.diff < Global.convergeTol)
                {
                    break;
                }
            }

            //save model
            if (Global.save == 1)
            {
                tb.Model.save(Global.modelDir + Global.fModel);
            }
        }
Example #12
        static double train()
        {
            //load data
            Console.WriteLine("\nreading training & test data...");
            Global.swLog.WriteLine("\nreading training & test data...");
            dataSet X, XX;

            if (Global.runMode.Contains("tune"))//put "tune" related code here because train() could be sub-function of tune()
            {
                dataSet origX = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                X  = new dataSet();
                XX = new dataSet();
                dataSplit(origX, Global.tuneSplit, X, XX);
            }
            else
            {
                X  = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                dataSizeScale(X);
            }
            Console.WriteLine("done! train/test data sizes: {0}/{1}", X.Count, XX.Count);
            Global.swLog.WriteLine("done! train/test data sizes: {0}/{1}", X.Count, XX.Count);
            double score = 0;

            //start training
            foreach (double r in Global.regList)//train on different r (sigma)
            {
                Global.reg = r;
                Global.swLog.WriteLine("\nr: " + r.ToString());
                Console.WriteLine("\nr: " + r.ToString());
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("\n%r: " + r.ToString());
                }
                toolbox tb = new toolbox(X, true);
                score = basicTrain(XX, tb);
                resSummarize.write();//summarize the results & output the summarized results

                if (Global.save == 1)
                {
                    tb.Model.save(Global.fModel);//save model as a .txt file
                }
            }
            return(score);
        }
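dataSplit(origX, Global.tuneSplit, X, XX) is not shown here; presumably it carves origX into a training part and a held-out tuning part according to the tuneSplit ratio. A minimal sketch of that contract on generic lists (the leading-fraction semantics are an assumption):

//fill 'first' with the leading fraction of data and 'rest' with the remainder
static void splitByRatio<T>(List<T> data, double ratio, List<T> first, List<T> rest)
{
    int cut = (int)(data.Count * ratio);
    for (int i = 0; i < data.Count; i++)
    {
        if (i < cut)
        {
            first.Add(data[i]);
        }
        else
        {
            rest.Add(data[i]);
        }
    }
}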
Example #13
        //multi-task learning
        public static void train_multi_mtl(List <dataSet> XXList, toolbox tb)
        {
            Global.reinitGlobal();

            for (int iter = 0; iter < Global.ttlIter; iter++)
            {
                Global.glbIter++;
                Stopwatch timer = new Stopwatch();
                timer.Start();

                double error = tb.train_multi();

                timer.Stop();
                double time = timer.ElapsedMilliseconds / 1000.0;
                Global.swLog.WriteLine("Training used time (second): " + time.ToString());

                //evaluate
                List <double> scoreList = tb.test_multi_mtl(XXList, iter, Global.swOutputList);
                for (int i = 0; i < Global.nTask; i++)
                {
                    Global.scoreTaskList_multi[i].Add(scoreList[i]);
                }
                Global.timeList_multi[Global.glbIter - 1]  += time;
                Global.errorList_multi[Global.glbIter - 1] += error;

                if (iter > 30 && Global.diff > 0 && Global.diff < Global.convergeTol)
                {
                    break;
                }
            }

            //save model
            if (Global.save == 1)
            {
                for (int i = 0; i < Global.nTask; i++)
                {
                    tb.ModelList[i].save(Global.modelDir + i.ToString() + Global.fModel);
                }
            }
        }
Example #14
        public optimLBFGS(toolbox tb, float[] init, int m, double l1weight, double maxIter)
        {
            _model = tb.Model;
            _X     = tb.X;
            _inf   = tb.Inf;
            _fGene = tb.FGene;
            _grad  = tb.Grad;

            double[] wInit = new double[init.Length];
            for (int i = 0; i < init.Length; i++)
            {
                wInit[i] = (double)init[i];
            }

            if (m <= 0)
            {
                throw new Exception("m must be an integer greater than zero.");
            }

            double[] tmpAry = new double[wInit.Length];
            _w               = new List <double>(wInit);
            _gradList        = new List <double>(tmpAry);
            _newW            = new List <double>(wInit);
            _newGradList     = new List <double>(tmpAry);
            _dir             = new List <double>(tmpAry);
            _steepestDescDir = new List <double>(_newGradList);
            double[] tmpAry2 = new double[m];
            _alphas   = new List <double>(tmpAry2);
            _iter     = 0;
            _memo     = m;
            _dim      = wInit.Length;
            _l1weight = l1weight;
            _maxIter  = maxIter;

            _value = evalL1();
            listTool.listSet(ref _gradList, _newGradList);
        }
Example #15
        public inference(toolbox tb)
        {
            _optim = tb.Optim;
            _fGene = tb.FGene;
        }
Example #16
        public inference(toolbox tb)
        {
            _optim = tb.Optim;
            _fGene = tb.FGene;
            _grad  = tb.Grad;
        }
Example #17
        public gradient(toolbox tb)
        {
            _optim = tb.Optim;
            _inf   = tb.Inf;
            _fGene = tb.FGene;
        }
Example #18
        public inferRich(toolbox tb)
            : base(tb)
        {
        }
Example #19
        static double train(dataSet X = null, dataSet XX = null)
        {
            //load data
            if (X == null && XX == null)
            {
                Console.WriteLine("\nreading training & test data...");
                Global.swLog.WriteLine("\nreading training & test data...");
                X  = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                dataSizeScale(X);

                double trainLength = 0, testLength = 0;
                foreach (dataSeq x in X)
                {
                    trainLength += x.Count;
                }
                trainLength /= (double)X.Count;
                foreach (dataSeq x in XX)
                {
                    testLength += x.Count;
                }
                testLength /= (double)XX.Count;

                Console.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
                Global.swLog.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
                Global.swLog.WriteLine("sample length (train, test): {0} {1}", trainLength.ToString("f2"), testLength.ToString("f2"));
                Console.WriteLine("sample length (train, test): {0} {1}", trainLength, testLength);
                if (Global.structReg)
                {
                    double trainAlpha = trainLength / Global.miniSize;
                    Global.swLog.WriteLine("train-alpha in structReg: {0}", trainAlpha.ToString("f2"));
                }
                Global.swLog.Flush();
            }

            double score = 0;

            if (Global.structReg)
            {
                foreach (double sr in Global.srList)
                {
                    Global.miniSize = sr;

                    Global.swLog.WriteLine("\n%sr:{0}", sr);
                    Console.WriteLine("\n%sr:{0}", sr);
                    if (Global.rawResWrite)
                    {
                        Global.swResRaw.WriteLine("\n%sr:{0}", sr);
                    }

                    toolbox tb = new toolbox(X);
                    score = baseTrain(XX, tb);
                    resSummarize.write();
                    //save model
                    if (Global.save == 1)
                    {
                        tb.Model.save(Global.fModel);
                    }
                }
            }
            else
            {
                toolbox tb = new toolbox(X);
                score = baseTrain(XX, tb);
                resSummarize.write();
                //save model
                if (Global.save == 1)
                {
                    tb.Model.save(Global.fModel);
                }
            }

            return(score);
        }
Example #20
        public gradRich(toolbox tb)
            : base(tb)
        {
        }
Example #21
        static void multiTask()
        {
            //train
            if (Global.runMode.Contains("train"))
            {
                //load data
                List <dataSet> XList  = new List <dataSet>();
                List <dataSet> XXList = new List <dataSet>();
                dataSet        X      = new dataSet();
                loadData_multi(XList, X, XXList);

                toolbox tb;

                //single-task training in multi-task framework: each task has its own independent train & test data
                if (Global.mt_singleTrain)
                {
                    foreach (double r in Global.regList)//experiments for each different regularizer value
                    {
                        Global.swResRaw.WriteLine("\n%single-task! r: {0}", r);
                        Console.WriteLine("\nsingle-task! r: {0}", r);

                        for (int i = 0; i < Global.nTask; i++)
                        {
                            Global.swLog.WriteLine("\nsingle-task! #task, r: " + (i + 1).ToString() + "," + r.ToString());
                            Console.WriteLine("\nsingle-task! #task, r: " + (i + 1).ToString() + "," + r.ToString());
                            Global.reg = r;
                            dataSet Xi = XList[i];
                            tb = new toolbox(Xi);
                            train_multi_single(XXList, tb, i);
                        }
                        resProcess.write_multi();
                    }
                    Global.swResRaw.WriteLine();
                }

                //merged training in multi-task framework: merge all training data to train a unified model
                if (Global.mt_mergeTrain)
                {
                    foreach (double r in Global.regList)//experiments for each different regularizer value
                    {
                        Global.reg = r;
                        Global.swLog.WriteLine("\nmerged-task! r: " + r.ToString());
                        Console.WriteLine("\nmerged-task! r: " + r.ToString());
                        Global.swResRaw.WriteLine("\n%merged-task! r: " + r.ToString());
                        tb = new toolbox(X);
                        train_multi_merge(XXList, tb);

                        resProcess.write_multi();
                    }
                    Global.swResRaw.WriteLine();
                }

                //multi-task learning
                if (Global.mt_mtTrain)
                {
                    foreach (double r in Global.regList)//experiments for each different regularizer value
                    {
                        Global.reg = r;
                        foreach (double cFactor in Global.cFactors)//experiments for each different C value (see Eq. 18 & 19 of [Sun+ TKDE 2013] for the definition of C)
                        {
                            Global.C = cFactor;
                            Global.swLog.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                            Global.swSimi.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                            Global.swResRaw.WriteLine("\n%multi-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                            Console.WriteLine("\nmulti-task! reg, rate0, C, kernel: {0},{1},{2},{3}", Global.reg, Global.rate0, Global.C, Global.simiMode);
                            tb = new toolbox(X, XList);
                            train_multi_mtl(XXList, tb);

                            resProcess.write_multi();
                        }
                    }
                    Global.swResRaw.WriteLine();
                }
            }
            else if (Global.runMode.Contains("test1"))//normal test
            {
                //load data
                List <dataSet> XList  = new List <dataSet>();
                List <dataSet> XXList = new List <dataSet>();
                dataSet        X      = new dataSet();
                loadData_multi(XList, X, XXList);
                //load model etc.
                toolbox tb = new toolbox(X, XList, false);

                if (Global.mt_mergeTrain)//multi_merge
                {
                    List <double> scoreList = tb.test_multi_merge(XXList, 0, Global.swOutputList);
                    for (int i = 0; i < Global.nTask; i++)
                    {
                        Global.scoreTaskList_multi[i].Add(scoreList[i]);
                    }
                    resProcess.write_multi();
                }
                else//multi_single or multi_mtl: they share the same testing scheme
                {
                    List <double> scoreList = tb.test_multi_mtl(XXList, 0, Global.swOutputList);
                    for (int i = 0; i < Global.nTask; i++)
                    {
                        Global.scoreTaskList_multi[i].Add(scoreList[i]);
                    }
                    resProcess.write_multi();
                }
            }
            else if (Global.runMode.Contains("test2"))//for multi_mtl: test a new task via choosing the most similar model
            {
                //load data
                List <dataSet> XList  = new List <dataSet>();
                List <dataSet> XXList = new List <dataSet>();
                dataSet        X      = new dataSet();
                loadData_multi(XList, X, XXList);
                //get vectors
                List <List <double> > vecList = new List <List <double> >();
                foreach (dataSet Xi in XList)
                {
                    List <double> vec = getVecFromX(Xi);
                    vecList.Add(vec);
                }
                //load model & test
                toolbox       tb        = new toolbox(X, XList, false);
                List <double> scoreList = tb.test2_multi_mtl(vecList, XXList, 0, Global.swOutputList);
                for (int i = 0; i < Global.nTask; i++)
                {
                    Global.scoreTaskList_multi[i].Add(scoreList[i]);
                }
                resProcess.write_multi();
            }
            else if (Global.runMode.Contains("test3"))//for multi_mtl: test a new task via voted-test based on all models, i.e., the OMT-SBD method described in Section 4.4 of [Sun+ TKDE 2013]
            {
                //load data
                List <dataSet> XList  = new List <dataSet>();
                List <dataSet> XXList = new List <dataSet>();
                dataSet        X      = new dataSet();
                loadData_multi(XList, X, XXList);
                //get vectors
                List <List <double> > vecList = new List <List <double> >();
                foreach (dataSet Xi in XList)
                {
                    List <double> vec = getVecFromX(Xi);
                    vecList.Add(vec);
                }
                //load model & test
                toolbox       tb        = new toolbox(X, XList, false);
                List <double> scoreList = tb.test3_multi_mtl(vecList, XXList, 0, Global.swOutputList);
                for (int i = 0; i < Global.nTask; i++)
                {
                    Global.scoreTaskList_multi[i].Add(scoreList[i]);
                }
                resProcess.write_multi();
            }
            else
            {
                throw new Exception("error");
            }
        }
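In the test2 branch, getVecFromX turns each task's data into a vector and test2_multi_mtl presumably scores the new task's vector against each trained task's vector, then decodes with the most similar model. Global.simiMode governs which kernel is used, so the concrete measure here is an assumption; cosine similarity is the usual default and is sketched below (cosine and mostSimilar are illustrative names):

//cosine similarity between two task vectors; the small epsilon guards
//against zero-norm vectors
static double cosine(List<double> a, List<double> b)
{
    double dot = 0, na = 0, nb = 0;
    for (int i = 0; i < a.Count; i++)
    {
        dot += a[i] * b[i];
        na  += a[i] * a[i];
        nb  += b[i] * b[i];
    }
    return(dot / (Math.Sqrt(na) * Math.Sqrt(nb) + 1e-12));
}

//index of the most similar task vector to the query
static int mostSimilar(List<double> query, List<List<double>> vecList)
{
    int    best    = -1;
    double bestSim = double.NegativeInfinity;
    for (int i = 0; i < vecList.Count; i++)
    {
        double sim = cosine(query, vecList[i]);
        if (sim > bestSim)
        {
            bestSim = sim;
            best    = i;
        }
    }
    return(best);
}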