//compute grad of: sum{-log{P(y*|x,w)} + R(w)}
        public double getGrad_BFGS(List <double> vecGrad, model m, dataSet X)
        {
            //-log(obj): accumulated negative log-likelihood over all sequences;
            //per-sequence gradients are summed into vecGrad by getGrad
            double error      = 0;
            int    nbFeatures = _fGene.NCompleteFeature;

            foreach (dataSeq im in X)
            {
                error += getGrad(vecGrad, m, im, null);
            }

            //L2 regularization (the two identical reg != 0 guards were merged into one):
            //grad += w/sigma^2,  error += ||w||^2 / (2*sigma^2)
            if (Global.reg != 0.0)
            {
                double sigmaSq = Global.reg * Global.reg;
                for (int f = 0; f < nbFeatures; f++)
                {
                    vecGrad[f] += m.W[f] / sigmaSq;
                }
                error += listTool.squareSum(m.W) / (2.0 * sigmaSq);
            }
            return(error);
        }
Exemple #2
0
 //construct a model; weight init strategy is selected by Global.random:
 //0 = zeros, 1 = fresh random values, 2 = copy of the tuned optimal weights
 public model(dataSet X, featureGenerator fGen)
 {
     _nTag         = X.NTag;
     _nHiddenState = Global.nHiddenStatePerTag * _nTag;
     //default value is 0
     switch (Global.random)
     {
     case 0:
         _w = new float[fGen.NCompleteFeature];
         break;

     case 1:
     {
         List <float> randomWeights = randomDoubleTool.getRandomList_float(fGen.NCompleteFeature);
         _w = randomWeights.ToArray();
         if (Global.tuneWeightInit)
         {
             //stash this init so the tuning loop can keep the best-scoring one
             if (Global.tmpW == null)
             {
                 Global.tmpW = new float[_w.Length];
             }
             _w.CopyTo(Global.tmpW, 0);
         }
         break;
     }

     case 2:
         _w = new float[fGen.NCompleteFeature];
         Global.optimW.CopyTo(_w, 0);
         break;

     default:
         throw new Exception("error");
     }
 }
        //evaluate every task's model on its own test set; returns the primary score per task
        public List <double> test_multi_mtl(List <dataSet> XXList, double iter, List <StreamWriter> swOutputList)
        {
            List <double> scoreList = new List <double>();

            for (int taskId = 0; taskId < XXList.Count; taskId++)
            {
                dataSet       data = XXList[taskId];
                model         mdl  = _modelList[taskId];
                List <double> res;
                switch (Global.evalMetric)
                {
                case "tok.acc":
                    res = decode_tokAcc(data, mdl, iter, swOutputList[taskId]);
                    break;

                case "str.acc":
                    res = decode_strAcc(data, mdl, iter, swOutputList[taskId]);
                    break;

                case "f1":
                    res = decode_fscore(data, mdl, iter, swOutputList[taskId]);
                    break;

                default:
                    throw new Exception("error");
                }
                scoreList.Add(res[0]);
            }
            return(scoreList);
        }
Exemple #4
0
        //compute grad of: sum{-log{P(y*|x,w)}} + R(w)
        public double getGrad_BFGS(List <double> g, model m, dataSet X)
        {
            //negative log-likelihood accumulated over all training sequences;
            //per-sequence gradients are summed into g by getGradCRF
            double error    = 0;
            int    nFeature = _fGene.NCompleteFeature;

            foreach (dataSeq x in X)
            {
                error += getGradCRF(g, m, x, null);
            }

            //L2 regularization (the two identical reg != 0 guards were merged into one):
            //grad += w/sigma^2,  error += ||w||^2 / (2*sigma^2)
            if (Global.reg != 0.0)
            {
                double sigmaSq = Global.reg * Global.reg;
                for (int f = 0; f < nFeature; f++)
                {
                    g[f] += m.W[f] / sigmaSq;
                }
                error += arrayTool.squareSum(m.W) / (2.0 * sigmaSq);
            }
            return(error);
        }
Exemple #5
0
        //split X by index fraction: X1 receives samples whose index lies in
        //[v1*N, v2*N); X2 receives every other sample
        public static void dataSplit(dataSet X, double v1, double v2, dataSet X1, dataSet X2)
        {
            if (v2 < v1)
            {
                throw new Exception("error");
            }
            X1.Clear();
            X2.Clear();
            X1.setDataInfo(X);
            X2.setDataInfo(X);
            int lo = (int)(X.Count * v1);
            int hi = (int)(X.Count * v2);

            for (int k = 0; k < X.Count; k++)
            {
                dataSet target = (k >= lo && k < hi) ? X1 : X2;
                target.Add(X[k]);
            }
        }
Exemple #6
0
        //load the test set, run the saved model over it, and record score & timing
        static double test()
        {
            Console.WriteLine("reading test data...");
            Global.swLog.WriteLine("reading test data...");
            dataSet testData = new dataSet(Global.fFeatureTest, Global.fGoldTest);

            Console.WriteLine("Done! test data size: {0}", testData.Count);
            Global.swLog.WriteLine("Done! test data size: {0}", testData.Count);
            //load model & feature files for testing
            toolbox box = new toolbox(testData, false);

            Stopwatch watch = new Stopwatch();
            watch.Start();

            List <double> scores = box.test(testData, 0);

            watch.Stop();
            double seconds = watch.ElapsedMilliseconds / 1000.0;

            Global.timeList.Add(seconds);
            Global.scoreListList.Add(scores);

            resSummarize.write();
            //scores[0] is the primary metric
            return(scores[0]);
        }
Exemple #7
0
        //cut x into overlapping mini-segments and add them to X2;
        //the scan direction (forward/backward) is chosen at random
        static void getSegments(dataSeq x, dataSet X2)
        {
            bool forward = randomTool.getOneRandom_int(-100, 100) <= 0;

            if (forward)
            {
                int pos = 0;
                while (pos < x.Count)
                {
                    int stride = getStep();
                    X2.Add(new dataSeq(x, pos, stride + Global.overlapLength));
                    pos += stride;
                }
            }
            else
            {
                int pos = x.Count - 1;
                while (pos >= 0)
                {
                    int stride = getStep();
                    //the extra bool flags backward extraction — TODO confirm against dataSeq ctor
                    X2.Add(new dataSeq(x, pos, stride + Global.overlapLength, false));
                    pos -= stride;
                }
            }
        }
Exemple #8
0
        //build a new dataset in which, under structured regularization, long
        //sequences are divided into mini-samples; otherwise samples pass through
        public static dataSet structSplit(dataSet X)
        {
            //make fractions
            dataSet result = new dataSet(X.NTag, X.NFeature);
            bool    doSplit = Global.structReg && Global.miniSize != 0;

            for (int idx = 0; idx < X.Count; idx++)
            {
                dataSeq seq = X[idx];

                if (doSplit)
                {
                    getSegments(seq, result);
                }
                else
                {
                    result.Add(seq);
                }
            }

            return(result);
        }
Exemple #9
0
        //this function can be called by train(), cv(), & richEdge.train();
        //runs Global.ttlIter training iterations, evaluating on XTest after each,
        //and returns the score of the final iteration
        public static double baseTrain(dataSet XTest, toolbox tb)
        {
            Global.reinitGlobal();
            double score = 0;

            for (int iter = 0; iter < Global.ttlIter; iter++)
            {
                Global.glbIter++;
                Stopwatch watch = new Stopwatch();
                watch.Start();

                double trainErr = tb.train();

                watch.Stop();
                double seconds = watch.ElapsedMilliseconds / 1000.0;

                Global.timeList.Add(seconds);
                Global.errList.Add(trainErr);
                Global.diffList.Add(Global.diff);

                List <double> scores = tb.test(XTest, iter);
                score = scores[0];
                Global.scoreListList.Add(scores);

                //same progress line goes to both the log file and the console
                string progress = string.Format("iter{0}  diff={1}  train-time(sec)={2}  {3}={4}%", Global.glbIter, Global.diff.ToString("e2"), seconds.ToString("f2"), Global.metric, score.ToString("f2"));
                Global.swLog.WriteLine(progress);
                Global.swLog.WriteLine("------------------------------------------------");
                Global.swLog.Flush();
                Console.WriteLine(progress);

                //if (Global.diff < Global.convergeTol)
                //break;
            }
            return(score);
        }
Exemple #10
0
        //decode X with the current model under the configured eval metric;
        //per-token outputs are streamed to Global.swOutput while decoding
        public List <double> test(dataSet X, double iter)
        {
            string outfile = Global.outDir + Global.fOutput;

            Global.swOutput = new StreamWriter(outfile);
            try
            {
                List <double> scoreList;

                if (Global.evalMetric == "tok.acc")
                {
                    scoreList = decode_tokAcc(X, _model, iter);
                }
                else if (Global.evalMetric == "str.acc")
                {
                    scoreList = decode_strAcc(X, _model, iter);
                }
                else if (Global.evalMetric == "f1")
                {
                    scoreList = decode_fscore(X, _model, iter);
                }
                else
                {
                    throw new Exception("error");
                }
                return(scoreList);
            }
            finally
            {
                //fix: close the writer even when decoding throws, so the output
                //file handle is not leaked and buffered text is flushed
                Global.swOutput.Close();
            }
        }
Exemple #11
0
        //train a model (optionally on caller-supplied data), evaluate it,
        //and optionally save it to disk
        static double train(dataSet X = null, dataSet XX = null)
        {
            if (X == null && XX == null)
            {
                //no data supplied: load from the configured feature/gold files
                Console.WriteLine("\nreading training & test data...");
                Global.swLog.WriteLine("\nreading training & test data...");
                X  = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                XX = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                dataSizeScale(X);
                Console.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
                Global.swLog.WriteLine("data sizes (train, test): {0} {1}", X.Count, XX.Count);
            }

            toolbox box   = new toolbox(X, true);
            double  score = baseTrain(XX, box);

            resSummarize.write();
            //save model
            if (Global.save == 1)
            {
                box.Model.save(Global.fModel);
            }

            return(score);
        }
 //for multi-task: training mode builds one model per task from data;
 //test mode loads each task's model from its saved file
 public toolbox(dataSet X, List <dataSet> XList, bool train = true)
 {
     _X         = X;
     _XList     = XList;
     _model     = null;
     _modelList = new List <model>();

     if (train)//to train
     {
         _fGene = new featureGenerator(X);
         for (int t = 0; t < Global.nTask; t++)
         {
             _modelList.Add(new model(XList[t], _fGene));
         }
         _inf  = new inference(this);
         _grad = new gradient(this);
         initOptimizer();
     }
     else//to test
     {
         for (int t = 0; t < Global.nTask; t++)
         {
             _modelList.Add(new model(Global.modelDir + t.ToString() + Global.fModel));
         }
         _fGene = new featureGenerator(X);
         _inf   = new inference(this);
         _grad  = new gradient(this);
     }
 }
Exemple #13
0
        //n-fold cross validation: train on fold i's training split,
        //evaluate on its held-out split
        static void crossValidation()
        {
            Console.WriteLine("reading cross validation data...");
            Global.swLog.WriteLine("reading cross validation data...");
            List <dataSet> trainFolds = new List <dataSet>();
            List <dataSet> testFolds  = new List <dataSet>();

            loadDataForCV(trainFolds, testFolds);

            for (int fold = 0; fold < Global.nCV; fold++)
            {
                Global.swLog.WriteLine("\n#validation={0}", fold + 1);
                Console.WriteLine("\n#validation={0}", fold + 1);
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("% #validation={0}", fold + 1);
                }
                toolbox box = new toolbox(trainFolds[fold]);
                baseTrain(testFolds[fold], box);

                resSummarize.write();
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine();
                }
            }
        }
Exemple #14
0
        //return a copy of this dataset with its samples in random order
        public dataSet randomShuffle()
        {
            List <int> order = randomTool <int> .getShuffledIndexList(this.Count);

            dataSet shuffled = new dataSet(this.NTag, this.NFeature);

            for (int k = 0; k < order.Count; k++)
            {
                shuffled.Add(this[order[k]]);
            }
            return(shuffled);
        }
Exemple #15
0
        //cross-validate over every regularizer value (sigma) and every fold
        static void crossValidation()
        {
            Console.WriteLine("reading cross validation data...");
            Global.swLog.WriteLine("reading cross validation data...");
            List <dataSet> trainFolds = new List <dataSet>();
            List <dataSet> testFolds  = new List <dataSet>();

            loadDataForCV(trainFolds, testFolds);

            //start cross validation
            foreach (double sigma in Global.regList)//do CV for each different regularizer r (sigma)
            {
                Global.swLog.WriteLine("\ncross validation. r={0}", sigma);
                Console.WriteLine("\ncross validation. r={0}", sigma);
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("% cross validation. r={0}", sigma);
                }
                for (int fold = 0; fold < Global.nCV; fold++)
                {
                    Global.swLog.WriteLine("\n#validation={0}", fold + 1);
                    Console.WriteLine("\n#validation={0}", fold + 1);
                    if (Global.rawResWrite)
                    {
                        Global.swResRaw.WriteLine("% #validation={0}", fold + 1);
                    }
                    Global.reg = sigma;
                    dataSet trainSet = trainFolds[fold];
                    //rich-edge mode uses a different toolbox type
                    if (Global.runMode.Contains("rich"))
                    {
                        toolboxRich box = new toolboxRich(trainSet);
                        basicTrain(testFolds[fold], box);
                    }
                    else
                    {
                        toolbox box = new toolbox(trainSet);
                        basicTrain(testFolds[fold], box);
                    }

                    resSummarize.write();
                    if (Global.rawResWrite)
                    {
                        Global.swResRaw.WriteLine();
                    }
                }
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine();
                }
            }
        }
Exemple #16
0
        //for train & test
        //feature space layout: [node features | edge features];
        //edge features start at offset _backoffEdge
        public featureGenerator(dataSet X)
        {
            _nFeatureTemp = X.NFeatureTemp;
            _nTag         = X.NTag;
            Global.swLog.WriteLine("feature templates: {0}", _nFeatureTemp);

            int nodeCount = _nFeatureTemp * _nTag;
            int edgeCount = _nTag * _nTag;

            _backoffEdge      = nodeCount;
            _nCompleteFeature = nodeCount + edgeCount;
            Global.swLog.WriteLine("complete features: {0}", _nCompleteFeature);
        }
Exemple #17
0
        //for training
        //rich-edge feature layout: [node features | edge features], where edge
        //features are indexed per template and per state pair
        public featureGeneRich(dataSet X)
        {
            _nFeatureTemp = X.NFeatureTemp;
            Global.swLog.WriteLine("feature templates: {0}", _nFeatureTemp);

            _nTag   = X.NTag;
            _nState = X.NTag * Global.nHiddenStatePerTag;
            int nodeCount = _nFeatureTemp * _nState;
            int edgeCount = _nFeatureTemp * _nState * _nState;

            _backoffEdge      = nodeCount;
            _nCompleteFeature = nodeCount + edgeCount;
            Global.swLog.WriteLine("complete features: {0}", _nCompleteFeature);
        }
Exemple #18
0
        //search for a good random weight initialization before the real training run:
        //each round trains briefly from a fresh random init and the best init is kept.
        //NOTE: this method mutates and then restores several Global settings in a fixed
        //order — keep that order intact when editing.
        static void tuneWeightInit()
        {
            //tune good weight init for latent conditional models
            if (Global.modelOptimizer.StartsWith("lsp"))
            {
                Console.WriteLine("\nreading training & test data...");
                Global.swLog.WriteLine("\nreading training & test data...");
                dataSet origX = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                dataSet X     = new dataSet();
                dataSet XX    = new dataSet();
                //carve tuning train/test sets out of the training file
                dataSplit(origX, Global.tuneSplit, X, XX);

                //backup & change setting
                int origTtlIter = Global.ttlIter;
                Global.ttlIter        = Global.iterTuneWeightInit;//short runs for tuning
                Global.rawResWrite    = false;
                Global.tuneWeightInit = true;//makes the model ctor stash its random init in Global.tmpW

                Global.swTune.WriteLine("tuning weight initialization:");
                Console.WriteLine("tuning weight initialization:");
                double bestScore = 0;
                int    n         = Global.nTuneRound;
                for (int i = 0; i < n; i++)
                {
                    Console.WriteLine("\ntuning-weight-init round {0} (a step before real training!):", i + 1);
                    train(X, XX);
                    double ttlScore = Global.ttlScore;
                    Global.swTune.WriteLine("score: {0}", ttlScore);
                    if (ttlScore > bestScore)
                    {
                        //best round so far: remember its weight init in Global.optimW
                        bestScore = ttlScore;
                        if (Global.optimW == null)
                        {
                            Global.optimW = new float[Global.tmpW.Length];
                        }
                        Global.tmpW.CopyTo(Global.optimW, 0);
                    }
                    else
                    {
                        Global.swTune.WriteLine("optimW no update.");
                    }
                }
                Global.swTune.Flush();
                //recover setting
                Global.random         = 2;//2 means to init with optimal weights in toolbox
                Global.ttlIter        = origTtlIter;
                Global.rawResWrite    = true;
                Global.tuneWeightInit = false;
            }
        }
Exemple #19
0
        //re-init global values and conduct training;
        //dispatches to the rich-edge trainer or the standard one based on run mode
        static double reinitTrain(dataSet X = null, dataSet XX = null)
        {
            Global.reinitGlobal();

            double score = Global.runMode.Contains("rich")
                ? richEdge.train(X, XX)
                : train(X, XX);

            return(score);
        }
 //load one test set per task; data file names are prefixed with the task index
 public static void loadTestData_multi(List <dataSet> XXList)
 {
     XXList.Clear();
     Global.swLog.WriteLine("test data sizes (1, ..., T):");
     for (int task = 0; task < Global.nTask; task++)
     {
         string  featFile = task.ToString() + Global.fFeatureTest;
         string  goldFile = task.ToString() + Global.fGoldTest;
         dataSet testSet  = new dataSet(featFile, goldFile);
         Global.swLog.WriteLine(" " + testSet.Count.ToString());
         XXList.Add(testSet);
     }
     Global.swLog.WriteLine();
 }
Exemple #21
0
        //evaluate a previously saved rich-edge model on the test set
        public static double test()
        {
            dataSet trainData = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
            dataSet testData  = new dataSet(Global.fFeatureTest, Global.fGoldTest);

            Global.swLog.WriteLine("data size (test): {0}", testData.Count);
            //load model for testing
            toolboxRich box = new toolboxRich(trainData, false);

            List <double> scores = box.test(testData, 0);

            Global.scoreListList.Add(scores);
            resSummarize.write();
            //scores[0] is the primary metric
            return(scores[0]);
        }
Exemple #22
0
        //string accuracy: fraction of sequences whose entire tag string
        //matches gold, reported as a percentage
        public List <double> decode_strAcc(dataSet X, model m, double iter)
        {
            double xsize = X.Count;
            double corr  = 0;

            //multi thread decoding fills X2 with per-sequence outputs
            List <dataSeqTest> X2 = new List <dataSeqTest>();

            multiThreading(X, X2);

            foreach (dataSeqTest x in X2)
            {
                //output tag results
                if (Global.swOutput != null)
                {
                    for (int i = 0; i < x._x.Count; i++)
                    {
                        Global.swOutput.Write(x._yOutput[i].ToString() + ",");
                    }
                    Global.swOutput.WriteLine();
                }

                //a sequence counts as correct only if every tag matches gold
                List <int> goldTags = x._x.getTags();
                bool       ck       = true;
                for (int i = 0; i < x._x.Count; i++)
                {
                    if (goldTags[i] != x._yOutput[i])
                    {
                        ck = false;
                        break;
                    }
                }
                if (ck)
                {
                    corr++;
                }
            }
            //fix: avoid NaN (0/0) when the test set is empty
            double acc = xsize == 0 ? 0 : corr / xsize * 100.0;

            Global.swLog.WriteLine("total-tag-strings={0}  correct-tag-strings={1}  string-accuracy={2}%", xsize, corr, acc);
            List <double> scoreList = new List <double>();

            scoreList.Add(acc);
            return(scoreList);
        }
Exemple #23
0
        //train over every regularizer in Global.regList; returns the last score
        static double train()
        {
            Console.WriteLine("\nreading training & test data...");
            Global.swLog.WriteLine("\nreading training & test data...");
            dataSet trainSet, testSet;

            if (Global.runMode.Contains("tune"))//put "tune" related code here because train() could be sub-function of tune()
            {
                //tuning: carve train/test out of the training file
                dataSet full = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                trainSet = new dataSet();
                testSet  = new dataSet();
                dataSplit(full, Global.tuneSplit, trainSet, testSet);
            }
            else
            {
                trainSet = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                testSet  = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                dataSizeScale(trainSet);
            }
            Console.WriteLine("done! train/test data sizes: {0}/{1}", trainSet.Count, testSet.Count);
            Global.swLog.WriteLine("done! train/test data sizes: {0}/{1}", trainSet.Count, testSet.Count);
            double score = 0;

            //start training
            foreach (double sigma in Global.regList)//train on different r (sigma)
            {
                Global.reg = sigma;
                Global.swLog.WriteLine("\nr: " + sigma.ToString());
                Console.WriteLine("\nr: " + sigma.ToString());
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("\n%r: " + sigma.ToString());
                }
                toolbox box = new toolbox(trainSet, true);
                score = basicTrain(testSet, box);
                resSummarize.write();//summarize the results & output the summarized results

                if (Global.save == 1)
                {
                    box.Model.save(Global.fModel);//save model as a .txt file
                }
            }
            return(score);
        }
        //test3 mode in multi-task test: all models vote
        //builds, per test set, a single model whose weights are the cosine-weighted
        //average of all task models, then evaluates it under the configured metric
        public List <double> test3_multi_mtl(List <List <double> > vecList, List <dataSet> XXList, double iter, List <StreamWriter> swOutputList)
        {
            List <double> scoreList = new List <double>();

            for (int i = 0; i < XXList.Count; i++)
            {
                dataSet       X      = XXList[i];
                List <double> vec    = MainClass.getVecFromX(X);
                model         m      = new model(_modelList[0], false);
                double        ttlCos = 0;
                for (int j = 0; j < vecList.Count; j++)
                {
                    double cos = mathTool.cos(vecList[j], vec);
                    //fix: accumulate the normalizer once per model, not once per weight;
                    //the old code added cos inside the k-loop, inflating ttlCos by a
                    //factor of W.Count and breaking the weighted average
                    ttlCos += cos;
                    for (int k = 0; k < m.W.Count; k++)
                    {
                        m.W[k] += cos * _modelList[j].W[k];
                    }
                }
                //normalize to a weighted average (NOTE(review): assumes ttlCos != 0 — confirm)
                for (int k = 0; k < m.W.Count; k++)
                {
                    m.W[k] /= ttlCos;
                }

                List <double> scoreList_i;
                if (Global.evalMetric == "tok.acc")
                {
                    scoreList_i = decode_tokAcc(X, m, iter, swOutputList[i]);
                }
                else if (Global.evalMetric == "str.acc")
                {
                    scoreList_i = decode_strAcc(X, m, iter, swOutputList[i]);
                }
                else if (Global.evalMetric == "f1")
                {
                    scoreList_i = decode_fscore(X, m, iter, swOutputList[i]);
                }
                else
                {
                    throw new Exception("error");
                }
                scoreList.Add(scoreList_i[0]);
            }
            return(scoreList);
        }
 //training mode builds a fresh model from data; test mode loads a saved one
 public toolbox(dataSet X, bool train = true)
 {
     _X = X;
     if (train)//for training
     {
         _fGene = new featureGenerator(X);
         _model = new model(X, _fGene);
         _inf   = new inference(this);
         initOptimizer();
     }
     else//for test
     {
         _model = new model(Global.fModel);
         _fGene = new featureGenerator(X);
         _inf   = new inference(this);
     }
 }
Exemple #26
0
 //construct a model; weights start as zeros (Global.random == 0)
 //or uniform random values (Global.random == 1)
 public model(dataSet X, featureGenerator fGen)
 {
     _nTag = X.NTag;
     //default value is 0
     switch (Global.random)
     {
     case 0:
         _w = new float[fGen.NCompleteFeature];
         break;

     case 1:
     {
         List <float> randomWeights = randomDoubleTool.getRandomList_float(fGen.NCompleteFeature);
         _w = randomWeights.ToArray();
         break;
     }

     default:
         throw new Exception("error");
     }
 }
Exemple #27
0
        //f-score
        //decode all sequences (multi-threaded), then score chunk-level F1;
        //tag sequences are compared as comma-joined strings
        public List <double> decode_fscore(dataSet X, model m, double iter)
        {
            List <dataSeqTest> decoded = new List <dataSeqTest>();

            multiThreading(X, decoded);

            List <string> goldTagList = new List <string>();
            List <string> resTagList  = new List <string>();

            foreach (dataSeqTest seq in decoded)
            {
                string predicted = "";
                for (int i = 0; i < seq._yOutput.Count; i++)
                {
                    predicted += seq._yOutput[i].ToString() + ",";
                }
                resTagList.Add(predicted);

                //output tag results
                if (Global.swOutput != null)
                {
                    for (int i = 0; i < seq._yOutput.Count; i++)
                    {
                        Global.swOutput.Write(seq._yOutput[i] + ",");
                    }
                    Global.swOutput.WriteLine();
                }

                string gold = "";
                foreach (int tag in seq._x.getTags())
                {
                    gold += tag.ToString() + ",";
                }
                goldTagList.Add(gold);
            }

            List <double> infoList  = new List <double>();
            List <double> scoreList = fscore.getFscore(goldTagList, resTagList, infoList);

            Global.swLog.WriteLine("#gold-chunk={0}  #output-chunk={1}  #correct-output-chunk={2}  precision={3}  recall={4}  f-score={5}", infoList[0], infoList[1], infoList[2], scoreList[1].ToString("f2"), scoreList[2].ToString("f2"), scoreList[0].ToString("f2"));
            return(scoreList);
        }
Exemple #28
0
        //train rich-edge models over every regularizer in Global.regList;
        //returns the last score
        public static double train()
        {
            Console.WriteLine("\nreading training & test data...");
            Global.swLog.WriteLine("\nreading training & test data...");
            dataSet trainSet, testSet;

            if (Global.runMode.Contains("tune"))
            {
                //tuning: carve train/test out of the training file
                dataSet full = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                trainSet = new dataSet();
                testSet  = new dataSet();
                MainClass.dataSplit(full, Global.tuneSplit, trainSet, testSet);
            }
            else
            {
                trainSet = new dataSet(Global.fFeatureTrain, Global.fGoldTrain);
                testSet  = new dataSet(Global.fFeatureTest, Global.fGoldTest);
                MainClass.dataSizeScale(trainSet);
            }
            Global.swLog.WriteLine("data sizes (train, test): {0} {1}", trainSet.Count, testSet.Count);

            double score = 0;

            foreach (double sigma in Global.regList)
            {
                Global.reg = sigma;
                Global.swLog.WriteLine("\nr: " + sigma.ToString());
                Console.WriteLine("\nr: " + sigma.ToString());
                if (Global.rawResWrite)
                {
                    Global.swResRaw.WriteLine("\n%r: " + sigma.ToString());
                }
                toolboxRich box = new toolboxRich(trainSet);
                score = MainClass.basicTrain(testSet, box);
                resSummarize.write();
                //save model
                if (Global.save == 1)
                {
                    box.Model.save(Global.fModel);
                }
            }
            return(score);
        }
        //chunk-level f-score: Viterbi-decode every sequence and compare the
        //comma-joined tag strings against gold via fscore.getFscore.
        //(removed unused locals nTag/ttl and the ignored Viterbi probability)
        public List <double> decode_fscore(dataSet XX, model m, double iter, StreamWriter swOutput)
        {
            List <string> goldTagList = new List <string>();
            List <string> resTagList  = new List <string>();

            foreach (dataSeq x in XX)
            {
                //compute detected tags
                List <int> tags = new List <int>();
                _inf.decodeViterbi(m, x, tags);

                string res = "";
                foreach (int im in tags)
                {
                    res += im.ToString() + ",";
                }
                resTagList.Add(res);

                //output result tags
                if (swOutput != null)
                {
                    for (int i = 0; i < x.Count; i++)
                    {
                        swOutput.Write(tags[i] + ",");
                    }
                    swOutput.WriteLine();
                }

                List <int> goldTags = x.getTags();
                string     gold     = "";
                foreach (int im in goldTags)
                {
                    gold += im.ToString() + ",";
                }
                goldTagList.Add(gold);
            }
            List <double> infoList  = new List <double>();
            List <double> scoreList = fscore.getFscore(goldTagList, resTagList, infoList);

            return(scoreList);
        }
        //sequence-level (exact-match) accuracy, returned as a fraction in [0,1];
        //a sequence is correct only if every decoded tag matches gold.
        //(removed unused local nTag and the ignored Viterbi probability)
        public List <double> decode_strAcc(dataSet XX, model m, double iter, StreamWriter swOutput)
        {
            double ttl     = XX.Count;
            double correct = 0;

            foreach (dataSeq x in XX)
            {
                //compute detected tags
                List <int> tags = new List <int>();
                _inf.decodeViterbi(m, x, tags);

                //output result tags
                if (swOutput != null)
                {
                    for (int i = 0; i < x.Count; i++)
                    {
                        swOutput.Write(tags[i] + ",");
                    }
                    swOutput.WriteLine();
                }

                List <int> goldTags = x.getTags();
                bool       ck       = true;
                for (int i = 0; i < x.Count; i++)
                {
                    if (goldTags[i] != tags[i])
                    {
                        ck = false;
                        break;
                    }
                }
                if (ck)
                {
                    correct++;
                }
            }
            //fix: avoid NaN (0/0) when XX is empty
            double        acc       = ttl == 0 ? 0 : correct / ttl;
            List <double> scoreList = new List <double>();

            scoreList.Add(acc);
            return(scoreList);
        }