Exemplo n.º 1
0
        /// <summary>
        /// Trains the named-entity perceptron on <paramref name="trainSentences"/>, optionally using
        /// <paramref name="developmentSentences"/> for model selection and early stopping.
        /// </summary>
        /// <remarks>
        /// Runs at most <c>MaximumNeIterations</c> passes over the training sentences, visiting them
        /// in the order given on every pass. When development data is supplied, the weights are
        /// snapshotted via <c>NePerceptron.MakeBestWeight()</c> each time the development F-score
        /// improves by more than a relative threshold, and training stops once three iterations have
        /// passed without such an improvement. Without development data, the snapshot is taken once,
        /// after the final iteration. Progress is written to the console.
        /// </remarks>
        /// <param name="trainSentences">
        /// Sentences to train on. Empty sentences and sentences whose first token has a negative
        /// <c>NeTag</c> are skipped.
        /// </param>
        /// <param name="developmentSentences">
        /// Held-out sentences used to score each iteration, or <c>null</c> to train for the full
        /// number of iterations without early stopping.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="trainSentences"/> is <c>null</c>.
        /// </exception>
        protected void TrainNe(TaggedToken[][] trainSentences, TaggedToken[][] developmentSentences)
        {
            // Fail before the perceptron is put into its training state.
            if (trainSentences == null)
            {
                throw new ArgumentNullException(nameof(trainSentences));
            }

            // Smallest relative F-score gain that counts as an improvement.
            const double MinimumRelativeGain = 0.00025;

            // Number of iterations without improvement after which training stops.
            const int Patience = 3;

            NePerceptron.StartTraining();

            int bestIterations = 0;
            double bestAccuracy = 0.0;

            for (int iterations = 0; iterations < MaximumNeIterations; iterations++)
            {
                Console.WriteLine($"Starting NE iteration {iterations}");

                int tokenCount = 0;
                Evaluation trainEvaluation = new Evaluation();

                foreach (TaggedToken[] trainSentence in trainSentences)
                {
                    if (trainSentence.Length == 0 || trainSentence[0].NeTag < 0)
                    {
                        continue;
                    }

                    // Tag a copy so the reference sentence stays untouched.
                    TaggedToken[] taggedSentence = Array.ConvertAll(trainSentence, token => new TaggedToken(token));

                    TagNe(taggedSentence, false);

                    trainEvaluation.Evaluate(taggedSentence, trainSentence);

                    // Only adjust the weights when the predicted entities differ from the reference.
                    if (!trainEvaluation.CheckNesEqual(taggedSentence, trainSentence))
                    {
                        NeUpdateWeights(taggedSentence, trainSentence);
                    }

                    tokenCount += trainSentence.Length;

                    // Accumulate weights each time more than AccumulateLimit tokens have been processed.
                    if (tokenCount > AccumulateLimit)
                    {
                        NePerceptron.AccumulateWeights();

                        tokenCount = 0;
                    }
                }

                Console.WriteLine($"Training set F-score: {trainEvaluation.GetNeFScore()}");

                if (developmentSentences == null)
                {
                    // No held-out data: keep the weights reached after the final iteration.
                    if (iterations == MaximumNeIterations - 1)
                    {
                        NePerceptron.MakeBestWeight();
                    }

                    continue;
                }

                Evaluation developmentEvaluation = new Evaluation();

                foreach (TaggedToken[] developmentSent in developmentSentences)
                {
                    TaggedToken[] taggedSentence = Array.ConvertAll(developmentSent, token => new TaggedToken(token));

                    TrainingMode = false;

                    try
                    {
                        TagNe(taggedSentence, true);
                    }
                    finally
                    {
                        // Restore training mode even if tagging throws, so the tagger is never
                        // left in non-training mode by a failed development pass.
                        TrainingMode = true;
                    }

                    developmentEvaluation.Evaluate(taggedSentence, developmentSent);
                }

                double developmentAccuracy = developmentEvaluation.GetNeFScore();

                Console.WriteLine($"Development set F-Score: {developmentAccuracy}");

                // A zero score makes the quotient NaN or -Infinity, so the comparison is false
                // and a zero score never counts as an improvement.
                if ((developmentAccuracy - bestAccuracy) / developmentAccuracy > MinimumRelativeGain)
                {
                    bestAccuracy = developmentAccuracy;

                    bestIterations = iterations;

                    NePerceptron.MakeBestWeight();
                }
                else if (bestIterations <= iterations - Patience)
                {
                    Console.WriteLine("F-score not increasing, we are done.");

                    break;
                }
            }

            NePerceptron.EndTraining();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains the part-of-speech perceptron on <paramref name="trainSentences"/>, optionally using
        /// <paramref name="developmentSentences"/> for model selection and early stopping.
        /// </summary>
        /// <remarks>
        /// Runs at most <c>MaximumPosIterations</c> passes over the training sentences, visiting them
        /// in the order given on every pass. When development data is supplied, the weights are
        /// snapshotted via <c>PosPerceptron.MakeBestWeight()</c> whenever the development accuracy
        /// exceeds the best recorded accuracy, and training stops once three iterations have passed
        /// without an improvement larger than a relative threshold. Without development data, the
        /// snapshot is taken once, after the final iteration. Progress is written to the console.
        /// </remarks>
        /// <param name="trainSentences">
        /// Sentences to train on. Empty sentences and sentences whose first token has a negative
        /// <c>PosTag</c> are skipped.
        /// </param>
        /// <param name="developmentSentences">
        /// Held-out sentences used to score each iteration, or <c>null</c> to train for the full
        /// number of iterations without early stopping.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="trainSentences"/> is <c>null</c>.
        /// </exception>
        protected void TrainPos(TaggedToken[][] trainSentences, TaggedToken[][] developmentSentences)
        {
            // Fail before the perceptron is put into its training state.
            if (trainSentences == null)
            {
                throw new ArgumentNullException(nameof(trainSentences));
            }

            // Smallest relative accuracy gain that counts as a significant improvement.
            const double MinimumRelativeGain = 0.00025;

            // Number of iterations without significant improvement after which training stops.
            const int Patience = 3;

            PosPerceptron.StartTraining();

            int bestIterations = 0;
            double bestAccuracy = 0.0;

            for (int iterations = 0; iterations < MaximumPosIterations; iterations++)
            {
                Console.WriteLine($"Starting POS iteration {iterations}");

                int tokensCount = 0;
                Evaluation trainEvaluation = new Evaluation();

                foreach (TaggedToken[] trainSent in trainSentences)
                {
                    if (trainSent.Length == 0 || trainSent[0].PosTag < 0)
                    {
                        continue;
                    }

                    // Tag a copy so the reference sentence stays untouched.
                    TaggedToken[] taggedSent = Array.ConvertAll(trainSent, token => new TaggedToken(token));

                    TagPos(taggedSent, false);

                    int oldPosCorrect = trainEvaluation.PosCorrect;

                    trainEvaluation.Evaluate(taggedSent, trainSent);

                    // Update the weights unless PosCorrect grew by exactly the sentence length,
                    // i.e. unless every token of this sentence was counted as correct.
                    if (trainEvaluation.PosCorrect != oldPosCorrect + trainSent.Length)
                    {
                        PosUpdateWeights(taggedSent, trainSent);
                    }

                    tokensCount += trainSent.Length;

                    // Accumulate weights each time more than AccumulateLimit tokens have been processed.
                    if (tokensCount > AccumulateLimit)
                    {
                        PosPerceptron.AccumulateWeights();

                        tokensCount = 0;
                    }
                }

                Console.WriteLine($"Training set accuracy: {trainEvaluation.GetPosAccuracy()}");

                if (developmentSentences == null)
                {
                    // No held-out data: keep the weights reached after the final iteration.
                    if (iterations == MaximumPosIterations - 1)
                    {
                        PosPerceptron.MakeBestWeight();
                    }

                    continue;
                }

                Evaluation developmentEvaluation = new Evaluation();

                foreach (TaggedToken[] developmentSentence in developmentSentences)
                {
                    TaggedToken[] taggedSentence = Array.ConvertAll(developmentSentence, token => new TaggedToken(token));

                    TrainingMode = false;

                    try
                    {
                        TagPos(taggedSentence, true);
                    }
                    finally
                    {
                        // Restore training mode even if tagging throws, so the tagger is never
                        // left in non-training mode by a failed development pass.
                        TrainingMode = true;
                    }

                    developmentEvaluation.Evaluate(taggedSentence, developmentSentence);
                }

                double developmentAccuracy = developmentEvaluation.GetPosAccuracy();

                Console.WriteLine($"Development set accuracy: {developmentAccuracy}");

                // A zero accuracy makes the quotient NaN or -Infinity, so the comparison is false
                // and a zero accuracy never counts as an improvement.
                if ((developmentAccuracy - bestAccuracy) / developmentAccuracy > MinimumRelativeGain)
                {
                    bestAccuracy = developmentAccuracy;

                    bestIterations = iterations;

                    PosPerceptron.MakeBestWeight();
                }
                else if (developmentAccuracy > bestAccuracy)
                {
                    // Marginal gain: snapshot the weights, but neither raise bestAccuracy nor
                    // reset the patience counter.
                    // NOTE(review): because bestAccuracy is unchanged, a later, slightly lower
                    // score that still beats bestAccuracy overwrites this snapshot - confirm
                    // this is intended.
                    PosPerceptron.MakeBestWeight();
                }
                else if (bestIterations <= iterations - Patience)
                {
                    Console.WriteLine("Accuracy not increasing, we are done.");

                    break;
                }
            }

            PosPerceptron.EndTraining();
        }