/// <summary>
/// Runs the training loop for the NE perceptron.
/// </summary>
/// <remarks>
/// Every iteration tags a copy of each usable training sentence with <c>TagNe</c>, scores it against
/// the gold sentence, and calls <c>NeUpdateWeights</c> when <c>CheckNesEqual</c> reports a difference.
/// With development data, the development F-score decides when <c>NePerceptron.MakeBestWeight</c> is
/// called and when training stops early; without it, all <c>MaximumNeIterations</c> iterations run and
/// <c>MakeBestWeight</c> is called only during the last one.
/// </remarks>
/// <param name="trainSentences">
/// Gold training sentences. Empty sentences and sentences whose first token has a negative
/// <c>NeTag</c> are skipped.
/// </param>
/// <param name="developmentSentences">
/// Gold development sentences used for model selection, or <c>null</c> to train without them.
/// </param>
protected void TrainNe(TaggedToken[][] trainSentences, TaggedToken[][] developmentSentences)
{
    NePerceptron.StartTraining();

    // Visiting order of the training sentences: the identity order 0..n-1, reused by every iteration.
    List<int> visitOrder = new List<int>(trainSentences.Length);
    for (int index = 0; index < trainSentences.Length; index++)
    {
        visitOrder.Add(index);
    }

    double bestFScore = 0.0;
    int lastImprovedEpoch = 0;

    for (int epoch = 0; epoch < MaximumNeIterations; epoch++)
    {
        Console.WriteLine($"Starting NE iteration {epoch}");

        Evaluation trainEvaluation = new Evaluation();
        int pendingTokens = 0;

        foreach (int position in visitOrder)
        {
            TaggedToken[] gold = trainSentences[position];

            // Nothing to learn from an empty sentence.
            if (gold.Length == 0)
            {
                continue;
            }

            // A negative NeTag on the first token excludes the whole sentence.
            if (gold[0].NeTag < 0)
            {
                continue;
            }

            // Tag a copy so the gold sentence stays untouched for comparison.
            TaggedToken[] predicted = new TaggedToken[gold.Length];
            for (int t = 0; t < predicted.Length; t++)
            {
                predicted[t] = new TaggedToken(gold[t]);
            }

            // The second argument is false for training data and true for development data;
            // its meaning is defined by TagNe, which is not shown here.
            TagNe(predicted, false);
            trainEvaluation.Evaluate(predicted, gold);

            bool entitiesMatch = trainEvaluation.CheckNesEqual(predicted, gold);
            if (!entitiesMatch)
            {
                NeUpdateWeights(predicted, gold);
            }

            // Accumulate once more than AccumulateLimit tokens have been processed since the last time.
            pendingTokens += gold.Length;
            if (pendingTokens > AccumulateLimit)
            {
                NePerceptron.AccumulateWeights();
                pendingTokens = 0;
            }
        }

        Console.WriteLine($"Training set F-score: {trainEvaluation.GetNeFScore()}");

        if (developmentSentences == null)
        {
            // No development data: run every iteration and keep whatever the last one produced.
            bool isFinalEpoch = epoch == MaximumNeIterations - 1;
            if (isFinalEpoch)
            {
                NePerceptron.MakeBestWeight();
            }

            continue;
        }

        Evaluation developmentEvaluation = new Evaluation();
        foreach (TaggedToken[] goldDevelopment in developmentSentences)
        {
            TaggedToken[] predictedDevelopment = new TaggedToken[goldDevelopment.Length];
            for (int t = 0; t < predictedDevelopment.Length; t++)
            {
                predictedDevelopment[t] = new TaggedToken(goldDevelopment[t]);
            }

            // TrainingMode is switched off only for the duration of the tagging call.
            TrainingMode = false;
            TagNe(predictedDevelopment, true);
            TrainingMode = true;

            developmentEvaluation.Evaluate(predictedDevelopment, goldDevelopment);
        }

        double developmentFScore = developmentEvaluation.GetNeFScore();
        Console.WriteLine($"Development set F-Score: {developmentFScore}");

        // Relative gain over the best score so far. This is NaN when the development score is 0,
        // in which case the comparison below is false.
        double relativeGain = (developmentFScore - bestFScore) / developmentFScore;
        if (relativeGain > 0.00025)
        {
            bestFScore = developmentFScore;
            lastImprovedEpoch = epoch;
            NePerceptron.MakeBestWeight();
            continue;
        }

        // Stop once three iterations have passed since the last sufficient gain.
        if (lastImprovedEpoch <= epoch - 3)
        {
            Console.WriteLine("F-score not increasing, we are done.");
            break;
        }
    }

    NePerceptron.EndTraining();
}
/// <summary>
/// Runs the training loop for the POS perceptron.
/// </summary>
/// <remarks>
/// Every iteration tags a copy of each usable training sentence with <c>TagPos</c>, scores it against
/// the gold sentence, and calls <c>PosUpdateWeights</c> when the number of correct tags grew by less
/// than the sentence length. With development data, the development accuracy decides when
/// <c>PosPerceptron.MakeBestWeight</c> is called and when training stops early; without it, all
/// <c>MaximumPosIterations</c> iterations run and <c>MakeBestWeight</c> is called only during the last one.
/// </remarks>
/// <param name="trainSentences">
/// Gold training sentences, visited in array order. Empty sentences and sentences whose first token
/// has a negative <c>PosTag</c> are skipped.
/// </param>
/// <param name="developmentSentences">
/// Gold development sentences used for model selection, or <c>null</c> to train without them.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="trainSentences"/> is <c>null</c>.</exception>
protected void TrainPos(TaggedToken[][] trainSentences, TaggedToken[][] developmentSentences)
{
    // Fail before StartTraining() so a bad argument does not leave the perceptron mid-training.
    if (trainSentences == null)
    {
        throw new ArgumentNullException(nameof(trainSentences));
    }

    // Relative gain in development accuracy that counts as real progress.
    const double minimumRelativeGain = 0.00025;

    // Number of iterations without real progress after which training stops.
    const int patience = 3;

    PosPerceptron.StartTraining();

    // Early-stopping reference: accuracy and iteration of the last gain above minimumRelativeGain.
    int bestIterations = 0;
    double bestAccuracy = 0.0;

    // Development accuracy of the weights most recently passed to MakeBestWeight(). Tracked apart
    // from bestAccuracy so that a marginally better snapshot is never replaced by a worse one.
    // NOTE(review): assumes MakeBestWeight() records the current weights as the ones to keep.
    double snapshotAccuracy = 0.0;

    for (int iterations = 0; iterations < MaximumPosIterations; iterations++)
    {
        Console.WriteLine($"Starting POS iteration {iterations}");

        int tokensCount = 0;
        Evaluation trainEvaluation = new Evaluation();

        foreach (TaggedToken[] trainSent in trainSentences)
        {
            if (trainSent.Length == 0 || trainSent[0].PosTag < 0)
            {
                continue;
            }

            // Tag a copy so the gold sentence stays untouched for comparison.
            TaggedToken[] taggedSent = new TaggedToken[trainSent.Length];
            for (int i = 0; i < trainSent.Length; i++)
            {
                taggedSent[i] = new TaggedToken(trainSent[i]);
            }

            // The second argument is false for training data and true for development data;
            // its meaning is defined by TagPos, which is not shown here.
            TagPos(taggedSent, false);

            // Update only when PosCorrect grew by less than the sentence length.
            // NOTE(review): presumably Evaluate adds one to PosCorrect per correct tag, so this
            // means at least one tag was wrong - confirm against Evaluation.
            int oldPosCorrect = trainEvaluation.PosCorrect;
            trainEvaluation.Evaluate(taggedSent, trainSent);
            if (trainEvaluation.PosCorrect != oldPosCorrect + trainSent.Length)
            {
                PosUpdateWeights(taggedSent, trainSent);
            }

            // Accumulate once more than AccumulateLimit tokens have been processed since the last time.
            tokensCount += trainSent.Length;
            if (tokensCount > AccumulateLimit)
            {
                PosPerceptron.AccumulateWeights();
                tokensCount = 0;
            }
        }

        Console.WriteLine($"Training set accuracy: {trainEvaluation.GetPosAccuracy()}");

        if (developmentSentences == null)
        {
            // No development data: run every iteration and keep whatever the last one produced.
            if (iterations == MaximumPosIterations - 1)
            {
                PosPerceptron.MakeBestWeight();
            }

            continue;
        }

        Evaluation developmentEvaluation = new Evaluation();
        foreach (TaggedToken[] developmentSentence in developmentSentences)
        {
            TaggedToken[] taggedSentence = new TaggedToken[developmentSentence.Length];
            for (int i = 0; i < developmentSentence.Length; i++)
            {
                taggedSentence[i] = new TaggedToken(developmentSentence[i]);
            }

            // TrainingMode is off only while tagging; the finally block restores it even when
            // TagPos throws, so a failure cannot leave the tagger outside training mode.
            TrainingMode = false;
            try
            {
                TagPos(taggedSentence, true);
            }
            finally
            {
                TrainingMode = true;
            }

            developmentEvaluation.Evaluate(taggedSentence, developmentSentence);
        }

        double developmentAccuracy = developmentEvaluation.GetPosAccuracy();
        Console.WriteLine($"Development set accuracy: {developmentAccuracy}");

        // The quotient is NaN when the development accuracy is 0; the comparison is then false.
        if ((developmentAccuracy - bestAccuracy) / developmentAccuracy > minimumRelativeGain)
        {
            // Real progress: move the early-stopping reference and keep these weights.
            bestAccuracy = developmentAccuracy;
            bestIterations = iterations;
            snapshotAccuracy = developmentAccuracy;
            PosPerceptron.MakeBestWeight();
        }
        else if (developmentAccuracy > bestAccuracy)
        {
            // Marginal gain: too small to reset the stopping reference, but still worth keeping,
            // provided it beats the snapshot already held. Without this check a later, worse
            // iteration that is still above bestAccuracy would overwrite a better snapshot.
            if (developmentAccuracy > snapshotAccuracy)
            {
                snapshotAccuracy = developmentAccuracy;
                PosPerceptron.MakeBestWeight();
            }
        }
        else if (bestIterations <= iterations - patience)
        {
            Console.WriteLine("Accuracy not increasing, we are done.");
            break;
        }
    }

    PosPerceptron.EndTraining();
}