/// <summary>
/// Trains a CoNLL NER model end-to-end and reports accuracy and per-tag P/R/F1,
/// as a wall-clock benchmark for the batch optimizer.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void BenchmarkOptimizer()
{
    IList<CoNLLBenchmark.CoNLLSentence> train = GetSentences(DataPath + "conll.iob.4class.train");
    IList<CoNLLBenchmark.CoNLLSentence> testA = GetSentences(DataPath + "conll.iob.4class.testa");
    IList<CoNLLBenchmark.CoNLLSentence> testB = GetSentences(DataPath + "conll.iob.4class.testb");
    IList<CoNLLBenchmark.CoNLLSentence> allData = new List<CoNLLBenchmark.CoNLLSentence>();
    Sharpen.Collections.AddAll(allData, train);
    Sharpen.Collections.AddAll(allData, testA);
    Sharpen.Collections.AddAll(allData, testB);
    // Collect the set of NER tags observed anywhere in the data.
    ICollection<string> tagsSet = new HashSet<string>();
    foreach (CoNLLBenchmark.CoNLLSentence sentence in allData)
    {
        foreach (string nerTag in sentence.ner)
        {
            tagsSet.Add(nerTag);
        }
    }
    IList<string> tags = new List<string>();
    Sharpen.Collections.AddAll(tags, tagsSet);
    embeddings = GetEmbeddings(DataPath + "google-300-trimmed.ser.gz", allData);
    log.Info("Making the training set...");
    ConcatVectorNamespace @namespace = new ConcatVectorNamespace();
    int trainSize = train.Count;
    GraphicalModel[] trainingSet = new GraphicalModel[trainSize];
    for (int i = 0; i < trainSize; i++)
    {
        if (i % 10 == 0)
        {
            log.Info(i + "/" + trainSize);
        }
        trainingSet[i] = GenerateSentenceModel(@namespace, train[i], tags);
    }
    log.Info("Training system...");
    AbstractBatchOptimizer opt = new BacktrackingAdaGradOptimizer();
    // This training call is what the benchmark is actually measuring; it should take ~99% of the wall-clock time.
    ConcatVector weights = opt.Optimize(trainingSet, new LogLikelihoodDifferentiableFunction(), @namespace.NewWeightsVector(), 0.01, 1.0e-5, false);
    log.Info("Testing system...");
    // Evaluation method lifted from the CoNLL 2004 perl script
    IDictionary<string, double> correctChunk = new Dictionary<string, double>();
    IDictionary<string, double> foundCorrect = new Dictionary<string, double>();
    IDictionary<string, double> foundGuessed = new Dictionary<string, double>();
    double correct = 0.0;
    double total = 0.0;
    foreach (CoNLLBenchmark.CoNLLSentence sentence in testA)
    {
        GraphicalModel model = GenerateSentenceModel(@namespace, sentence, tags);
        // Decode the most likely tag sequence under the trained weights.
        int[] guesses = new CliqueTree(model, weights).CalculateMAP();
        string[] nerGuesses = new string[guesses.Length];
        for (int i = 0; i < guesses.Length; i++)
        {
            nerGuesses[i] = tags[guesses[i]];
            if (nerGuesses[i].Equals(sentence.ner[i]))
            {
                correct++;
                correctChunk[nerGuesses[i]] = correctChunk.GetOrDefault(nerGuesses[i], 0.0) + 1;
            }
            total++;
            foundCorrect[sentence.ner[i]] = foundCorrect.GetOrDefault(sentence.ner[i], 0.0) + 1;
            foundGuessed[nerGuesses[i]] = foundGuessed.GetOrDefault(nerGuesses[i], 0.0) + 1;
        }
    }
    log.Info("\nSystem results:\n");
    log.Info("Accuracy: " + (correct / total) + "\n");
    // Per-tag precision, recall, and F1, guarding against division by zero.
    foreach (string tag in tags)
    {
        double precision = foundGuessed.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundGuessed[tag];
        double recall = foundCorrect.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundCorrect[tag];
        double f1 = (precision + recall == 0.0) ? 0.0 : (precision * recall * 2) / (precision + recall);
        log.Info(tag + " (" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tP:" + precision + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundGuessed.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tR:" + recall + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tF1:" + f1);
    }
}
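The per-tag tallies above call GetOrDefault on IDictionary, which is not a member of the standard .NET IDictionary<TKey, TValue> interface; in this port it presumably comes from the Sharpen compatibility layer. As a reference point, here is a minimal sketch of an equivalent extension method (illustrative only, not the actual Sharpen implementation; the class name DictionaryExtensions is made up here):

    using System.Collections.Generic;

    public static class DictionaryExtensions
    {
        // Returns the mapped value if the key is present, otherwise the supplied default.
        public static TValue GetOrDefault<TKey, TValue>(
            this IDictionary<TKey, TValue> dict, TKey key, TValue defaultValue)
        {
            TValue value;
            return dict.TryGetValue(key, out value) ? value : defaultValue;
        }
    }

With a shim like this in scope, an expression such as correctChunk.GetOrDefault(tag, 0.0) reads a count without first checking ContainsKey, which is why the evaluation loop can increment counts for previously unseen tags in one line.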