/// <exception cref="System.Exception"/>
public virtual void BenchmarkOptimizer()
{
    IList<CoNLLBenchmark.CoNLLSentence> train = GetSentences(DataPath + "conll.iob.4class.train");
    IList<CoNLLBenchmark.CoNLLSentence> testA = GetSentences(DataPath + "conll.iob.4class.testa");
    IList<CoNLLBenchmark.CoNLLSentence> testB = GetSentences(DataPath + "conll.iob.4class.testb");
    IList<CoNLLBenchmark.CoNLLSentence> allData = new List<CoNLLBenchmark.CoNLLSentence>();
    Sharpen.Collections.AddAll(allData, train);
    Sharpen.Collections.AddAll(allData, testA);
    Sharpen.Collections.AddAll(allData, testB);
    ICollection<string> tagsSet = new HashSet<string>();
    foreach (CoNLLBenchmark.CoNLLSentence sentence in allData)
    {
        foreach (string nerTag in sentence.ner)
        {
            tagsSet.Add(nerTag);
        }
    }
    IList<string> tags = new List<string>();
    Sharpen.Collections.AddAll(tags, tagsSet);
    embeddings = GetEmbeddings(DataPath + "google-300-trimmed.ser.gz", allData);
    log.Info("Making the training set...");
    ConcatVectorNamespace @namespace = new ConcatVectorNamespace();
    int trainSize = train.Count;
    GraphicalModel[] trainingSet = new GraphicalModel[trainSize];
    for (int i = 0; i < trainSize; i++)
    {
        if (i % 10 == 0)
        {
            log.Info(i + "/" + trainSize);
        }
        trainingSet[i] = GenerateSentenceModel(@namespace, train[i], tags);
    }
    log.Info("Training system...");
    AbstractBatchOptimizer opt = new BacktrackingAdaGradOptimizer();
    // This training call is basically what we want to benchmark; it should take ~99% of the wall-clock time
    ConcatVector weights = opt.Optimize(trainingSet, new LogLikelihoodDifferentiableFunction(), @namespace.NewWeightsVector(), 0.01, 1.0e-5, false);
    log.Info("Testing system...");
    // Evaluation method lifted from the CoNLL 2004 perl script
    IDictionary<string, double> correctChunk = new Dictionary<string, double>();
    IDictionary<string, double> foundCorrect = new Dictionary<string, double>();
    IDictionary<string, double> foundGuessed = new Dictionary<string, double>();
    double correct = 0.0;
    double total = 0.0;
    foreach (CoNLLBenchmark.CoNLLSentence sentence in testA)
    {
        GraphicalModel model = GenerateSentenceModel(@namespace, sentence, tags);
        int[] guesses = new CliqueTree(model, weights).CalculateMAP();
        string[] nerGuesses = new string[guesses.Length];
        for (int i = 0; i < guesses.Length; i++)
        {
            nerGuesses[i] = tags[guesses[i]];
            if (nerGuesses[i].Equals(sentence.ner[i]))
            {
                correct++;
                correctChunk[nerGuesses[i]] = correctChunk.GetOrDefault(nerGuesses[i], 0.0) + 1;
            }
            total++;
            foundCorrect[sentence.ner[i]] = foundCorrect.GetOrDefault(sentence.ner[i], 0.0) + 1;
            foundGuessed[nerGuesses[i]] = foundGuessed.GetOrDefault(nerGuesses[i], 0.0) + 1;
        }
    }
    log.Info("\nSystem results:\n");
    log.Info("Accuracy: " + (correct / total) + "\n");
    foreach (string tag in tags)
    {
        double precision = foundGuessed.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundGuessed[tag];
        double recall = foundCorrect.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundCorrect[tag];
        double f1 = (precision + recall == 0.0) ? 0.0 : (precision * recall * 2) / (precision + recall);
        log.Info(tag + " (" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tP:" + precision + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundGuessed.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tR:" + recall + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
        log.Info("\tF1:" + f1);
    }
}
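// The per-tag arithmetic above reduces to the usual precision/recall/F1 definitions.
// A minimal illustrative sketch follows; this helper is hypothetical and not part of
// the original benchmark (the inline computation above is what actually runs):
private static double HarmonicF1(double correctCount, double guessedCount, double goldCount)
{
    // precision = correct / guessed, recall = correct / gold, with 0 when undefined
    double precision = guessedCount == 0.0 ? 0.0 : correctCount / guessedCount;
    double recall = goldCount == 0.0 ? 0.0 : correctCount / goldCount;
    // F1 is the harmonic mean of precision and recall
    return (precision + recall) == 0.0 ? 0.0 : (2 * precision * recall) / (precision + recall);
}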
//////////////////////////////////////////////////////////////
// This is an implementation of something like MCTS, trying to take advantage of the general speed gains due to fast
// CliqueTree caching of dot products. It doesn't actually do any clever selection, preferring to select observations
// at random.
//////////////////////////////////////////////////////////////
private static void Gameplay(Random r, GraphicalModel model, ConcatVector weights, ConcatVector[] humanFeatureVectors)
{
    List<int> variablesList = new List<int>();
    List<int> variableSizesList = new List<int>();
    foreach (GraphicalModel.Factor f in model.factors)
    {
        for (int i = 0; i < f.neigborIndices.Length; i++)
        {
            int j = f.neigborIndices[i];
            if (!variablesList.Contains(j))
            {
                variablesList.Add(j);
                variableSizesList.Add(f.featuresTable.GetDimensions()[i]);
            }
        }
    }
    // The Sharpen translation dropped the identity lambda from the original Java
    // stream().mapToInt(i -> i).toArray() calls, leaving MapToInt(null); copying the
    // lists into arrays directly is equivalent.
    int[] variables = variablesList.ToArray();
    int[] variableSizes = variableSizesList.ToArray();
    IList<GamePlayerBenchmark.SampleState> childrenOfRoot = new List<GamePlayerBenchmark.SampleState>();
    CliqueTree tree = new CliqueTree(model, weights);
    int initialFactors = model.factors.Count;
    // Run some "samples"
    long start = Runtime.CurrentTimeMillis();
    long marginalsTime = 0;
    for (int i = 0; i < 1000; i++)
    {
        log.Info("\tTaking sample " + i);
        Stack<GamePlayerBenchmark.SampleState> stack = new Stack<GamePlayerBenchmark.SampleState>();
        GamePlayerBenchmark.SampleState state = SelectOrCreateChildAtRandom(r, model, variables, variableSizes, childrenOfRoot, humanFeatureVectors);
        long localMarginalsTime = 0;
        // Each "sample" is 10 moves deep
        for (int j = 0; j < 10; j++)
        {
            // log.info("\t\tFrame "+j);
            state.Push(model);
            System.Diagnostics.Debug.Assert(model.factors.Count == initialFactors + j + 1);
            ///////////////////////////////////////////////////////////
            // This is the thing we're really benchmarking
            ///////////////////////////////////////////////////////////
            if (state.cachedMarginal == null)
            {
                long s = Runtime.CurrentTimeMillis();
                state.cachedMarginal = tree.CalculateMarginalsJustSingletons();
                localMarginalsTime += Runtime.CurrentTimeMillis() - s;
            }
            stack.Push(state);
            state = SelectOrCreateChildAtRandom(r, model, variables, variableSizes, state.children, humanFeatureVectors);
        }
        log.Info("\t\t" + localMarginalsTime + " ms");
        marginalsTime += localMarginalsTime;
        while (!stack.Empty())
        {
            stack.Pop().Pop(model);
        }
        System.Diagnostics.Debug.Assert(model.factors.Count == initialFactors);
    }
    log.Info("Marginals time: " + marginalsTime + " ms");
    log.Info("Avg time per marginal: " + (marginalsTime / 200) + " ms");
    log.Info("Total time: " + (Runtime.CurrentTimeMillis() - start));
}
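// Sketch of the push/pop contract the sampling loop above relies on. The real
// GamePlayerBenchmark.SampleState is defined elsewhere; the body below is an
// assumption used for illustration, not the actual implementation. The key
// invariant is that Push adds exactly one observation factor to the model and Pop
// removes that same factor, which is what the Debug.Assert calls above check
// before and after the stack unwinds.
internal sealed class SampleStateSketch
{
    // Hypothetical field: the factor this state added, so Pop can undo exactly that change
    private GraphicalModel.Factor addedFactor;

    public void Push(GraphicalModel model, GraphicalModel.Factor observationFactor)
    {
        addedFactor = observationFactor;
        model.factors.Add(observationFactor);
    }

    public void Pop(GraphicalModel model)
    {
        model.factors.Remove(addedFactor);
        addedFactor = null;
    }
}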
// LogLikelihoodDifferentiableFunction.VariableTrainingValue (referenced below) sets a gold observation for a model to use as training gold data
/// <summary>
/// Gets a summary of the log-likelihood of a single model at a point.
/// <para/>
/// It assumes that the models have observations for the training set as metadata in
/// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING.
/// </summary>
/// <remarks>
/// Gets a summary of the log-likelihood of a single model at a point.
/// <para/>
/// It assumes that the models have observations for the training set as metadata in
/// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING. The models can also have observations fixed in
/// CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be considered fixed and will not be learned against.
/// </remarks>
/// <param name="model">the model to find the log-likelihood of</param>
/// <param name="weights">the weights to use</param>
/// <param name="gradient">the vector the gradient is accumulated into</param>
/// <returns>the gradient and value of the function at that point</returns>
public override double GetSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient)
{
    double logLikelihood = 0.0;
    CliqueTree.MarginalResult result = new CliqueTree(model, weights).CalculateMarginals();
    // Cache everything in preparation for multiple redundant requests for feature vectors
    foreach (GraphicalModel.Factor factor in model.factors)
    {
        factor.featuresTable.CacheVectors();
    }
    // Subtract the log partition function
    logLikelihood -= Math.Log(result.partitionFunction);
    // Quit if we have an infinite partition function
    if (double.IsInfinite(logLikelihood))
    {
        return 0.0;
    }
    // Add the assignment determined by the training values
    foreach (GraphicalModel.Factor factor in model.factors)
    {
        // Find the assignment, taking both fixed and training observed variables into account
        int[] assignment = new int[factor.neigborIndices.Length];
        for (int i = 0; i < assignment.Length; i++)
        {
            int deterministicValue = GetDeterministicAssignment(result.marginals[factor.neigborIndices[i]]);
            if (deterministicValue != -1)
            {
                assignment[i] = deterministicValue;
            }
            else
            {
                int trainingObservation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(factor.neigborIndices[i])[LogLikelihoodDifferentiableFunction.VariableTrainingValue]);
                assignment[i] = trainingObservation;
            }
        }
        ConcatVector features = factor.featuresTable.GetAssignmentValue(assignment).Get();
        // Add the log-likelihood from this observation to the log-likelihood
        logLikelihood += features.DotProduct(weights);
        // Add the vector from this observation to the gradient
        gradient.AddVectorInPlace(features, 1.0);
    }
    // Take expectations over features given marginals
    // NOTE: This is extremely expensive. Not sure what to do about that
    foreach (GraphicalModel.Factor factor in model.factors)
    {
        // OPTIMIZATION:
        // Rather than use the standard iterator, which creates lots of int[] arrays on the heap that need to be GC'd,
        // we use the fast version that just mutates one array. Since each assignment is read only once here, this is ideal.
        IEnumerator<int[]> fastPassByReferenceIterator = factor.featuresTable.FastPassByReferenceIterator();
        // Advance to the first assignment: the Java original called next() here, but in C#
        // MoveNext() must be called before Current is read
        fastPassByReferenceIterator.MoveNext();
        int[] assignment = fastPassByReferenceIterator.Current;
        while (true)
        {
            // Calculate the assignment probability
            double assignmentProb = result.jointMarginals[factor].GetAssignmentValue(assignment);
            // Subtract this feature set, weighted by the probability of the assignment
            if (assignmentProb > 0)
            {
                gradient.AddVectorInPlace(factor.featuresTable.GetAssignmentValue(assignment).Get(), -assignmentProb);
            }
            // This mutates the assignment[] array, rather than creating a new one
            if (!fastPassByReferenceIterator.MoveNext())
            {
                break;
            }
        }
    }
    // Uncache everything, now that the computations have completed
    foreach (GraphicalModel.Factor factor in model.factors)
    {
        factor.featuresTable.ReleaseCache();
    }
    return logLikelihood;
}
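// For reference, GetSummaryForInstance computes the standard log-linear (CRF)
// objective and gradient for a single model. In the notation below, f(x, y) is the
// summed feature vector for assignment y and Z is the partition function:
//
//   log p(y | x; w) = w · f(x, y) - log Z(x; w)
//   d/dw log p(y | x; w) = f(x, y) - E_{y' ~ p(· | x; w)}[ f(x, y') ]
//
// The first factor loop accumulates f(x, y) for the observed/training assignment,
// and the second loop subtracts the expectation term factor-by-factor, weighting
// each assignment's features by its joint marginal probability.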