/// <exception cref="System.Exception"/>
        public virtual void BenchmarkOptimizer()
        {
            IList <CoNLLBenchmark.CoNLLSentence> train   = GetSentences(DataPath + "conll.iob.4class.train");
            IList <CoNLLBenchmark.CoNLLSentence> testA   = GetSentences(DataPath + "conll.iob.4class.testa");
            IList <CoNLLBenchmark.CoNLLSentence> testB   = GetSentences(DataPath + "conll.iob.4class.testb");
            IList <CoNLLBenchmark.CoNLLSentence> allData = new List <CoNLLBenchmark.CoNLLSentence>();

            Sharpen.Collections.AddAll(allData, train);
            Sharpen.Collections.AddAll(allData, testA);
            Sharpen.Collections.AddAll(allData, testB);
            ICollection <string> tagsSet = new HashSet <string>();

            foreach (CoNLLBenchmark.CoNLLSentence sentence in allData)
            {
                foreach (string nerTag in sentence.ner)
                {
                    tagsSet.Add(nerTag);
                }
            }
            IList <string> tags = new List <string>();

            Sharpen.Collections.AddAll(tags, tagsSet);
            embeddings = GetEmbeddings(DataPath + "google-300-trimmed.ser.gz", allData);
            log.Info("Making the training set...");
            ConcatVectorNamespace @namespace = new ConcatVectorNamespace();
            int trainSize = train.Count;

            GraphicalModel[] trainingSet = new GraphicalModel[trainSize];
            for (int i = 0; i < trainSize; i++)
            {
                if (i % 10 == 0)
                {
                    log.Info(i + "/" + trainSize);
                }
                trainingSet[i] = GenerateSentenceModel(@namespace, train[i], tags);
            }
            log.Info("Training system...");
            AbstractBatchOptimizer opt = new BacktrackingAdaGradOptimizer();
            // This training call is the thing we're benchmarking; it should account for ~99% of the wall-clock time.
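            // Hedged reading of the positional arguments below, based on the Java original's
            // AbstractBatchOptimizer.optimize (only the call site is visible in this example):
            // (dataset, loss function, initial weights, l2 regularization strength,
            //  convergence threshold on the derivative norm, quiet flag).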
            ConcatVector weights = opt.Optimize(trainingSet, new LogLikelihoodDifferentiableFunction(), @namespace.NewWeightsVector(), 0.01, 1.0e-5, false);

            log.Info("Testing system...");
            // Evaluation method lifted from the CoNLL 2004 perl script
            IDictionary <string, double> correctChunk = new Dictionary <string, double>();
            IDictionary <string, double> foundCorrect = new Dictionary <string, double>();
            IDictionary <string, double> foundGuessed = new Dictionary <string, double>();
            double correct = 0.0;
            double total   = 0.0;

            foreach (CoNLLBenchmark.CoNLLSentence sentence_1 in testA)
            {
                GraphicalModel model      = GenerateSentenceModel(@namespace, sentence_1, tags);
                int[]          guesses    = new CliqueTree(model, weights).CalculateMAP();
                string[]       nerGuesses = new string[guesses.Length];
                for (int i_1 = 0; i_1 < guesses.Length; i_1++)
                {
                    nerGuesses[i_1] = tags[guesses[i_1]];
                    if (nerGuesses[i_1].Equals(sentence_1.ner[i_1]))
                    {
                        correct++;
                        correctChunk[nerGuesses[i_1]] = correctChunk.GetOrDefault(nerGuesses[i_1], 0.0) + 1;
                    }
                    total++;
                    foundCorrect[sentence_1.ner[i_1]] = foundCorrect.GetOrDefault(sentence_1.ner[i_1], 0.0) + 1;
                    foundGuessed[nerGuesses[i_1]]     = foundGuessed.GetOrDefault(nerGuesses[i_1], 0.0) + 1;
                }
            }
            log.Info("\nSystem results:\n");
            log.Info("Accuracy: " + (correct / total) + "\n");
            foreach (string tag in tags)
            {
                double precision = foundGuessed.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundGuessed[tag];
                double recall    = foundCorrect.GetOrDefault(tag, 0.0) == 0 ? 0.0 : correctChunk.GetOrDefault(tag, 0.0) / foundCorrect[tag];
                double f1        = (precision + recall == 0.0) ? 0.0 : (precision * recall * 2) / (precision + recall);
                log.Info(tag + " (" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
                log.Info("\tP:" + precision + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundGuessed.GetOrDefault(tag, 0.0) + ")");
                log.Info("\tR:" + recall + " (" + correctChunk.GetOrDefault(tag, 0.0) + "/" + foundCorrect.GetOrDefault(tag, 0.0) + ")");
                log.Info("\tF1:" + f1);
            }
        }
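
The per-tag metrics above reduce to three counters per tag: how many tokens we guessed with that tag (foundGuessed), how many gold tokens carry it (foundCorrect), and how many guesses matched gold (correctChunk). A minimal, self-contained sketch of the same precision/recall/F1 arithmetic, using made-up counts rather than real benchmark output:

using System;

class PrfSketch
{
    static void Main()
    {
        // Hypothetical counts for one tag, e.g. "PER"
        double correctChunk = 80.0;  // guessed the tag and gold agreed
        double foundGuessed = 100.0; // tokens we labeled with the tag
        double foundCorrect = 90.0;  // tokens whose gold label is the tag

        double precision = foundGuessed == 0 ? 0.0 : correctChunk / foundGuessed; // 0.8
        double recall    = foundCorrect == 0 ? 0.0 : correctChunk / foundCorrect; // ~0.889
        // F1 is the harmonic mean of precision and recall
        double f1 = (precision + recall == 0.0) ? 0.0 : (2 * precision * recall) / (precision + recall); // ~0.842
        Console.WriteLine($"P={precision} R={recall} F1={f1}");
    }
}

Note that the evaluation loop above scores individual tokens, while the actual CoNLL perl script scores chunks, so the "lifted from" comment should be read loosely.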
        //////////////////////////////////////////////////////////////
        // This is an implementation of something like MCTS, trying to take advantage of the general speed gains due to fast
        // CliqueTree caching of dot products. It doesn't actually do any clever selection, preferring to select observations
        // at random.
        //////////////////////////////////////////////////////////////
        private static void Gameplay(Random r, GraphicalModel model, ConcatVector weights, ConcatVector[] humanFeatureVectors)
        {
            IList <int> variablesList     = new List <int>();
            IList <int> variableSizesList = new List <int>();

            foreach (GraphicalModel.Factor f in model.factors)
            {
                for (int i = 0; i < f.neigborIndices.Length; i++)
                {
                    int j = f.neigborIndices[i];
                    if (!variablesList.Contains(j))
                    {
                        variablesList.Add(j);
                        variableSizesList.Add(f.featuresTable.GetDimensions()[i]);
                    }
                }
            }
            // Copy the discovered variables and their sizes into flat arrays (requires using System.Linq)
            int[] variables     = variablesList.ToArray();
            int[] variableSizes = variableSizesList.ToArray();
            IList <GamePlayerBenchmark.SampleState> childrenOfRoot = new List <GamePlayerBenchmark.SampleState>();
            CliqueTree tree           = new CliqueTree(model, weights);
            int        initialFactors = model.factors.Count;
            // Run some "samples"
            long start         = Runtime.CurrentTimeMillis();
            long marginalsTime = 0;

            for (int i_1 = 0; i_1 < 1000; i_1++)
            {
                log.Info("\tTaking sample " + i_1);
                Stack <GamePlayerBenchmark.SampleState> stack = new Stack <GamePlayerBenchmark.SampleState>();
                GamePlayerBenchmark.SampleState         state = SelectOrCreateChildAtRandom(r, model, variables, variableSizes, childrenOfRoot, humanFeatureVectors);
                long localMarginalsTime = 0;
                // Each "sample" is 10 moves deep
                for (int j = 0; j < 10; j++)
                {
                    // log.info("\t\tFrame "+j);
                    state.Push(model);
                    System.Diagnostics.Debug.Assert((model.factors.Count == initialFactors + j + 1));
                    ///////////////////////////////////////////////////////////
                    // This is the thing we're really benchmarking
                    ///////////////////////////////////////////////////////////
                    if (state.cachedMarginal == null)
                    {
                        long s = Runtime.CurrentTimeMillis();
                        state.cachedMarginal = tree.CalculateMarginalsJustSingletons();
                        localMarginalsTime  += Runtime.CurrentTimeMillis() - s;
                    }
                    stack.Push(state);
                    state = SelectOrCreateChildAtRandom(r, model, variables, variableSizes, state.children, humanFeatureVectors);
                }
                log.Info("\t\t" + localMarginalsTime + " ms");
                marginalsTime += localMarginalsTime;
                while (!stack.Empty())
                {
                    stack.Pop().Pop(model);
                }
                System.Diagnostics.Debug.Assert((model.factors.Count == initialFactors));
            }
            log.Info("Marginals time: " + marginalsTime + " ms");
            log.Info("Avg time per marginal: " + (marginalsTime / 200) + " ms");
            log.Info("Total time: " + (Runtime.CurrentTimeMillis() - start));
        }
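
The Push/Pop contract on SampleState is what makes each "sample" cheap: Push adds one observation factor to the model, Pop removes exactly that factor, and the asserts above check that the factor count is restored after every rollout so the CliqueTree can keep reusing its cached dot products. A minimal sketch of that stack discipline, with hypothetical stand-in types (the real SampleState and GraphicalModel APIs are not shown in this example):

using System.Collections.Generic;

// Hypothetical stand-ins, just to illustrate the push/pop discipline
class FactorStub { }

class ModelStub
{
    public List<FactorStub> factors = new List<FactorStub>();
}

class SampleStateSketch
{
    private FactorStub addedFactor;

    // Push: record one observation by appending a single factor to the model
    public void Push(ModelStub model)
    {
        addedFactor = new FactorStub();
        model.factors.Add(addedFactor);
    }

    // Pop: undo exactly what Push did, restoring the model's factor count
    public void Pop(ModelStub model)
    {
        model.factors.Remove(addedFactor);
        addedFactor = null;
    }
}

Because every Pop restores the factor list exactly, unwinding the stack at the end of each sample brings model.factors.Count back to initialFactors, which is what the final Debug.Assert verifies.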
Example #3
        /// <summary>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <p>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING.
        /// </summary>
        /// <remarks>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <p>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING. The models can also have observations fixed in
        /// CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be treated as fixed and will not be learned against.
        /// </remarks>
        /// <param name="model">the model to find the log-likelihood of</param>
        /// <param name="weights">the weights to use</param>
        /// <returns>the gradient and value of the function at that point</returns>
        public override double GetSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient)
        {
            double logLikelihood = 0.0;

            CliqueTree.MarginalResult result = new CliqueTree(model, weights).CalculateMarginals();
            // Cache everything in preparation for multiple redundant requests for feature vectors
            foreach (GraphicalModel.Factor factor in model.factors)
            {
                factor.featuresTable.CacheVectors();
            }
            // Subtract log partition function
            logLikelihood -= Math.Log(result.partitionFunction);
            // Quit if we have an infinite partition function
            if (double.IsInfinite(logLikelihood))
            {
                return(0.0);
            }
            // Add the determined assignment by training values
            foreach (GraphicalModel.Factor factor_1 in model.factors)
            {
                // Find the assignment, taking both fixed and training observed variables into account
                int[] assignment = new int[factor_1.neigborIndices.Length];
                for (int i = 0; i < assignment.Length; i++)
                {
                    int deterministicValue = GetDeterministicAssignment(result.marginals[factor_1.neigborIndices[i]]);
                    if (deterministicValue != -1)
                    {
                        assignment[i] = deterministicValue;
                    }
                    else
                    {
                        int trainingObservation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(factor_1.neigborIndices[i])[LogLikelihoodDifferentiableFunction.VariableTrainingValue]);
                        assignment[i] = trainingObservation;
                    }
                }
                ConcatVector features = factor_1.featuresTable.GetAssignmentValue(assignment).Get();
                // Add the log-likelihood from this observation to the log-likelihood
                logLikelihood += features.DotProduct(weights);
                // Add the vector from this observation to the gradient
                gradient.AddVectorInPlace(features, 1.0);
            }
            // Take expectations over features given marginals
            // NOTE: This is extremely expensive. Not sure what to do about that
            foreach (GraphicalModel.Factor factor_2 in model.factors)
            {
                // OPTIMIZATION:
                // Rather than use the standard iterator, which creates lots of int[] arrays on the heap that need to
                // be GC'd, we use the fast version that mutates a single shared array. Since each assignment is read
                // only once here, that is safe.
                IEnumerator <int[]> fastPassByReferenceIterator = factor_2.featuresTable.FastPassByReferenceIterator();
                while (fastPassByReferenceIterator.MoveNext())
                {
                    // MoveNext() mutates the shared assignment array in place rather than allocating a new one
                    int[] assignment = fastPassByReferenceIterator.Current;
                    // Calculate the probability of this assignment under the joint marginals
                    double assignmentProb = result.jointMarginals[factor_2].GetAssignmentValue(assignment);
                    // Subtract this feature vector, weighted by the probability of the assignment
                    if (assignmentProb > 0)
                    {
                        gradient.AddVectorInPlace(factor_2.featuresTable.GetAssignmentValue(assignment).Get(), -assignmentProb);
                    }
                }
            }
            // Uncache everything, now that the computations have completed
            foreach (GraphicalModel.Factor factor_3 in model.factors)
            {
                factor_3.featuresTable.ReleaseCache();
            }
            return(logLikelihood);
        }
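
In equation form, what GetSummaryForInstance accumulates is the standard log-linear likelihood and its gradient, written here with $\theta$ the weights, $f_c$ the per-factor feature vectors, and $Z$ the partition function (this restates the code above, not a new derivation):

\log p(y \mid x; \theta) = \sum_{c} \theta \cdot f_c(x, y_c) - \log Z(x, \theta)

\nabla_\theta \log p(y \mid x; \theta) = \sum_{c} f_c(x, y_c) - \sum_{c} \sum_{y'_c} p(y'_c \mid x; \theta)\, f_c(x, y'_c)

The first sum is the "add the vector from this observation" loop; the double sum is the expectation loop over result.jointMarginals; and the early return handles the degenerate case where $\log Z$ is infinite.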