/// <summary>
        /// Slowest possible way to calculate a derivative for a model: exhaustive definitional calculation, using the super
        /// slow logLikelihood function from this test suite.
        /// </summary>
        /// <param name="model">the model the get the derivative for</param>
        /// <param name="weights">the weights to get the derivative at</param>
        /// <returns>the derivative of the log likelihood with respect to the weights</returns>
        private ConcatVector DefinitionOfDerivative(GraphicalModel model, ConcatVector weights)
        {
            double       epsilon      = 1.0e-7;
            ConcatVector goldGradient = new ConcatVector(ConcatVecComponents);

            for (int i = 0; i < ConcatVecComponents; i++)
            {
                double[] component = new double[ConcatVecComponentLength];
                for (int j = 0; j < ConcatVecComponentLength; j++)
                {
                    // Create a unit vector pointing in the direction of this element of this component
                    ConcatVector unitVectorIJ = new ConcatVector(ConcatVecComponents);
                    unitVectorIJ.SetSparseComponent(i, j, 1.0);
                    // Create a +eps weight vector
                    ConcatVector weightsPlusEpsilon = weights.DeepClone();
                    weightsPlusEpsilon.AddVectorInPlace(unitVectorIJ, epsilon);
                    // Create a -eps weight vector
                    ConcatVector weightsMinusEpsilon = weights.DeepClone();
                    weightsMinusEpsilon.AddVectorInPlace(unitVectorIJ, -epsilon);
                    // Use the definition (f(x+eps) - f(x-eps))/(2*eps)
                    component[j] = (LogLikelihood(model, weightsPlusEpsilon) - LogLikelihood(model, weightsMinusEpsilon)) / (2 * epsilon);
                    // If we encounter an impossible assignment, LogLikelihood returns negative infinity; subtracting
                    // two infinite values yields NaN, which would break the definitional calculation, so zero it out
                    if (double.IsNaN(component[j]))
                    {
                        component[j] = 0.0;
                    }
                }
                goldGradient.SetDenseComponent(i, component);
            }
            return(goldGradient);
        }
        public virtual void TestGetSummaryForInstance(GraphicalModel[] dataset, ConcatVector weights)
        {
            LogLikelihoodDifferentiableFunction fn = new LogLikelihoodDifferentiableFunction();

            foreach (GraphicalModel model in dataset)
            {
                double       goldLogLikelihood = LogLikelihood(model, (ConcatVector)weights);
                ConcatVector goldGradient      = DefinitionOfDerivative(model, (ConcatVector)weights);
                ConcatVector gradient          = new ConcatVector(0);
                double       logLikelihood     = fn.GetSummaryForInstance(model, (ConcatVector)weights, gradient);
                NUnit.Framework.Assert.AreEqual(goldLogLikelihood, logLikelihood, Math.Max(1.0e-3, Math.Abs(goldLogLikelihood) * 1.0e-2));
                // Our check for gradient similarity uses the distance between the endpoints of the two vectors,
                // rather than elementwise similarity, because that distance can be controlled as a percentage
                ConcatVector difference = goldGradient.DeepClone();
                difference.AddVectorInPlace(gradient, -1);
                double distance = Math.Sqrt(difference.DotProduct(difference));
                // The tolerance here is pretty large, since the gold gradient is computed approximately
                // 5% still tells us whether everything is working or not though
                if (distance > 5.0e-2)
                {
                    System.Console.Error.WriteLine("Definitional and calculated gradient differ!");
                    System.Console.Error.WriteLine("Definition approx: " + goldGradient);
                    System.Console.Error.WriteLine("Calculated: " + gradient);
                }
                NUnit.Framework.Assert.AreEqual(0.0, distance, 5.0e-2);
            }
        }
        // this magic number was arrived at in relation to the CoNLL benchmark, and tinkering
        private const double alpha = 0.1;  // AdaGrad initial step size, used in UpdateWeights below
        public override bool UpdateWeights(ConcatVector weights, ConcatVector gradient, double logLikelihood, AbstractBatchOptimizer.OptimizationState optimizationState, bool quiet)
        {
            BacktrackingAdaGradOptimizer.AdaGradOptimizationState s = (BacktrackingAdaGradOptimizer.AdaGradOptimizationState)optimizationState;
            double logLikelihoodChange = logLikelihood - s.lastLogLikelihood;

            if (logLikelihoodChange == 0)
            {
                if (!quiet)
                {
                    log.Info("\tlogLikelihood improvement = 0: quitting");
                }
                return(true);
            }
            else
            {
                // Check if we should backtrack
                if (logLikelihoodChange < 0)
                {
                    // If we should, move the weights back by half, and cut the lastDerivative by half
                    s.lastDerivative.MapInPlace(d => d / 2);
                    weights.AddVectorInPlace(s.lastDerivative, -1.0);
                    if (!quiet)
                    {
                        log.Info("\tBACKTRACK...");
                    }
                    // if the lastDerivative norm falls below a threshold, it means we've converged
                    if (s.lastDerivative.DotProduct(s.lastDerivative) < 1.0e-10)
                    {
                        if (!quiet)
                        {
                            log.Info("\tBacktracking derivative norm " + s.lastDerivative.DotProduct(s.lastDerivative) + " < 1.0e-9: quitting");
                        }
                        return(true);
                    }
                }
                else
                {
                    // Apply AdaGrad: accumulate the squared gradient, then scale each gradient element by
                    // alpha / sqrt(accumulated squared gradient) before taking the step
                    ConcatVector squared = gradient.DeepClone();
                    squared.MapInPlace(d => d * d);
                    s.adagradAccumulator.AddVectorInPlace(squared, 1.0);
                    ConcatVector sqrt = s.adagradAccumulator.DeepClone();
                    sqrt.MapInPlace(d => d == 0 ? alpha : alpha / Math.Sqrt(d));
                    gradient.ElementwiseProductInPlace(sqrt);
                    weights.AddVectorInPlace(gradient, 1.0);
                    // Setup for backtracking, in case necessary
                    s.lastDerivative    = gradient;
                    s.lastLogLikelihood = logLikelihood;
                    if (!quiet)
                    {
                        log.Info("\tLL: " + logLikelihood);
                    }
                }
            }
            return(false);
        }
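        // Not part of the original source: a minimal sketch of an outer loop that could drive UpdateWeights.
        // It assumes GetSummaryForInstance accumulates each example's gradient into the vector passed to it
        // (as TestGetSummaryForInstance above suggests), and it ignores L2 regularization and multithreading,
        // which the real AbstractBatchOptimizer handles elsewhere.
        private ConcatVector SketchOptimizationLoop<T>(AbstractDifferentiableFunction<T> fn, T[] dataset, ConcatVector initialWeights)
        {
            ConcatVector weights = initialWeights.DeepClone();
            AbstractBatchOptimizer.OptimizationState state = GetFreshOptimizationState(weights);
            for (int iteration = 0; iteration < 1000; iteration++)
            {
                ConcatVector gradient = new ConcatVector(0);
                double logLikelihood = 0.0;
                foreach (T example in dataset)
                {
                    logLikelihood += fn.GetSummaryForInstance(example, weights, gradient);
                }
                // UpdateWeights mutates weights in place and returns true once the optimizer decides it has converged
                if (UpdateWeights(weights, gradient, logLikelihood, state, true))
                {
                    break;
                }
            }
            return weights;
        }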
        internal static long CloneBenchmark(ConcatVector vector)
        {
            long before = Runtime.CurrentTimeMillis();

            for (int i = 0; i < 10000000; i++)
            {
                vector.DeepClone();
            }
            return(Runtime.CurrentTimeMillis() - before);
        }
 public TrainingWorker(AbstractBatchOptimizer _enclosing, T[] dataset, AbstractDifferentiableFunction <T> fn, ConcatVector initialWeights, double l2regularization, double convergenceDerivativeNorm, bool quiet)
 {
     this._enclosing        = _enclosing;
     this.optimizationState = this._enclosing.GetFreshOptimizationState(initialWeights);
     this.weights           = initialWeights.DeepClone();
     this.dataset           = dataset;
     this.fn = fn;
     this.l2regularization          = l2regularization;
     this.convergenceDerivativeNorm = convergenceDerivativeNorm;
     this.quiet = quiet;
 }
        public virtual void TestOptimizeLogLikelihood(AbstractBatchOptimizer optimizer, GraphicalModel[] dataset, ConcatVector initialWeights, double l2regularization)
        {
            AbstractDifferentiableFunction <GraphicalModel> ll = new LogLikelihoodDifferentiableFunction();
            ConcatVector finalWeights = optimizer.Optimize((GraphicalModel[])dataset, ll, (ConcatVector)initialWeights, (double)l2regularization, 1.0e-9, true);

            System.Console.Error.WriteLine("Finished optimizing");
            double logLikelihood = GetValueSum((GraphicalModel[])dataset, finalWeights, ll, (double)l2regularization);
            // Check in a whole bunch of random directions really nearby that there is no nearby point with a higher log
            // likelihood
            Random r = new Random(42);

            for (int i = 0; i < 1000; i++)
            {
                int          size            = finalWeights.GetNumberOfComponents();
                ConcatVector randomDirection = new ConcatVector(size);
                for (int j = 0; j < size; j++)
                {
                    double[] dense = new double[finalWeights.IsComponentSparse(j) ? finalWeights.GetSparseIndex(j) + 1 : finalWeights.GetDenseComponent(j).Length];
                    for (int k = 0; k < dense.Length; k++)
                    {
                        dense[k] = (r.NextDouble() - 0.5) * 1.0e-3;
                    }
                    randomDirection.SetDenseComponent(j, dense);
                }
                ConcatVector randomPerturbation = finalWeights.DeepClone();
                randomPerturbation.AddVectorInPlace(randomDirection, 1.0);
                double randomPerturbedLogLikelihood = GetValueSum((GraphicalModel[])dataset, randomPerturbation, ll, (double)l2regularization);
                // Check that we're within a very small margin of error (around 3 decimal places) of the randomly
                // discovered value
                if (logLikelihood < randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))))
                {
                    System.Console.Error.WriteLine("Thought optimal point was: " + logLikelihood);
                    System.Console.Error.WriteLine("Discovered better point: " + randomPerturbedLogLikelihood);
                }
                NUnit.Framework.Assert.IsTrue(logLikelihood >= randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))));
            }
        }
 /// <summary>Create an Inference object for a given set of weights, and a model.</summary>
 /// <remarks>
 /// Create an Inference object for a given set of weights, and a model.
 /// <p>
 /// The object is around to facilitate caching as an eventual optimization, when models are changing in minor ways
 /// and inference is required several times. Work is done lazily, so it is left until actual inference is requested.
 /// </remarks>
 /// <param name="model">the model to be computed over, subject to change in the future</param>
 /// <param name="weights">
 /// the weights to dot product with model features to get log-linear factors, is cloned internally so
 /// that no changes to the weights vector will be reflected by the CliqueTree. If you want to change
 /// the weights, you must create a new CliqueTree.
 /// </param>
 public CliqueTree(GraphicalModel model, ConcatVector weights)
 {
     this.model   = model;
     this.weights = weights.DeepClone();
 }
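 // Not part of the original source: a hypothetical sketch illustrating the cloning contract documented above.
 // Because the constructor deep-clones the weights, mutating the caller's vector after construction does not
 // affect the CliqueTree; to use different weights, a new CliqueTree must be created.
 public static CliqueTree BuildTreeThenPerturbWeights(GraphicalModel model, ConcatVector weights)
 {
     CliqueTree tree = new CliqueTree(model, weights);
     // This perturbation is invisible to 'tree', which already holds its own deep clone of the weights
     ConcatVector perturbation = new ConcatVector(1);
     perturbation.SetDenseComponent(0, new double[] { 1.0 });
     weights.AddVectorInPlace(perturbation, 1.0);
     return tree;
 }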