/// <summary>
/// Slowest possible way to calculate a derivative for a model: exhaustive definitional calculation, using the super
/// slow logLikelihood function from this test suite.
/// </summary>
/// <param name="model">the model to get the derivative for</param>
/// <param name="weights">the weights to get the derivative at</param>
/// <returns>the derivative of the log likelihood with respect to the weights</returns>
private ConcatVector DefinitionOfDerivative(GraphicalModel model, ConcatVector weights)
{
    double epsilon = 1.0e-7;
    ConcatVector goldGradient = new ConcatVector(ConcatVecComponents);
    for (int i = 0; i < ConcatVecComponents; i++)
    {
        double[] component = new double[ConcatVecComponentLength];
        for (int j = 0; j < ConcatVecComponentLength; j++)
        {
            // Create a unit vector pointing in the direction of this element of this component
            ConcatVector unitVectorIJ = new ConcatVector(ConcatVecComponents);
            unitVectorIJ.SetSparseComponent(i, j, 1.0);
            // Create a +eps weight vector
            ConcatVector weightsPlusEpsilon = weights.DeepClone();
            weightsPlusEpsilon.AddVectorInPlace(unitVectorIJ, epsilon);
            // Create a -eps weight vector
            ConcatVector weightsMinusEpsilon = weights.DeepClone();
            weightsMinusEpsilon.AddVectorInPlace(unitVectorIJ, -epsilon);
            // Use the central-difference definition (f(x+eps) - f(x-eps)) / (2*eps)
            component[j] = (LogLikelihood(model, weightsPlusEpsilon) - LogLikelihood(model, weightsMinusEpsilon)) / (2 * epsilon);
            // If we encounter an impossible assignment, logLikelihood will return negative infinity, which will
            // screw with the definitional calculation
            if (double.IsNaN(component[j]))
            {
                component[j] = 0.0;
            }
        }
        goldGradient.SetDenseComponent(i, component);
    }
    return goldGradient;
}
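// Hypothetical helper, not part of the original test suite: a quick spot-check of the analytic gradient from
// LogLikelihoodDifferentiableFunction against the finite-difference gold gradient above. It only uses calls
// that already appear in this suite (GetSummaryForInstance, DefinitionOfDerivative, AddVectorInPlace, DotProduct);
// the method and variable names are illustrative only.
private double GradientDistanceFromDefinition(GraphicalModel model, ConcatVector weights)
{
    LogLikelihoodDifferentiableFunction fn = new LogLikelihoodDifferentiableFunction();
    ConcatVector analyticGradient = new ConcatVector(0);
    fn.GetSummaryForInstance(model, weights, analyticGradient);
    // Distance between the endpoints of the two gradient vectors; near zero when they agree
    ConcatVector difference = DefinitionOfDerivative(model, weights);
    difference.AddVectorInPlace(analyticGradient, -1.0);
    return Math.Sqrt(difference.DotProduct(difference));
}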
public virtual void TestGetSummaryForInstance(GraphicalModel[] dataset, ConcatVector weights)
{
    LogLikelihoodDifferentiableFunction fn = new LogLikelihoodDifferentiableFunction();
    foreach (GraphicalModel model in dataset)
    {
        double goldLogLikelihood = LogLikelihood(model, weights);
        ConcatVector goldGradient = DefinitionOfDerivative(model, weights);
        ConcatVector gradient = new ConcatVector(0);
        double logLikelihood = fn.GetSummaryForInstance(model, weights, gradient);
        // The computed log likelihood should agree with the gold value to within the delta tolerance
        NUnit.Framework.Assert.AreEqual(goldLogLikelihood, logLikelihood, Math.Max(1.0e-3, goldLogLikelihood * 1.0e-2));
        // Our check for gradient similarity involves distance between endpoints of vectors, instead of elementwise
        // similarity, b/c it can be controlled as a percentage
        ConcatVector difference = goldGradient.DeepClone();
        difference.AddVectorInPlace(gradient, -1);
        double distance = Math.Sqrt(difference.DotProduct(difference));
        // The tolerance here is pretty large, since the gold gradient is computed approximately.
        // 5% still tells us whether everything is working or not, though
        if (distance > 5.0e-2)
        {
            System.Console.Error.WriteLine("Definitional and calculated gradient differ!");
            System.Console.Error.WriteLine("Definition approx: " + goldGradient);
            System.Console.Error.WriteLine("Calculated: " + gradient);
        }
        NUnit.Framework.Assert.AreEqual(0.0, distance, 5.0e-2);
    }
}
// this magic number was arrived at with relation to the CoNLL benchmark, and tinkering
public override bool UpdateWeights(ConcatVector weights, ConcatVector gradient, double logLikelihood, AbstractBatchOptimizer.OptimizationState optimizationState, bool quiet)
{
    BacktrackingAdaGradOptimizer.AdaGradOptimizationState s = (BacktrackingAdaGradOptimizer.AdaGradOptimizationState)optimizationState;
    double logLikelihoodChange = logLikelihood - s.lastLogLikelihood;
    if (logLikelihoodChange == 0)
    {
        if (!quiet)
        {
            log.Info("\tlogLikelihood improvement = 0: quitting");
        }
        return true;
    }
    else
    {
        // Check if we should backtrack
        if (logLikelihoodChange < 0)
        {
            // If we should, move the weights back by half, and cut the lastDerivative by half.
            // The lambdas passed to MapInPlace were lost in conversion (the source passed null); the mappings
            // below are reconstructed from the surrounding comments and variable names, assuming MapInPlace
            // applies the given function to every element.
            s.lastDerivative.MapInPlace(x => x / 2);
            weights.AddVectorInPlace(s.lastDerivative, -1.0);
            if (!quiet)
            {
                log.Info("\tBACKTRACK...");
            }
            // if the lastDerivative norm falls below a threshold, it means we've converged
            if (s.lastDerivative.DotProduct(s.lastDerivative) < 1.0e-10)
            {
                if (!quiet)
                {
                    log.Info("\tBacktracking derivative norm " + s.lastDerivative.DotProduct(s.lastDerivative) + " < 1.0e-10: quitting");
                }
                return true;
            }
        }
        else
        {
            // Apply AdaGrad: accumulate the squared gradient, then scale the step by the inverse square root
            // of the accumulator
            ConcatVector squared = gradient.DeepClone();
            squared.MapInPlace(x => x * x);
            s.adagradAccumulator.AddVectorInPlace(squared, 1.0);
            ConcatVector sqrt = s.adagradAccumulator.DeepClone();
            // Assumed standard AdaGrad scaling, where "alpha" is the learning rate constant (presumably the
            // magic number mentioned above, defined elsewhere in this class); guards against dividing by zero
            sqrt.MapInPlace(x => x == 0 ? alpha : alpha / Math.Sqrt(x));
            gradient.ElementwiseProductInPlace(sqrt);
            weights.AddVectorInPlace(gradient, 1.0);
            // Setup for backtracking, in case necessary
            s.lastDerivative = gradient;
            s.lastLogLikelihood = logLikelihood;
            if (!quiet)
            {
                log.Info("\tLL: " + logLikelihood);
            }
        }
    }
    return false;
}
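// Hypothetical usage sketch, not part of the original source: how this update rule is typically driven
// through the batch optimizer interface. The no-argument BacktrackingAdaGradOptimizer constructor and the
// variable names (trainingModels, startingWeights) are assumptions; the Optimize(...) signature matches the
// call in the optimization test below.
internal static ConcatVector TrainWithAdaGrad(GraphicalModel[] trainingModels, ConcatVector startingWeights)
{
    AbstractBatchOptimizer optimizer = new BacktrackingAdaGradOptimizer();
    // l2 regularization of 0.1, convergence threshold of 1.0e-9 on the derivative norm, verbose logging
    return optimizer.Optimize(trainingModels, new LogLikelihoodDifferentiableFunction(), startingWeights, 0.1, 1.0e-9, false);
}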
internal static long CloneBenchmark(ConcatVector vector)
{
    long before = Runtime.CurrentTimeMillis();
    for (int i = 0; i < 10000000; i++)
    {
        vector.DeepClone();
    }
    return Runtime.CurrentTimeMillis() - before;
}
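// Hypothetical usage sketch, not part of the original benchmark harness: timing DeepClone on a small vector.
// The component layout below is illustrative only.
internal static void RunCloneBenchmark()
{
    ConcatVector benchVector = new ConcatVector(2);
    benchVector.SetDenseComponent(0, new double[] { 0.1, 0.2, 0.3 });
    benchVector.SetSparseComponent(1, 4, 1.0);
    System.Console.WriteLine("10,000,000 deep clones took " + CloneBenchmark(benchVector) + " ms");
}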
public TrainingWorker(AbstractBatchOptimizer _enclosing, T[] dataset, AbstractDifferentiableFunction<T> fn, ConcatVector initialWeights, double l2regularization, double convergenceDerivativeNorm, bool quiet)
{
    this._enclosing = _enclosing;
    this.optimizationState = this._enclosing.GetFreshOptimizationState(initialWeights);
    this.weights = initialWeights.DeepClone();
    this.dataset = dataset;
    this.fn = fn;
    this.l2regularization = l2regularization;
    this.convergenceDerivativeNorm = convergenceDerivativeNorm;
    this.quiet = quiet;
}
public virtual void TestOptimizeLogLikelihood(AbstractBatchOptimizer optimizer, GraphicalModel[] dataset, ConcatVector initialWeights, double l2regularization)
{
    AbstractDifferentiableFunction<GraphicalModel> ll = new LogLikelihoodDifferentiableFunction();
    ConcatVector finalWeights = optimizer.Optimize(dataset, ll, initialWeights, l2regularization, 1.0e-9, true);
    System.Console.Error.WriteLine("Finished optimizing");
    double logLikelihood = GetValueSum(dataset, finalWeights, ll, l2regularization);
    // Check in a whole bunch of random directions really nearby that there is no nearby point with a higher log
    // likelihood
    Random r = new Random(42);
    for (int i = 0; i < 1000; i++)
    {
        int size = finalWeights.GetNumberOfComponents();
        ConcatVector randomDirection = new ConcatVector(size);
        for (int j = 0; j < size; j++)
        {
            double[] dense = new double[finalWeights.IsComponentSparse(j) ? finalWeights.GetSparseIndex(j) + 1 : finalWeights.GetDenseComponent(j).Length];
            for (int k = 0; k < dense.Length; k++)
            {
                dense[k] = (r.NextDouble() - 0.5) * 1.0e-3;
            }
            randomDirection.SetDenseComponent(j, dense);
        }
        ConcatVector randomPerturbation = finalWeights.DeepClone();
        randomPerturbation.AddVectorInPlace(randomDirection, 1.0);
        double randomPerturbedLogLikelihood = GetValueSum(dataset, randomPerturbation, ll, l2regularization);
        // Check that we're within a very small margin of error (around 3 decimal places) of the randomly
        // discovered value
        if (logLikelihood < randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))))
        {
            System.Console.Error.WriteLine("Thought optimal point was: " + logLikelihood);
            System.Console.Error.WriteLine("Discovered better point: " + randomPerturbedLogLikelihood);
        }
        NUnit.Framework.Assert.IsTrue(logLikelihood >= randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))));
    }
}
/// <summary>Create an Inference object for a given set of weights, and a model.</summary>
/// <remarks>
/// Create an Inference object for a given set of weights, and a model.
/// <p>
/// The object is around to facilitate caching as an eventual optimization, when models are changing in minor ways
/// and inference is required several times. Work is done lazily, so is left until actual inference is requested.
/// </remarks>
/// <param name="model">the model to be computed over, subject to change in the future</param>
/// <param name="weights">
/// the weights to dot product with model features to get log-linear factors; the vector is cloned internally so
/// that no changes to the weights vector will be reflected by the CliqueTree. If you want to change
/// the weights, you must create a new CliqueTree.
/// </param>
public CliqueTree(GraphicalModel model, ConcatVector weights)
{
    // This is the metadata key for the model to store an observed value for a variable, as an int
    this.model = model;
    this.weights = weights.DeepClone();
}
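// Hypothetical usage sketch, not part of the original source: because the constructor deep-clones the weights,
// mutating the caller's vector afterwards is not reflected by an existing tree; a fresh CliqueTree has to be
// constructed to pick up the change. The "delta" parameter and the method name are illustrative only.
internal static CliqueTree RebuildWithUpdatedWeights(GraphicalModel model, ConcatVector weights, ConcatVector delta)
{
    CliqueTree staleTree = new CliqueTree(model, weights);
    // The clone taken inside the constructor means this in-place update is NOT seen by staleTree,
    // which keeps operating on the original weights
    weights.AddVectorInPlace(delta, 1.0);
    // To run inference with the updated weights, a new CliqueTree must be constructed
    return new CliqueTree(model, weights);
}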