// this magic number was arrived at with relation to the CoNLL benchmark, and tinkering
/// <summary>
/// Performs one backtracking-AdaGrad update step. If the log-likelihood decreased since
/// the previous step, the previous step is backed out (backtracking); otherwise an
/// AdaGrad-scaled gradient step is applied to <paramref name="weights"/> in place.
/// </summary>
/// <param name="weights">Current weight vector; mutated in place by this call.</param>
/// <param name="gradient">Derivative of the log-likelihood at <paramref name="weights"/>; mutated by the AdaGrad scaling below.</param>
/// <param name="logLikelihood">Log-likelihood achieved by the current <paramref name="weights"/>.</param>
/// <param name="optimizationState">Must be an <c>AdaGradOptimizationState</c>; carries the AdaGrad accumulator and last-step bookkeeping between calls.</param>
/// <param name="quiet">When true, suppresses all logging.</param>
/// <returns>True if the optimizer has converged and the caller should stop iterating.</returns>
public override bool UpdateWeights(ConcatVector weights, ConcatVector gradient, double logLikelihood, AbstractBatchOptimizer.OptimizationState optimizationState, bool quiet)
{
    BacktrackingAdaGradOptimizer.AdaGradOptimizationState s = (BacktrackingAdaGradOptimizer.AdaGradOptimizationState)optimizationState;
    double logLikelihoodChange = logLikelihood - s.lastLogLikelihood;
    if (logLikelihoodChange == 0)
    {
        // Exactly zero improvement: we are at a (numerical) fixed point, so stop.
        if (!quiet)
        {
            log.Info("\tlogLikelihood improvement = 0: quitting");
        }
        return true;
    }
    else
    {
        // Check if we should backtrack
        if (logLikelihoodChange < 0)
        {
            // If we should, move the weights back by half, and cut the lastDerivative by half.
            // NOTE(review): MapInPlace(null) looks like a lambda lost in the Java->C# conversion
            // (presumably d => d / 2 here) — TODO confirm against the original Java source.
            s.lastDerivative.MapInPlace(null);
            weights.AddVectorInPlace(s.lastDerivative, -1.0);
            if (!quiet)
            {
                log.Info("\tBACKTRACK...");
            }
            // If the lastDerivative (squared) norm falls below a threshold, it means we've converged.
            if (s.lastDerivative.DotProduct(s.lastDerivative) < 1.0e-10)
            {
                if (!quiet)
                {
                    // FIX: message previously said "< 1.0e-9" while the condition tests 1.0e-10;
                    // the message now reports the threshold actually used.
                    log.Info("\tBacktracking derivative norm " + s.lastDerivative.DotProduct(s.lastDerivative) + " < 1.0e-10: quitting");
                }
                return true;
            }
        }
        else
        {
            // Apply AdaGrad: accumulate the squared gradient, then scale the gradient by the
            // inverse square root of the accumulator before stepping.
            ConcatVector squared = gradient.DeepClone();
            // NOTE(review): likely lost lambda (d => d * d) — TODO confirm against the Java source.
            squared.MapInPlace(null);
            s.adagradAccumulator.AddVectorInPlace(squared, 1.0);
            ConcatVector sqrt = s.adagradAccumulator.DeepClone();
            // NOTE(review): likely lost lambda (d => 1 / (eps + Math.Sqrt(d))) — TODO confirm.
            sqrt.MapInPlace(null);
            gradient.ElementwiseProductInPlace(sqrt);
            weights.AddVectorInPlace(gradient, 1.0);
            // Setup for backtracking, in case necessary: remember this step and its score.
            s.lastDerivative = gradient;
            s.lastLogLikelihood = logLikelihood;
            if (!quiet)
            {
                log.Info("\tLL: " + logLikelihood);
            }
        }
    }
    return false;
}
/// <summary>
/// Main optimization loop. Optionally shards the dataset across one worker thread per
/// processor (balancing by <c>EstimateRelativeRuntime</c>), then repeatedly computes the
/// batch log-likelihood and derivative, applies L2 regularization and constraints, and
/// delegates the actual weight update to the enclosing optimizer's <c>UpdateWeights</c>
/// until convergence or external termination (<c>isFinished</c>). Notifies
/// <c>naturalTerminationBarrier</c> on exit.
/// </summary>
public virtual void Run()
{
    // Multithreading stuff
    int numThreads = Math.Max(1, Runtime.GetRuntime().AvailableProcessors());
    IList<T>[] queues = (IList<T>[])(new IList[numThreads]);
    Random r = new Random();
    // Allocate work to make estimated cost of work per thread as even as possible:
    // greedily assign each datum to the currently-cheapest queue.
    if (this.useThreads)
    {
        for (int i = 0; i < numThreads; i++)
        {
            queues[i] = new List<T>();
        }
        int[] queueEstimatedTotalCost = new int[numThreads];
        foreach (T datum in this.dataset)
        {
            int datumEstimatedCost = this.EstimateRelativeRuntime(datum);
            int minCostQueue = 0;
            for (int i_1 = 0; i_1 < numThreads; i_1++)
            {
                if (queueEstimatedTotalCost[i_1] < queueEstimatedTotalCost[minCostQueue])
                {
                    minCostQueue = i_1;
                }
            }
            queueEstimatedTotalCost[minCostQueue] += datumEstimatedCost;
            queues[minCostQueue].Add(datum);
        }
    }
    while (!this.isFinished)
    {
        // Collect log-likelihood and derivatives
        long startTime = Runtime.CurrentTimeMillis();
        long threadWaiting = 0;
        ConcatVector derivative = this.weights.NewEmptyClone();
        double logLikelihood = 0.0;
        if (this.useThreads)
        {
            AbstractBatchOptimizer.GradientWorker[] workers = new AbstractBatchOptimizer.GradientWorker[numThreads];
            Thread[] threads = new Thread[numThreads];
            for (int i = 0; i < workers.Length; i++)
            {
                workers[i] = new AbstractBatchOptimizer.GradientWorker(this, i, numThreads, queues[i], this.fn, this.weights);
                threads[i] = new Thread(workers[i]);
                workers[i].jvmThreadId = threads[i].GetId();
                threads[i].Start();
            }
            // This is for logging
            long minFinishTime = long.MaxValue;
            long maxFinishTime = long.MinValue;
            // This is for re-balancing
            long minCPUTime = long.MaxValue;
            long maxCPUTime = long.MinValue;
            int slowestWorker = 0;
            int fastestWorker = 0;
            // Join all workers, accumulating their partial results and timing stats.
            for (int i_1 = 0; i_1 < workers.Length; i_1++)
            {
                try
                {
                    threads[i_1].Join();
                }
                catch (Exception e)
                {
                    throw new RuntimeInterruptedException(e);
                }
                logLikelihood += workers[i_1].localLogLikelihood;
                derivative.AddVectorInPlace(workers[i_1].localDerivative, 1.0);
                if (workers[i_1].finishedAtTime < minFinishTime)
                {
                    minFinishTime = workers[i_1].finishedAtTime;
                }
                if (workers[i_1].finishedAtTime > maxFinishTime)
                {
                    maxFinishTime = workers[i_1].finishedAtTime;
                }
                if (workers[i_1].cpuTimeRequired < minCPUTime)
                {
                    fastestWorker = i_1;
                    minCPUTime = workers[i_1].cpuTimeRequired;
                }
                if (workers[i_1].cpuTimeRequired > maxCPUTime)
                {
                    slowestWorker = i_1;
                    maxCPUTime = workers[i_1].cpuTimeRequired;
                }
            }
            threadWaiting = maxFinishTime - minFinishTime;
            // Try to reallocate work dynamically to minimize waiting on subsequent rounds.
            // Figure out the percentage of work represented by the waiting.
            double waitingPercentage = (double)(maxCPUTime - minCPUTime) / (double)maxCPUTime;
            int needTransferItems = (int)Math.Floor(queues[slowestWorker].Count * waitingPercentage * 0.5);
            for (int i_2 = 0; i_2 < needTransferItems; i_2++)
            {
                int toTransfer = r.NextInt(queues[slowestWorker].Count);
                T datum = queues[slowestWorker][toTransfer];
                // FIX: was Remove(toTransfer), which is IList<T>.Remove(T item) — remove-by-value
                // with an int index (a Java List.remove(int) conversion artifact). RemoveAt removes
                // the randomly chosen datum by index, as intended.
                queues[slowestWorker].RemoveAt(toTransfer);
                queues[fastestWorker].Add(datum);
            }
            // Check for user interrupt
            if (this.isFinished)
            {
                return;
            }
        }
        else
        {
            // Single-threaded path: accumulate over the whole dataset directly.
            foreach (T datum in this.dataset)
            {
                System.Diagnostics.Debug.Assert((datum != null));
                logLikelihood += this.fn.GetSummaryForInstance(datum, this.weights, derivative);
                // Check for user interrupt
                if (this.isFinished)
                {
                    return;
                }
            }
        }
        logLikelihood /= this.dataset.Length;
        // NOTE(review): MapInPlace(null) looks like a lambda lost in the Java->C# conversion
        // (presumably d => d / dataset.Length, to average the derivative) — TODO confirm
        // against the original Java source.
        derivative.MapInPlace(null);
        long gradientComputationTime = Runtime.CurrentTimeMillis() - startTime;
        // L2 regularization: penalize the objective and adjust the derivative accordingly.
        logLikelihood = logLikelihood - (this.l2regularization * this.weights.DotProduct(this.weights));
        derivative.AddVectorInPlace(this.weights, -2 * this.l2regularization);
        // Zero out the derivative on the components we're holding fixed
        foreach (AbstractBatchOptimizer.Constraint constraint in this._enclosing.constraints)
        {
            constraint.ApplyToDerivative(derivative);
        }
        // If our derivative is sufficiently small, we've converged
        // (DotProduct with itself, i.e. the squared L2 norm).
        double derivativeNorm = derivative.DotProduct(derivative);
        if (derivativeNorm < this.convergenceDerivativeNorm)
        {
            if (!this.quiet)
            {
                AbstractBatchOptimizer.log.Info("Derivative norm " + derivativeNorm + " < " + this.convergenceDerivativeNorm + ": quitting");
            }
            break;
        }
        // Do the actual computation
        if (!this.quiet)
        {
            AbstractBatchOptimizer.log.Info("[" + gradientComputationTime + " ms, threads waiting " + threadWaiting + " ms]");
        }
        bool converged = this._enclosing.UpdateWeights(this.weights, derivative, logLikelihood, this.optimizationState, this.quiet);
        // Apply constraints to the weights vector
        foreach (AbstractBatchOptimizer.Constraint constraint_1 in this._enclosing.constraints)
        {
            constraint_1.ApplyToWeights(this.weights);
        }
        if (converged)
        {
            break;
        }
    }
    // Wake anyone blocked waiting for natural termination of the optimizer.
    lock (this.naturalTerminationBarrier)
    {
        Sharpen.Runtime.NotifyAll(this.naturalTerminationBarrier);
    }
    this.isFinished = true;
}