Example #1
        public virtual void TestOptimizeLogLikelihood(AbstractBatchOptimizer optimizer, GraphicalModel[] dataset, ConcatVector initialWeights, double l2regularization)
        {
            AbstractDifferentiableFunction<GraphicalModel> ll = new LogLikelihoodDifferentiableFunction();
            ConcatVector finalWeights = optimizer.Optimize(dataset, ll, initialWeights, l2regularization, 1.0e-9, true);

            System.Console.Error.WriteLine("Finished optimizing");
            double logLikelihood = GetValueSum(dataset, finalWeights, ll, l2regularization);
            // Probe many random directions very close to the optimum to verify that no nearby
            // point has a higher log-likelihood
            Random r = new Random(42);

            for (int i = 0; i < 1000; i++)
            {
                int          size            = finalWeights.GetNumberOfComponents();
                ConcatVector randomDirection = new ConcatVector(size);
                for (int j = 0; j < size; j++)
                {
                    double[] dense = new double[finalWeights.IsComponentSparse(j) ? finalWeights.GetSparseIndex(j) + 1 : finalWeights.GetDenseComponent(j).Length];
                    for (int k = 0; k < dense.Length; k++)
                    {
                        dense[k] = (r.NextDouble() - 0.5) * 1.0e-3;
                    }
                    randomDirection.SetDenseComponent(j, dense);
                }
                ConcatVector randomPerturbation = finalWeights.DeepClone();
                randomPerturbation.AddVectorInPlace(randomDirection, 1.0);
                double randomPerturbedLogLikelihood = GetValueSum(dataset, randomPerturbation, ll, l2regularization);
                // Check that we're within a very small margin of error (around 3 decimal places) of the randomly
                // discovered value
                if (logLikelihood < randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))))
                {
                    System.Console.Error.WriteLine("Thought optimal point was: " + logLikelihood);
                    System.Console.Error.WriteLine("Discovered better point: " + randomPerturbedLogLikelihood);
                }
                NUnit.Framework.Assert.IsTrue(logLikelihood >= randomPerturbedLogLikelihood - (1.0e-3 * Math.Max(1.0, Math.Abs(logLikelihood))));
            }
        }
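A minimal driver sketch for the harness above, assuming BacktrackingAdaGradOptimizer (the concrete AbstractBatchOptimizer in this package) and a hypothetical BuildModels() helper that supplies labeled GraphicalModel training data; none of this appears in the original example:

        // Hypothetical usage sketch (not part of the original example).
        // BuildModels() is an assumed helper returning GraphicalModel[] with
        // training observations stored in their metadata.
        GraphicalModel[] dataset = BuildModels();
        ConcatVector initialWeights = new ConcatVector(0);                  // start from empty (all-zero) weights
        AbstractBatchOptimizer optimizer = new BacktrackingAdaGradOptimizer();
        TestOptimizeLogLikelihood(optimizer, dataset, initialWeights, 0.1); // l2regularization = 0.1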
Example #2
            public virtual void Run()
            {
                // Multithreading stuff
                int numThreads = Math.Max(1, Runtime.GetRuntime().AvailableProcessors());

                // Note: casting a non-generic IList[] to IList<T>[] fails at runtime in C#,
                // so allocate the generic array directly
                IList<T>[] queues = new IList<T>[numThreads];
                Random      r      = new Random();

                // Allocate work to make estimated cost of work per thread as even as possible
                if (this.useThreads)
                {
                    for (int i = 0; i < numThreads; i++)
                    {
                        queues[i] = new List<T>();
                    }
                    int[] queueEstimatedTotalCost = new int[numThreads];
                    foreach (T datum in this.dataset)
                    {
                        int datumEstimatedCost = this.EstimateRelativeRuntime(datum);
                        int minCostQueue       = 0;
                        for (int i_1 = 0; i_1 < numThreads; i_1++)
                        {
                            if (queueEstimatedTotalCost[i_1] < queueEstimatedTotalCost[minCostQueue])
                            {
                                minCostQueue = i_1;
                            }
                        }
                        queueEstimatedTotalCost[minCostQueue] += datumEstimatedCost;
                        queues[minCostQueue].Add(datum);
                    }
                }
                while (!this.isFinished)
                {
                    // Collect log-likelihood and derivatives
                    long         startTime     = Runtime.CurrentTimeMillis();
                    long         threadWaiting = 0;
                    ConcatVector derivative    = this.weights.NewEmptyClone();
                    double       logLikelihood = 0.0;
                    if (this.useThreads)
                    {
                        AbstractBatchOptimizer.GradientWorker[] workers = new AbstractBatchOptimizer.GradientWorker[numThreads];
                        Thread[] threads = new Thread[numThreads];
                        for (int i = 0; i < workers.Length; i++)
                        {
                            workers[i]             = new AbstractBatchOptimizer.GradientWorker(this, i, numThreads, queues[i], this.fn, this.weights);
                            threads[i]             = new Thread(workers[i]);
                            workers[i].jvmThreadId = threads[i].GetId();
                            threads[i].Start();
                        }
                        // This is for logging
                        long minFinishTime = long.MaxValue;
                        long maxFinishTime = long.MinValue;
                        // This is for re-balancing
                        long minCPUTime    = long.MaxValue;
                        long maxCPUTime    = long.MinValue;
                        int  slowestWorker = 0;
                        int  fastestWorker = 0;
                        for (int i_1 = 0; i_1 < workers.Length; i_1++)
                        {
                            try
                            {
                                threads[i_1].Join();
                            }
                            catch (Exception e)
                            {
                                throw new RuntimeInterruptedException(e);
                            }
                            logLikelihood += workers[i_1].localLogLikelihood;
                            derivative.AddVectorInPlace(workers[i_1].localDerivative, 1.0);
                            if (workers[i_1].finishedAtTime < minFinishTime)
                            {
                                minFinishTime = workers[i_1].finishedAtTime;
                            }
                            if (workers[i_1].finishedAtTime > maxFinishTime)
                            {
                                maxFinishTime = workers[i_1].finishedAtTime;
                            }
                            if (workers[i_1].cpuTimeRequired < minCPUTime)
                            {
                                fastestWorker = i_1;
                                minCPUTime    = workers[i_1].cpuTimeRequired;
                            }
                            if (workers[i_1].cpuTimeRequired > maxCPUTime)
                            {
                                slowestWorker = i_1;
                                maxCPUTime    = workers[i_1].cpuTimeRequired;
                            }
                        }
                        threadWaiting = maxFinishTime - minFinishTime;
                        // Try to reallocate work dynamically to minimize waiting on subsequent rounds
                        // Figure out the percentage of work represented by the waiting
                        double waitingPercentage = (double)(maxCPUTime - minCPUTime) / (double)maxCPUTime;
                        int    needTransferItems = (int)Math.Floor(queues[slowestWorker].Count * waitingPercentage * 0.5);
                        for (int i_2 = 0; i_2 < needTransferItems; i_2++)
                        {
                            int toTransfer = r.NextInt(queues[slowestWorker].Count);
                            T   datum      = queues[slowestWorker][toTransfer];
                            // RemoveAt removes by index; IList<T>.Remove would look for a matching item
                            queues[slowestWorker].RemoveAt(toTransfer);
                            queues[fastestWorker].Add(datum);
                        }
                        // Check for user interrupt
                        if (this.isFinished)
                        {
                            return;
                        }
                    }
                    else
                    {
                        foreach (T datum in this.dataset)
                        {
                            System.Diagnostics.Debug.Assert((datum != null));
                            logLikelihood += this.fn.GetSummaryForInstance(datum, this.weights, derivative);
                            // Check for user interrupt
                            if (this.isFinished)
                            {
                                return;
                            }
                        }
                    }
                    logLikelihood /= this.dataset.Length;
                    // Average the derivative over the dataset, matching the log-likelihood
                    // normalization above (the mechanical translation had dropped this lambda)
                    derivative.MapInPlace((d) => d / this.dataset.Length);
                    long gradientComputationTime = Runtime.CurrentTimeMillis() - startTime;
                    // Regularization
                    logLikelihood = logLikelihood - (this.l2regularization * this.weights.DotProduct(this.weights));
                    derivative.AddVectorInPlace(this.weights, -2 * this.l2regularization);
                    // Zero out the derivative on the components we're holding fixed
                    foreach (AbstractBatchOptimizer.Constraint constraint in this._enclosing.constraints)
                    {
                        constraint.ApplyToDerivative(derivative);
                    }
                    // If our derivative is sufficiently small, we've converged
                    double derivativeNorm = derivative.DotProduct(derivative);
                    if (derivativeNorm < this.convergenceDerivativeNorm)
                    {
                        if (!this.quiet)
                        {
                            AbstractBatchOptimizer.log.Info("Derivative norm " + derivativeNorm + " < " + this.convergenceDerivativeNorm + ": quitting");
                        }
                        break;
                    }
                    // Do the actual computation
                    if (!this.quiet)
                    {
                        AbstractBatchOptimizer.log.Info("[" + gradientComputationTime + " ms, threads waiting " + threadWaiting + " ms]");
                    }
                    bool converged = this._enclosing.UpdateWeights(this.weights, derivative, logLikelihood, this.optimizationState, this.quiet);
                    // Apply constraints to the weights vector
                    foreach (AbstractBatchOptimizer.Constraint constraint_1 in this._enclosing.constraints)
                    {
                        constraint_1.ApplyToWeights(this.weights);
                    }
                    if (converged)
                    {
                        break;
                    }
                }
                lock (this.naturalTerminationBarrier)
                {
                    Sharpen.Runtime.NotifyAll(this.naturalTerminationBarrier);
                }
                this.isFinished = true;
            }
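The allocation step at the top of Run() is a greedy least-loaded heuristic: each datum is appended to whichever queue currently has the smallest estimated total cost. A self-contained sketch of the same idea, with plain integer costs standing in for EstimateRelativeRuntime(datum):

        // Greedy least-loaded allocation (illustrative sketch, not from the source).
        // itemCosts stands in for EstimateRelativeRuntime over the dataset.
        using System.Collections.Generic;

        static List<int>[] AllocateGreedy(int[] itemCosts, int numThreads)
        {
            var queues = new List<int>[numThreads];
            var totals = new int[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                queues[i] = new List<int>();
            }
            foreach (int cost in itemCosts)
            {
                // Find the queue with the smallest estimated total cost so far
                int minQueue = 0;
                for (int q = 1; q < numThreads; q++)
                {
                    if (totals[q] < totals[minQueue])
                    {
                        minQueue = q;
                    }
                }
                totals[minQueue] += cost;
                queues[minQueue].Add(cost);
            }
            return queues;
        }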
Example #3
        /// <summary>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <para/>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING.
        /// </summary>
        /// <remarks>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <para/>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING. The models can also have observations fixed in
        /// CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be treated as fixed and will not be learned against.
        /// </remarks>
        /// <param name="model">the model to find the log-likelihood of</param>
        /// <param name="weights">the weights to use</param>
        /// <returns>the gradient and value of the function at that point</returns>
        public override double GetSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient)
        {
            double logLikelihood = 0.0;

            CliqueTree.MarginalResult result = new CliqueTree(model, weights).CalculateMarginals();
            // Cache everything in preparation for multiple redundant requests for feature vectors
            foreach (GraphicalModel.Factor factor in model.factors)
            {
                factor.featuresTable.CacheVectors();
            }
            // Subtract log partition function
            logLikelihood -= Math.Log(result.partitionFunction);
            // Quit if we have an infinite partition function
            if (double.IsInfinite(logLikelihood))
            {
                return 0.0;
            }
            // Add the determined assignment by training values
            foreach (GraphicalModel.Factor factor_1 in model.factors)
            {
                // Find the assignment, taking both fixed and training observed variables into account
                int[] assignment = new int[factor_1.neigborIndices.Length];
                for (int i = 0; i < assignment.Length; i++)
                {
                    int deterministicValue = GetDeterministicAssignment(result.marginals[factor_1.neigborIndices[i]]);
                    if (deterministicValue != -1)
                    {
                        assignment[i] = deterministicValue;
                    }
                    else
                    {
                        int trainingObservation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(factor_1.neigborIndices[i])[LogLikelihoodDifferentiableFunction.VariableTrainingValue]);
                        assignment[i] = trainingObservation;
                    }
                }
                ConcatVector features = factor_1.featuresTable.GetAssignmentValue(assignment).Get();
                // Add the log-likelihood from this observation to the log-likelihood
                logLikelihood += features.DotProduct(weights);
                // Add the vector from this observation to the gradient
                gradient.AddVectorInPlace(features, 1.0);
            }
            // Take expectations over features given marginals
            // NOTE: This is extremely expensive. Not sure what to do about that
            foreach (GraphicalModel.Factor factor_2 in model.factors)
            {
                // OPTIMIZATION:
                // Rather than use the standard iterator, which creates lots of int[] arrays on the heap, which need to be GC'd,
                // we use the fast version that just mutates one array. Since this is read once for us here, this is ideal.
                IEnumerator<int[]> fastPassByReferenceIterator = factor_2.featuresTable.FastPassByReferenceIterator();
                // Advance to the first assignment; the iterator mutates and returns the same array each time
                fastPassByReferenceIterator.MoveNext();
                int[] assignment = fastPassByReferenceIterator.Current;
                while (true)
                {
                    // calculate assignment prob
                    double assignmentProb = result.jointMarginals[factor_2].GetAssignmentValue(assignment);
                    // subtract this feature set, weighted by the probability of the assignment
                    if (assignmentProb > 0)
                    {
                        gradient.AddVectorInPlace(factor_2.featuresTable.GetAssignmentValue(assignment).Get(), -assignmentProb);
                    }
                    // Advancing mutates the assignment[] array in place, rather than creating a new one
                    if (!fastPassByReferenceIterator.MoveNext())
                    {
                        break;
                    }
                }
            }
            // Uncache everything, now that the computations have completed
            foreach (GraphicalModel.Factor factor_3 in model.factors)
            {
                factor_3.featuresTable.ReleaseCache();
            }
            return logLikelihood;
        }
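For reference, the quantity this method accumulates is the standard log-linear (CRF) gradient: observed feature vectors for the gold assignment minus feature expectations under the model's marginals,

    \nabla_w \log p(y \mid x; w) = \sum_c f_c(y_c, x) - \sum_c \mathbb{E}_{y'_c \sim p(\cdot \mid x; w)}\left[ f_c(y'_c, x) \right]

where the first sum is the AddVectorInPlace(features, 1.0) pass over the gold assignments, and the second is the -assignmentProb weighted pass over each factor's feature table. The L2 term (-2 \lambda w) is applied by the caller (see the Run() loop in Example #2), not inside this method.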