public virtual void TestGetSummaryForInstance(GraphicalModel[] dataset, ConcatVector weights)
        {
            LogLikelihoodDifferentiableFunction fn = new LogLikelihoodDifferentiableFunction();

            foreach (GraphicalModel model in dataset)
            {
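                // Compare the value and gradient returned by GetSummaryForInstance against a gold log-likelihood and a
                // numerical ("definition of derivative") approximation of the gradient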
                double       goldLogLikelihood = LogLikelihood(model, weights);
                ConcatVector goldGradient      = DefinitionOfDerivative(model, weights);
                ConcatVector gradient          = new ConcatVector(0);
                double       logLikelihood     = fn.GetSummaryForInstance(model, weights, gradient);
                // Expected (gold) vs. actual, with a tolerance of 1% of the gold value (at least 1.0e-3)
                NUnit.Framework.Assert.AreEqual(goldLogLikelihood, logLikelihood, Math.Max(1.0e-3, Math.Abs(goldLogLikelihood * 1.0e-2)));
                // Our check for gradient similarity involves distance between endpoints of vectors, instead of elementwise
                // similarity, b/c it can be controlled as a percentage
                ConcatVector difference = goldGradient.DeepClone();
                difference.AddVectorInPlace(gradient, -1);
                double distance = Math.Sqrt(difference.DotProduct(difference));
                // The tolerance here is pretty large, since the gold gradient is computed approximately
                // 5% still tells us whether everything is working or not though
                if (distance > 5.0e-2)
                {
                    System.Console.Error.WriteLine("Definitional and calculated gradient differ!");
                    System.Console.Error.WriteLine("Definition approx: " + goldGradient);
                    System.Console.Error.WriteLine("Calculated: " + gradient);
                }
                NUnit.Framework.Assert.AreEqual(0.0, distance, 5.0e-2);
            }
        }
        /*
         * @Theory
         * public void testOptimizeLogLikelihoodWithConstraints(AbstractBatchOptimizer optimizer,
         * @ForAll(sampleSize = 5) @From(LogLikelihoodFunctionTest.GraphicalModelDatasetGenerator.class) GraphicalModel[] dataset,
         * @ForAll(sampleSize = 2) @From(LogLikelihoodFunctionTest.WeightsGenerator.class) ConcatVector initialWeights,
         * @ForAll(sampleSize = 2) @InRange(minDouble = 0.0, maxDouble = 5.0) double l2regularization) throws Exception {
         * Random r = new Random(42);
         *
         * int constraintComponent = r.nextInt(initialWeights.getNumberOfComponents());
         * double constraintValue = r.nextDouble();
         *
         * if (r.nextBoolean()) {
         * optimizer.addSparseConstraint(constraintComponent, 0, constraintValue);
         * } else {
         * optimizer.addDenseConstraint(constraintComponent, new double[]{constraintValue});
         * }
         *
         * // Put in some constraints
         *
         * AbstractDifferentiableFunction<GraphicalModel> ll = new LogLikelihoodDifferentiableFunction();
         * ConcatVector finalWeights = optimizer.optimize(dataset, ll, initialWeights, l2regularization, 1.0e-9, false);
         * System.err.println("Finished optimizing");
         *
         * assertEquals(constraintValue, finalWeights.getValueAt(constraintComponent, 0), 1.0e-9);
         *
         * double logLikelihood = getValueSum(dataset, finalWeights, ll, l2regularization);
         *
         * // Check in a whole bunch of random directions really nearby that there is no nearby point with a higher log
         * // likelihood
         * for (int i = 0; i < 1000; i++) {
         * int size = finalWeights.getNumberOfComponents();
         * ConcatVector randomDirection = new ConcatVector(size);
         * for (int j = 0; j < size; j++) {
         * if (j == constraintComponent) continue;
         * double[] dense = new double[finalWeights.isComponentSparse(j) ? finalWeights.getSparseIndex(j) + 1 : finalWeights.getDenseComponent(j).length];
         * for (int k = 0; k < dense.length; k++) {
         * dense[k] = (r.nextDouble() - 0.5) * 1.0e-3;
         * }
         * randomDirection.setDenseComponent(j, dense);
         * }
         *
         * ConcatVector randomPerturbation = finalWeights.deepClone();
         * randomPerturbation.addVectorInPlace(randomDirection, 1.0);
         *
         * double randomPerturbedLogLikelihood = getValueSum(dataset, randomPerturbation, ll, l2regularization);
         *
         * // Check that we're within a very small margin of error (around 3 decimal places) of the randomly
         * // discovered value
         *
         * if (logLikelihood < randomPerturbedLogLikelihood - (1.0e-3 * Math.max(1.0, Math.abs(logLikelihood)))) {
         * System.err.println("Thought optimal point was: " + logLikelihood);
         * System.err.println("Discovered better point: " + randomPerturbedLogLikelihood);
         * }
         *
         * assertTrue(logLikelihood >= randomPerturbedLogLikelihood - (1.0e-3 * Math.max(1.0, Math.abs(logLikelihood))));
         * }
         * }
         */
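        // Mean per-instance objective value over the dataset, minus the L2 penalty (l2regularization * ||weights||^2)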
        private double GetValueSum <T>(T[] dataset, ConcatVector weights, AbstractDifferentiableFunction <T> fn, double l2regularization)
        {
            double value = 0.0;

            foreach (T t in dataset)
            {
                value += fn.GetSummaryForInstance(t, weights, new ConcatVector(0));
            }
            return((value / dataset.Length) - (weights.DotProduct(weights) * l2regularization));
        }
            public virtual void Run()
            {
                // Multithreading stuff
                int numThreads = Math.Max(1, Runtime.GetRuntime().AvailableProcessors());

                IList <T>[] queues = new IList <T>[numThreads];
                Random      r      = new Random();

                // Allocate work to make estimated cost of work per thread as even as possible
                if (this.useThreads)
                {
                    for (int i = 0; i < numThreads; i++)
                    {
                        queues[i] = new List <T>();
                    }
                    int[] queueEstimatedTotalCost = new int[numThreads];
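                    // Greedy allocation: each datum is appended to the queue with the smallest estimated total cost so far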
                    foreach (T datum in this.dataset)
                    {
                        int datumEstimatedCost = this.EstimateRelativeRuntime(datum);
                        int minCostQueue       = 0;
                        for (int i_1 = 0; i_1 < numThreads; i_1++)
                        {
                            if (queueEstimatedTotalCost[i_1] < queueEstimatedTotalCost[minCostQueue])
                            {
                                minCostQueue = i_1;
                            }
                        }
                        queueEstimatedTotalCost[minCostQueue] += datumEstimatedCost;
                        queues[minCostQueue].Add(datum);
                    }
                }
                while (!this.isFinished)
                {
                    // Collect log-likelihood and derivatives
                    long         startTime     = Runtime.CurrentTimeMillis();
                    long         threadWaiting = 0;
                    ConcatVector derivative    = this.weights.NewEmptyClone();
                    double       logLikelihood = 0.0;
                    if (this.useThreads)
                    {
                        AbstractBatchOptimizer.GradientWorker[] workers = new AbstractBatchOptimizer.GradientWorker[numThreads];
                        Thread[] threads = new Thread[numThreads];
                        for (int i = 0; i < workers.Length; i++)
                        {
                            workers[i]             = new AbstractBatchOptimizer.GradientWorker(this, i, numThreads, queues[i], this.fn, this.weights);
                            threads[i]             = new Thread(workers[i]);
                            workers[i].jvmThreadId = threads[i].GetId();
                            threads[i].Start();
                        }
                        // This is for logging
                        long minFinishTime = long.MaxValue;
                        long maxFinishTime = long.MinValue;
                        // This is for re-balancing
                        long minCPUTime    = long.MaxValue;
                        long maxCPUTime    = long.MinValue;
                        int  slowestWorker = 0;
                        int  fastestWorker = 0;
                        for (int i_1 = 0; i_1 < workers.Length; i_1++)
                        {
                            try
                            {
                                threads[i_1].Join();
                            }
                            catch (Exception e)
                            {
                                throw new RuntimeInterruptedException(e);
                            }
                            logLikelihood += workers[i_1].localLogLikelihood;
                            derivative.AddVectorInPlace(workers[i_1].localDerivative, 1.0);
                            if (workers[i_1].finishedAtTime < minFinishTime)
                            {
                                minFinishTime = workers[i_1].finishedAtTime;
                            }
                            if (workers[i_1].finishedAtTime > maxFinishTime)
                            {
                                maxFinishTime = workers[i_1].finishedAtTime;
                            }
                            if (workers[i_1].cpuTimeRequired < minCPUTime)
                            {
                                fastestWorker = i_1;
                                minCPUTime    = workers[i_1].cpuTimeRequired;
                            }
                            if (workers[i_1].cpuTimeRequired > maxCPUTime)
                            {
                                slowestWorker = i_1;
                                maxCPUTime    = workers[i_1].cpuTimeRequired;
                            }
                        }
                        threadWaiting = maxFinishTime - minFinishTime;
                        // Try to reallocate work dynamically to minimize waiting on subsequent rounds
                        // Figure out the percentage of work represented by the waiting
                        double waitingPercentage = (double)(maxCPUTime - minCPUTime) / (double)maxCPUTime;
                        int    needTransferItems = (int)Math.Floor(queues[slowestWorker].Count * waitingPercentage * 0.5);
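                        // Transfer items from the slowest worker's queue to the fastest worker's; the 0.5 factor moves
                        // only half of the estimated imbalance in any single round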
                        for (int i_2 = 0; i_2 < needTransferItems; i_2++)
                        {
                            int toTransfer = r.NextInt(queues[slowestWorker].Count);
                            T   datum      = queues[slowestWorker][toTransfer];
                            queues[slowestWorker].RemoveAt(toTransfer);
                            queues[fastestWorker].Add(datum);
                        }
                        // Check for user interrupt
                        if (this.isFinished)
                        {
                            return;
                        }
                    }
                    else
                    {
                        foreach (T datum in this.dataset)
                        {
                            System.Diagnostics.Debug.Assert((datum != null));
                            logLikelihood += this.fn.GetSummaryForInstance(datum, this.weights, derivative);
                            // Check for user interrupt
                            if (this.isFinished)
                            {
                                return;
                            }
                        }
                    }
                    logLikelihood /= this.dataset.Length;
                    // Average the accumulated derivative over the dataset, matching the averaged log-likelihood above
                    derivative.MapInPlace(x => x / this.dataset.Length);
                    long gradientComputationTime = Runtime.CurrentTimeMillis() - startTime;
                    // Regularization
                    logLikelihood = logLikelihood - (this.l2regularization * this.weights.DotProduct(this.weights));
                    derivative.AddVectorInPlace(this.weights, -2 * this.l2regularization);
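                    // The -2 * l2regularization * weights term is the gradient of the -l2regularization * (weights . weights) penalty above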
                    // Zero out the derivative on the components we're holding fixed
                    foreach (AbstractBatchOptimizer.Constraint constraint in this._enclosing.constraints)
                    {
                        constraint.ApplyToDerivative(derivative);
                    }
                    // If our derivative is sufficiently small, we've converged
                    double derivativeNorm = derivative.DotProduct(derivative);
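                    // DotProduct of the derivative with itself is the squared L2 norm, so the convergence threshold is
                    // effectively compared against ||derivative||^2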
                    if (derivativeNorm < this.convergenceDerivativeNorm)
                    {
                        if (!this.quiet)
                        {
                            AbstractBatchOptimizer.log.Info("Derivative norm " + derivativeNorm + " < " + this.convergenceDerivativeNorm + ": quitting");
                        }
                        break;
                    }
                    // Do the actual computation
                    if (!this.quiet)
                    {
                        AbstractBatchOptimizer.log.Info("[" + gradientComputationTime + " ms, threads waiting " + threadWaiting + " ms]");
                    }
                    bool converged = this._enclosing.UpdateWeights(this.weights, derivative, logLikelihood, this.optimizationState, this.quiet);
                    // Apply constraints to the weights vector
                    foreach (AbstractBatchOptimizer.Constraint constraint_1 in this._enclosing.constraints)
                    {
                        constraint_1.ApplyToWeights(this.weights);
                    }
                    if (converged)
                    {
                        break;
                    }
                }
                lock (this.naturalTerminationBarrier)
                {
                    Sharpen.Runtime.NotifyAll(this.naturalTerminationBarrier);
                }
                this.isFinished = true;
            }
        // This sets a gold observation for a model to use as training gold data
        /// <summary>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <p>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING.
        /// </summary>
        /// <remarks>
        /// Gets a summary of the log-likelihood of a single model at a point.
        /// <p>
        /// It assumes that the models have observations for the training set stored as metadata in
        /// LogLikelihoodDifferentiableFunction.OBSERVATION_FOR_TRAINING. The models can also have observations fixed in
        /// CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be considered fixed and will not be learned against.
        /// </remarks>
        /// <param name="model">the model to find the log-likelihood of</param>
        /// <param name="weights">the weights to use</param>
        /// <returns>the gradient and value of the function at that point</returns>
        public override double GetSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient)
        {
            double logLikelihood = 0.0;

            CliqueTree.MarginalResult result = new CliqueTree(model, weights).CalculateMarginals();
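            // result holds the per-variable marginals, the per-factor joint marginals, and the partition function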
            // Cache everything in preparation for multiple redundant requests for feature vectors
            foreach (GraphicalModel.Factor factor in model.factors)
            {
                factor.featuresTable.CacheVectors();
            }
            // Subtract log partition function
            logLikelihood -= Math.Log(result.partitionFunction);
            // Quit if we have an infinite partition function
            if (double.IsInfinite(logLikelihood))
            {
                return(0.0);
            }
            // Add the determined assignment by training values
            foreach (GraphicalModel.Factor factor_1 in model.factors)
            {
                // Find the assignment, taking both fixed and training observed variables into account
                int[] assignment = new int[factor_1.neigborIndices.Length];
                for (int i = 0; i < assignment.Length; i++)
                {
                    int deterministicValue = GetDeterministicAssignment(result.marginals[factor_1.neigborIndices[i]]);
                    if (deterministicValue != -1)
                    {
                        assignment[i] = deterministicValue;
                    }
                    else
                    {
                        int trainingObservation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(factor_1.neigborIndices[i])[LogLikelihoodDifferentiableFunction.VariableTrainingValue]);
                        assignment[i] = trainingObservation;
                    }
                }
                ConcatVector features = factor_1.featuresTable.GetAssignmentValue(assignment).Get();
                // Add the log-likelihood from this observation to the log-likelihood
                logLikelihood += features.DotProduct(weights);
                // Add the vector from this observation to the gradient
                gradient.AddVectorInPlace(features, 1.0);
            }
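            // The gradient of the log-likelihood is the feature vector of the observed/training assignment minus the
            // expected feature vector under the model's marginals: the loop above added the observed features, and the
            // loop below subtracts the expectation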
            // Take expectations over features given marginals
            // NOTE: This is extremely expensive. Not sure what to do about that
            foreach (GraphicalModel.Factor factor_2 in model.factors)
            {
                // OPTIMIZATION:
                // Rather than use the standard iterator, which creates lots of int[] arrays on the heap, which need to be GC'd,
                // we use the fast version that just mutates one array. Since this is read once for us here, this is ideal.
                IEnumerator <int[]> fastPassByReferenceIterator = factor_2.featuresTable.FastPassByReferenceIterator();
                // With a C# enumerator, Current is only valid after the first call to MoveNext()
                fastPassByReferenceIterator.MoveNext();
                int[] assignment = fastPassByReferenceIterator.Current;
                while (true)
                {
                    // calculate assignment prob
                    double assignmentProb = result.jointMarginals[factor_2].GetAssignmentValue(assignment);
                    // subtract this feature set, weighted by the probability of the assignment
                    if (assignmentProb > 0)
                    {
                        gradient.AddVectorInPlace(factor_2.featuresTable.GetAssignmentValue(assignment).Get(), -assignmentProb);
                    }
                    // This mutates the assignment[] array, rather than creating a new one
                    if (!fastPassByReferenceIterator.MoveNext())
                    {
                        break;
                    }
                }
            }
            // Uncache everything, now that the computations have completed
            foreach (GraphicalModel.Factor factor_3 in model.factors)
            {
                factor_3.featuresTable.ReleaseCache();
            }
            return(logLikelihood);
        }
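
        /*
         * A minimal usage sketch (hypothetical variable names; constructing the model and weights is assumed to happen elsewhere):
         *
         * LogLikelihoodDifferentiableFunction fn = new LogLikelihoodDifferentiableFunction();
         * ConcatVector gradient = new ConcatVector(0);   // accumulates d(logLikelihood)/d(weights) in place
         * double logLikelihood = fn.GetSummaryForInstance(model, weights, gradient);
         */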