/// <summary>
        /// Scores the given parameters by the accuracy they reach on the indexed events.
        /// </summary>
        /// <param name="parameters">The model parameters to score.</param>
        /// <returns>
        /// The share of events, weighted by how many times each one was seen, whose highest-scoring
        /// outcome equals the recorded outcome; <c>0</c> when the weighted event count is zero.
        /// </returns>
        public double Evaluate(double[] parameters)
        {
            var eventContexts = indexer.GetContexts();
            var eventValues   = indexer.Values;
            var seenCounts    = indexer.GetNumTimesEventsSeen();
            var outcomeList   = indexer.GetOutcomeList();

            // NOTE(review): the outcome count is taken from the length of the outcome list, which is
            // also indexed per event below - confirm that this really is the number of outcome labels.
            var outcomeCount   = outcomeList.Length;
            var predLabelCount = indexer.GetPredLabels().Length;

            var correct = 0;
            var total   = 0;

            for (var e = 0; e < eventContexts.Length; e++)
            {
                var context = eventContexts[e];
                var weights = eventValues != null ? eventValues[e] : null;

                // The evaluation is handed a newly allocated buffer for every event.
                var distribution = new double[outcomeCount];
                QNModel.Eval(context, weights, distribution, outcomeCount, predLabelCount, parameters);

                var isHit = ArrayMath.MaxId(distribution) == outcomeList[e];
                var seen  = seenCounts[e];

                if (isHit)
                {
                    correct += seen;
                }

                total += seen;
            }

            if (total == 0)
            {
                return 0;
            }

            return (double)correct / total;
        }
示例#2
0
        /// <summary>
        /// Computes the negative log-likelihood of the training data at the given point.
        /// </summary>
        /// <param name="x">The point (parameter vector) at which to evaluate.</param>
        /// <returns>The negative log-likelihood at <paramref name="x"/>.</returns>
        /// <exception cref="ArgumentException">The length of <paramref name="x"/> differs from the domain dimension.</exception>
        public virtual double ValueAt(double[] x)
        {
            if (x.Length != dimension)
            {
                throw new ArgumentException("x is invalid, its dimension is not equal to domain dimension.", nameof(x));
            }

            var total = 0d;

            for (var c = 0; c < numContexts; c++)
            {
                // Linear score of every outcome for this context, written into the tempSums buffer.
                for (var o = 0; o < numOutcomes; o++)
                {
                    tempSums[o] = 0;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        tempSums[o] += weight * x[paramIndex];
                    }
                }

                // Log-probability of the observed outcome = its score minus the log normalizer.
                var normalizer      = ArrayMath.LogSumOfExps(tempSums);
                var observedLogProb = tempSums[outcomeList[c]] - normalizer;

                // Each context counts as many times as it was seen.
                total -= observedLogProb * numTimesEventsSeen[c];
            }

            return total;
        }
        /// <summary>
        /// Computes the negative log-likelihood contribution of a range of contexts and stores it
        /// in the <c>negLogLikelihoodThread</c> slot selected by <paramref name="threadIndex"/>.
        /// </summary>
        /// <param name="threadIndex">Index of the <c>negLogLikelihoodThread</c> slot to write to.</param>
        /// <param name="startIndex">Index of the first context to process.</param>
        /// <param name="length">Number of consecutive contexts to process.</param>
        /// <param name="x">The point (parameter vector) at which to evaluate.</param>
        private void NegLLCompute(int threadIndex, int startIndex, int length, double[] x)
        {
            negLogLikelihoodThread[threadIndex] = 0;

            // A per-call buffer is used instead of the tempSums field, which cannot be used in parallel.
            var scores = new double[numOutcomes];
            var end    = startIndex + length;

            for (var c = startIndex; c < end; c++)
            {
                for (var o = 0; o < numOutcomes; o++)
                {
                    scores[o] = 0;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        scores[o] += weight * x[paramIndex];
                    }
                }

                var normalizer      = ArrayMath.LogSumOfExps(scores);
                var observed        = outcomeList[c];
                var observedLogProb = scores[observed] - normalizer;

                negLogLikelihoodThread[threadIndex] -= observedLogProb * numTimesEventsSeen[c];
            }
        }
示例#4
0
        /// <summary>
        /// Turns a gradient into a search direction with the L-BFGS two-loop recursion
        /// (see Nocedal &amp; Wright 2006, Numerical Optimization, p. 178).
        /// </summary>
        /// <param name="direction">
        /// On entry the gradient to transform; on exit the negated result of the two-loop recursion.
        /// The array is modified in place.
        /// </param>
        private void ComputeDirection(double[] direction)
        {
            var historySize = updateInfo.kCounter;
            var rho         = updateInfo.rho;
            var alpha       = updateInfo.alpha; // scratch storage kept on updateInfo to avoid reallocation
            var S           = updateInfo.S;
            var Y           = updateInfo.Y;

            // Backward pass: newest stored update first.
            for (var i = historySize - 1; i >= 0; i--)
            {
                var a = rho[i] * ArrayMath.InnerProduct(S[i], direction);
                alpha[i] = a;

                for (var j = 0; j < dimension; j++)
                {
                    direction[j] -= a * Y[i][j];
                }
            }

            // Forward pass: oldest stored update first.
            for (var i = 0; i < historySize; i++)
            {
                var beta       = rho[i] * ArrayMath.InnerProduct(Y[i], direction);
                var correction = alpha[i] - beta;

                for (var j = 0; j < dimension; j++)
                {
                    direction[j] += S[i][j] * correction;
                }
            }

            // Flip the sign of every component.
            for (var j = 0; j < dimension; j++)
            {
                direction[j] = -direction[j];
            }
        }
示例#5
0
        /// <summary>
        /// Evaluates the wrapped function at the given point and, when the L2 cost is positive,
        /// adds the L2 penalty <c>l2Cost * (x · x)</c>.
        /// </summary>
        /// <param name="x">The point at which to evaluate.</param>
        /// <returns>The (possibly penalized) function value.</returns>
        public double ValueAt(double[] x)
        {
            CheckDimension(x);

            var baseValue = func.ValueAt(x);

            return l2Cost > 0
                ? baseValue + l2Cost * ArrayMath.InnerProduct(x, x)
                : baseValue;
        }
示例#6
0
        /// <summary>
        /// Computes the gradient of the negative log-likelihood at the given point.
        /// </summary>
        /// <param name="x">The point (parameter vector) at which to evaluate.</param>
        /// <returns>
        /// The gradient. The returned array is the <c>gradient</c> buffer of this instance,
        /// which is cleared and refilled on every call.
        /// </returns>
        /// <exception cref="ArgumentException">The length of <paramref name="x"/> differs from the domain dimension.</exception>
        public virtual double[] GradientAt(double[] x)
        {
            if (x.Length != dimension)
            {
                throw new ArgumentException("x is invalid, its dimension is not equal to domain dimension.", nameof(x));
            }

            // Start from a zero gradient.
            Array.Clear(gradient, 0, gradient.Length);

            for (var c = 0; c < numContexts; c++)
            {
                // Linear score of every outcome for this context.
                for (var o = 0; o < numOutcomes; o++)
                {
                    expectation[o] = 0;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        expectation[o] += weight * x[paramIndex];
                    }
                }

                // Turn the scores into normalized probabilities.
                var normalizer = ArrayMath.LogSumOfExps(expectation);

                for (var o = 0; o < numOutcomes; o++)
                {
                    expectation[o] = Math.Exp(expectation[o] - normalizer);
                }

                // Accumulate (model probability - empirical indicator) for every active predicate.
                for (var o = 0; o < numOutcomes; o++)
                {
                    var empirical = outcomeList[c] == o ? 1 : 0;
                    var residual  = expectation[o] - empirical;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        gradient[paramIndex] += weight * residual * numTimesEventsSeen[c];
                    }
                }
            }

            return gradient;
        }
示例#7
0
        /// <summary>
        /// Factor the line searches multiply the step size by after a rejected trial step.
        /// </summary>
        private static readonly double RHO = 0.5; // decrease of step size (must be from 0 to 1)

        /// <summary>
        /// Backtracking line search (see Nocedal &amp; Wright 2006, Numerical Optimization, p. 37).
        /// Starting from <paramref name="initialStepSize"/>, the step is repeatedly multiplied by
        /// <c>RHO</c> until the Armijo sufficient-decrease condition holds.
        /// </summary>
        /// <param name="function">The function to evaluate.</param>
        /// <param name="direction">The search direction.</param>
        /// <param name="lsr">
        /// The line search state. Its "next" point/gradient/value are the starting state of this search;
        /// its "current" point and gradient arrays are overwritten with the accepted point and its gradient,
        /// and the object is updated through <c>SetAll</c>.
        /// </param>
        /// <param name="initialStepSize">The first step size to try.</param>
        public static void DoLineSearch(
            IFunction function,
            double[] direction,
            LineSearchResult lsr,
            double initialStepSize)
        {
            var step      = initialStepSize;
            var evalCount = lsr.FctEvalCount;

            // Starting state of this search.
            var origin      = lsr.NextPoint;
            var originGrad  = lsr.GradAtNext;
            var originValue = lsr.ValueAtNext;
            var size        = origin.Length;

            // The arrays of the older state are reused as output buffers.
            var trialPoint = lsr.CurrPoint;
            var trialGrad  = lsr.GradAtCurr;

            // Loop-invariant part of the right-hand side of the Armijo condition.
            var armijoSlope = C * ArrayMath.InnerProduct(direction, originGrad);

            double trialValue;
            bool   accepted;

            do
            {
                // Trial point for the current step size.
                for (var i = 0; i < size; i++)
                {
                    trialPoint[i] = origin[i] + direction[i] * step;
                }

                trialValue = function.ValueAt(trialPoint);
                evalCount++;

                // Armijo condition.
                accepted = trialValue <= originValue + armijoSlope * step;

                if (!accepted)
                {
                    step *= RHO;
                }
            }
            while (!accepted);

            // Gradient at the accepted point, copied into the reused buffer.
            Array.Copy(function.GradientAt(trialPoint), 0, trialGrad, 0, trialGrad.Length);

            lsr.SetAll(step, originValue, trialValue, originGrad, trialGrad, origin, trialPoint, evalCount);
        }
示例#8
0
        /// <summary>
        /// Decides whether the optimization should stop after the given line search result.
        /// The checks run in this order: function change rate, relative gradient norm, step size,
        /// number of function evaluations. When a monitor is present the reason is displayed.
        /// </summary>
        /// <param name="lsr">The latest line search result.</param>
        /// <returns><c>true</c> if any stopping criterion is met; otherwise <c>false</c>.</returns>
        private bool IsConverged(LineSearchResult lsr)
        {
            var verbose = monitor != null;

            // 1) Function change rate.
            if (lsr.FuncChangeRate < ConvergeTolerance)
            {
                if (verbose)
                {
                    Display("Function change rate is smaller than the threshold " + ConvergeTolerance + ".\nTraining will stop.\n\n");
                }

                return true;
            }

            // 2) Gradient norm, using the criterion ||g(x)|| / max(1, ||x||) < threshold.
            //    With L1 regularization the pseudo-gradient takes the place of the gradient.
            var pointNorm = Math.Max(1, ArrayMath.L2Norm(lsr.NextPoint));
            var gradNorm  = ArrayMath.L2Norm(l1Cost > 0 ? lsr.PseudoGradAtNext : lsr.GradAtNext);

            if (gradNorm / pointNorm < RelGradNormTol)
            {
                if (verbose)
                {
                    Display("Relative L2-norm of the gradient is smaller than the threshold "
                            + RelGradNormTol + ".\nTraining will stop.\n\n");
                }

                return true;
            }

            // 3) Step size.
            if (lsr.StepSize < MinStepSize)
            {
                if (verbose)
                {
                    Display("Step size is smaller than the minimum step size "
                            + MinStepSize + ".\nTraining will stop.\n\n");
                }

                return true;
            }

            // 4) Function evaluation budget.
            if (lsr.FctEvalCount > maxFctEval)
            {
                if (verbose)
                {
                    Display("Maximum number of function evaluations has exceeded the threshold "
                            + maxFctEval + ".\nTraining will stop.\n\n");
                }

                return true;
            }

            return false;
        }
        /// <summary>
        /// Computes the gradient contribution of a range of contexts and accumulates it into the
        /// <c>gradientThread</c> buffer selected by <paramref name="threadIndex"/>, which is cleared first.
        /// </summary>
        /// <param name="threadIndex">Index of the <c>gradientThread</c> buffer to write to.</param>
        /// <param name="startIndex">Index of the first context to process.</param>
        /// <param name="length">Number of consecutive contexts to process.</param>
        /// <param name="x">The point (parameter vector) at which to evaluate.</param>
        private void GradientCompute(int threadIndex, int startIndex, int length, double[] x)
        {
            // Per-call buffer for the outcome scores / probabilities.
            var probabilities = new double[numOutcomes];

            // Reset the buffer selected by threadIndex.
            var partial = gradientThread[threadIndex];
            Array.Clear(partial, 0, partial.Length);

            var end = startIndex + length;

            for (var c = startIndex; c < end; c++)
            {
                // Linear score of every outcome for this context.
                for (var o = 0; o < numOutcomes; o++)
                {
                    probabilities[o] = 0;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        probabilities[o] += weight * x[paramIndex];
                    }
                }

                // Turn the scores into normalized probabilities.
                var normalizer = ArrayMath.LogSumOfExps(probabilities);

                for (var o = 0; o < numOutcomes; o++)
                {
                    probabilities[o] = Math.Exp(probabilities[o] - normalizer);
                }

                // Accumulate (model probability - empirical indicator) for every active predicate.
                for (var o = 0; o < numOutcomes; o++)
                {
                    var empirical = outcomeList[c] == o ? 1 : 0;
                    var residual  = probabilities[o] - empirical;

                    for (var a = 0; a < contexts[c].Length; a++)
                    {
                        var paramIndex = IndexOf(o, contexts[c][a]);
                        var weight     = values != null ? values[c][a] : 1.0;

                        partial[paramIndex] += weight * residual * numTimesEventsSeen[c];
                    }
                }
            }
        }
示例#10
0
        /// <summary>
        /// Evaluates the model on index-based data; intended for reporting accuracy during training.
        /// </summary>
        /// <param name="context">Indices of the predicates observed at the present decision point.</param>
        /// <param name="values">Weights of the observed predicates, or <c>null</c> to weight every predicate with 1.</param>
        /// <param name="probs">
        /// Buffer for the result. The scores are added to whatever it already contains before
        /// normalization, and it is overwritten with the normalized probabilities.
        /// </param>
        /// <param name="nOutcomes">The number of outcomes.</param>
        /// <param name="nPredLabels">The number of unique predicates.</param>
        /// <param name="parameters">The model parameters, read at index <c>outcome * nPredLabels + predicate</c>.</param>
        /// <returns>The <paramref name="probs"/> array, holding the normalized probabilities.</returns>
        public static double[] Eval(int[] context, float[] values, double[] probs, int nOutcomes, int nPredLabels, double[] parameters)
        {
            // Add every observed predicate's weighted parameter to the score of each outcome.
            for (var p = 0; p < context.Length; p++)
            {
                var predicate = context[p];
                var weight    = values != null ? values[p] : 1d;

                for (var outcome = 0; outcome < nOutcomes; outcome++)
                {
                    probs[outcome] += weight * parameters[outcome * nPredLabels + predicate];
                }
            }

            // Normalize in log space.
            var normalizer = ArrayMath.LogSumOfExps(probs);

            for (var outcome = 0; outcome < nOutcomes; outcome++)
            {
                probs[outcome] = Math.Exp(probs[outcome] - normalizer);
            }

            return probs;
        }
示例#11
0
        /// <summary>
        /// Evaluates the model on named predicates; intended for inference.
        /// </summary>
        /// <param name="context">The predicates observed at the present decision point.</param>
        /// <param name="values">Weights of the observed predicates, or <c>null</c> to weight every predicate with 1.</param>
        /// <param name="probs">
        /// Buffer for the result. The scores are added to whatever it already contains before
        /// normalization, and it is overwritten with the normalized probabilities.
        /// </param>
        /// <returns>The <paramref name="probs"/> array, holding the normalized probabilities.</returns>
        public double[] Eval(string[] context, float[] values, double[] probs)
        {
            var ep = evalParameters.Parameters;

            for (var ci = 0; ci < context.Length; ci++)
            {
                var predIdx = GetPredIndex(context[ci]);

                // Predicates that resolve to a negative index contribute nothing.
                if (predIdx >= 0)
                {
                    var weight = values != null ? values[ci] : 1d;

                    var predicateParams = ep[predIdx];
                    var outcomes        = predicateParams.Outcomes;
                    var parameters      = predicateParams.Parameters;

                    for (var k = 0; k < outcomes.Length; k++)
                    {
                        probs[outcomes[k]] += weight * parameters[k];
                    }
                }
            }

            // Normalize in log space.
            var normalizer = ArrayMath.LogSumOfExps(probs);

            for (var outcome = 0; outcome < outcomeNames.Length; outcome++)
            {
                probs[outcome] = Math.Exp(probs[outcome] - normalizer);
            }

            return probs;
        }
示例#12
0
        /// <summary>
        /// Constrained backtracking line search (see section 3.2 of "Scalable Training of
        /// L1-Regularized Log-Linear Models", Andrew et al. 2007). Every trial point is projected
        /// so that no component moves to the other side of zero relative to the sign vector,
        /// and the L1 penalty is added to the function value.
        /// </summary>
        /// <param name="function">The function to evaluate.</param>
        /// <param name="direction">The search direction.</param>
        /// <param name="lsr">
        /// The line search state. Its "next" point/gradients/value are the starting state of this search;
        /// its "current" point and gradient arrays and its sign vector are overwritten, and the object is
        /// updated through <c>SetAll</c>.
        /// </param>
        /// <param name="l1Cost">The L1 cost.</param>
        /// <param name="initialStepSize">The first step size to try.</param>
        public static void DoConstrainedLineSearch(
            IFunction function,
            double[] direction,
            LineSearchResult lsr,
            double l1Cost,
            double initialStepSize)
        {
            var step      = initialStepSize;
            var evalCount = lsr.FctEvalCount;

            // Starting state of this search.
            var origin           = lsr.NextPoint;
            var signs            = lsr.SignVector; // existing sign vector, refreshed below
            var originGrad       = lsr.GradAtNext;
            var originPseudoGrad = lsr.PseudoGradAtNext;
            var originValue      = lsr.ValueAtNext;
            var size             = origin.Length;

            // The arrays of the older state are reused as output buffers.
            var trialPoint = lsr.CurrPoint;
            var trialGrad  = lsr.GradAtCurr;

            // New sign vector: the point itself, or the negated pseudo-gradient where the point is zero.
            for (var i = 0; i < size; i++)
            {
                signs[i] = origin[i].Equals(0d) ? -originPseudoGrad[i] : origin[i];
            }

            double trialValue;
            bool   accepted;

            do
            {
                // Trial point with projection: components that do not keep the sign are set to zero.
                for (var i = 0; i < size; i++)
                {
                    var candidate = origin[i] + direction[i] * step;

                    trialPoint[i] = candidate * signs[i] <= 0 ? 0 : candidate;
                }

                // Function value including the L1 penalty.
                trialValue = function.ValueAt(trialPoint) + l1Cost * ArrayMath.L1Norm(trialPoint);
                evalCount++;

                // Pseudo-gradient at the start times the actual (projected) displacement.
                double decrease = 0;
                for (var i = 0; i < size; i++)
                {
                    decrease += (trialPoint[i] - origin[i]) * originPseudoGrad[i];
                }

                // Sufficient decrease condition.
                accepted = trialValue <= originValue + C * decrease;

                if (!accepted)
                {
                    step *= RHO;
                }
            }
            while (!accepted);

            // Gradient at the accepted point, copied into the reused buffer.
            Array.Copy(function.GradientAt(trialPoint), 0, trialGrad, 0, trialGrad.Length);

            lsr.SetAll(step, originValue, trialValue, originGrad, trialGrad, originPseudoGrad, origin, trialPoint,
                       signs, evalCount);
        }
示例#13
0
        /// <summary>
        /// Find the parameters that minimize the objective function.
        /// </summary>
        /// <remarks>
        /// The function is wrapped in an <c>L2RegFunction</c> with the configured L2 cost. The search starts
        /// at the origin and runs at most <c>iterations</c> iterations, stopping earlier when
        /// <c>IsConverged</c> reports convergence. When the L1 cost is positive the pseudo-gradient and the
        /// constrained line search are used instead of the plain gradient and line search.
        /// </remarks>
        /// <param name="function">The objective function.</param>
        /// <returns>The minimizing parameters (the array held by the final line search result).</returns>
        /// <exception cref="OperationCanceledException">Occurs when the evaluation monitor cancels the operation.</exception>
        public double[] Minimize(IFunction function)
        {
            var l2RegFunction = new L2RegFunction(function, l2Cost);

            dimension  = l2RegFunction.Dimension;
            updateInfo = new UpdateInfo(updates, dimension);

            // Current point is at the origin
            var currPoint = new double[dimension];
            var currValue = l2RegFunction.ValueAt(currPoint);

            // Gradient at the current point, copied into an array owned by this method
            var currGrad = new double[dimension];

            Array.Copy(l2RegFunction.GradientAt(currPoint), 0, currGrad, 0, dimension);

            // Pseudo-gradient - only used when L1-regularization is enabled
            double[] pseudoGrad = null;
            if (l1Cost > 0)
            {
                currValue += l1Cost * ArrayMath.L1Norm(currPoint);
                pseudoGrad = new double[dimension];
                ComputePseudoGrad(currPoint, currGrad, pseudoGrad);
            }

            var lsr = l1Cost > 0
                ? LineSearchResult.GetInitialObjectForL1(currValue, currGrad, pseudoGrad, currPoint)
                : LineSearchResult.GetInitialObject(currValue, currGrad, currPoint);

            if (monitor != null)
            {
                Display("\nSolving convex optimization problem.");
                Display("\nObjective function has " + dimension + " variable(s).");
                Display("\n\nPerforming " + iterations + " iterations with " + "L1Cost=" + l1Cost + " and L2Cost=" + l2Cost + "\n");
            }

            var direction = new double[dimension];

            // A monotonic timer is used for the reported running time; differences of DateTime.Now
            // can be distorted by daylight-saving transitions or system clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            var token     = monitor != null ? monitor.Token : CancellationToken.None;

            // Initial step size for the 1st iteration
            var initialStepSize = l1Cost > 0
                ? ArrayMath.InvL2Norm(lsr.PseudoGradAtNext)
                : ArrayMath.InvL2Norm(lsr.GradAtNext);

            for (var iteration = 1; iteration <= iterations; iteration++)
            {
                // cancel if requested
                token.ThrowIfCancellationRequested();

                // Find direction: start from the (pseudo-)gradient and transform it in place
                Array.Copy(l1Cost > 0
                    ? lsr.PseudoGradAtNext
                    : lsr.GradAtNext, 0, direction, 0, direction.Length);

                ComputeDirection(direction);

                // Line search
                if (l1Cost > 0)
                {
                    // Constrain the search direction: zero every component that does not
                    // have the opposite sign of the pseudo-gradient
                    pseudoGrad = lsr.PseudoGradAtNext;

                    for (var i = 0; i < dimension; i++)
                    {
                        if (direction[i] * pseudoGrad[i] >= 0)
                        {
                            direction[i] = 0;
                        }
                    }

                    LineSearch.DoConstrainedLineSearch(l2RegFunction, direction, lsr, l1Cost, initialStepSize);

                    // Refresh the pseudo-gradient for the newly accepted point
                    ComputePseudoGrad(lsr.NextPoint, lsr.GradAtNext, pseudoGrad);

                    lsr.PseudoGradAtNext = pseudoGrad;
                }
                else
                {
                    LineSearch.DoLineSearch(l2RegFunction, direction, lsr, initialStepSize);
                }

                // Save Hessian updates
                updateInfo.Update(lsr);

                if (monitor != null)
                {
                    // Left-pad the iteration number to three characters
                    if (iteration < 10)
                    {
                        Display("  " + iteration + ":  ");
                    }
                    else if (iteration < 100)
                    {
                        Display(" " + iteration + ":  ");
                    }
                    else
                    {
                        Display(iteration + ":  ");
                    }

                    if (Evaluator != null)
                    {
                        Display("\t" + lsr.ValueAtNext
                                + "\t" + lsr.FuncChangeRate
                                + "\t" + Evaluator.Evaluate(lsr.NextPoint) + "\n");
                    }
                    else
                    {
                        Display("\t " + lsr.ValueAtNext +
                                "\t" + lsr.FuncChangeRate + "\n");
                    }
                }

                if (IsConverged(lsr))
                {
                    break;
                }

                // Every iteration after the first starts from the configured initial step size
                initialStepSize = InitialStepSize;
            }

            // Undo L2-shrinkage if Elastic Net is used (since in that case, the shrinkage is done twice).
            // The scaling is applied in place to the array exposed by lsr.NextPoint.
            if (l1Cost > 0 && l2Cost > 0)
            {
                for (var i = 0; i < dimension; i++)
                {
                    lsr.NextPoint[i] = Math.Sqrt(1 + l2Cost) * lsr.NextPoint[i];
                }
            }

            if (monitor != null)
            {
                Display("Running time: " + stopwatch.Elapsed.TotalSeconds + "s\n");
            }

            // Release memory
            updateInfo = null;

            // The array held by the line search result is returned directly (no defensive copy).
            return lsr.NextPoint;
        }