Exemplo n.º 1
0
 /// <summary>
 /// Trains a non-linear CRF on the given data and stores the learned weight matrices in this instance's fields.
 /// </summary>
 /// <remarks>
 /// When <c>flags.secondOrderNonLinear</c> is set, the second-order objective is used and four matrices
 /// (edge input, edge output, node input, node output) are stored; otherwise the first-order objective is
 /// used and three matrices (linear, input layer, output layer) are stored. In both cases the objective is
 /// also registered as <c>cliquePotentialFunctionHelper</c> before training starts.
 /// </remarks>
 /// <param name="data">Feature data, passed through to the objective function.</param>
 /// <param name="labels">Labels, passed through to the objective function.</param>
 /// <param name="evaluators">Evaluators handed to <c>TrainWeightsUsingNonLinearCRF</c>.</param>
 /// <param name="pruneFeatureItr">Not used by this override.</param>
 /// <param name="featureVals">Feature values; only passed to the first-order objective.</param>
 /// <returns>Always <c>null</c>; the trained weights are exposed through the instance fields instead.</returns>
 protected internal override double[] TrainWeights(int[][][][] data, int[][] labels, IEvaluator[] evaluators, int pruneFeatureItr, double[][][][] featureVals)
 {
     if (!flags.secondOrderNonLinear)
     {
         // First-order model: linear weights plus one hidden-layer / output-layer pair.
         CRFNonLinearLogConditionalObjectiveFunction objective = new CRFNonLinearLogConditionalObjectiveFunction(
             data, labels, windowSize, classIndex, labelIndices, map, flags,
             nodeFeatureIndicesMap.Size(), edgeFeatureIndicesMap.Size(), featureVals);
         if (flags.useAdaGradFOBOS)
         {
             objective.gradientsOnly = true;
         }
         // The helper must be registered before the training call, as in the original ordering.
         cliquePotentialFunctionHelper = objective;
         Triple <double[][], double[][], double[][]> firstOrderParts = objective.SeparateWeights(TrainWeightsUsingNonLinearCRF(objective, evaluators));
         linearWeights      = firstOrderParts.First();
         inputLayerWeights  = firstOrderParts.Second();
         outputLayerWeights = firstOrderParts.Third();
         return(null);
     }

     // Second-order model: separate hidden-layer / output-layer pairs for edge and node cliques.
     CRFNonLinearSecondOrderLogConditionalObjectiveFunction secondOrderObjective = new CRFNonLinearSecondOrderLogConditionalObjectiveFunction(
         data, labels, windowSize, classIndex, labelIndices, map, flags,
         nodeFeatureIndicesMap.Size(), edgeFeatureIndicesMap.Size());
     cliquePotentialFunctionHelper = secondOrderObjective;
     Quadruple <double[][], double[][], double[][], double[][]> secondOrderParts = secondOrderObjective.SeparateWeights(TrainWeightsUsingNonLinearCRF(secondOrderObjective, evaluators));
     inputLayerWeights4Edge  = secondOrderParts.First();
     outputLayerWeights4Edge = secondOrderParts.Second();
     inputLayerWeights       = secondOrderParts.Third();
     outputLayerWeights      = secondOrderParts.Fourth();
     return(null);
 }
        /// <summary>
        /// Builds the clique potential function that corresponds to the flattened weight vector
        /// <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Flattened weights, split into four matrices by <c>SeparateWeights</c>.</param>
        /// <returns>A <c>NonLinearSecondOrderCliquePotentialFunction</c> over the separated matrices and the current flags.</returns>
        public virtual ICliquePotentialFunction GetCliquePotentialFunction(double[] x)
        {
            Quadruple <double[][], double[][], double[][], double[][]> separated = SeparateWeights(x);

            // Constructor argument order:
            //   First  -> inputLayerWeights4Edge
            //   Second -> outputLayerWeights4Edge
            //   Third  -> inputLayerWeights
            //   Fourth -> outputLayerWeights
            return(new NonLinearSecondOrderCliquePotentialFunction(
                       separated.First(),
                       separated.Second(),
                       separated.Third(),
                       separated.Fourth(),
                       flags));
        }
        // todo [cdm]: Below data[m] --> docData
        /// <summary>
        /// Calculates both the value and the partial derivatives at the point <paramref name="x"/> and
        /// saves them internally (in <c>value</c> and <c>derivative</c>).
        /// </summary>
        /// <remarks>
        /// <para>
        /// <c>value</c> is the negated sum of the conditional log probabilities of the gold labels over all
        /// documents, plus the penalty of the configured prior. <c>derivative</c> holds, per weight, the
        /// expected count minus the empirical count, plus the prior's gradient term.
        /// </para>
        /// <para>
        /// The derivative vector is laid out as: edge input weights, node input weights and then, only when
        /// <c>useOutputLayer</c> is set, edge output weights followed by node output weights.
        /// </para>
        /// <para>
        /// When <c>flags.skipOutputRegularization</c> or <c>flags.softmaxOutputLayer</c> is set, the prior is
        /// applied only to the first <c>beforeOutputWeights</c> entries.
        /// </para>
        /// </remarks>
        /// <param name="x">Flattened weight vector; split into four matrices by <c>SeparateWeights</c>.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the accumulated log probability is NaN, or when the number of derivative entries written
        /// does not match <c>beforeOutputWeights</c> / <c>x.Length</c>.
        /// </exception>
        protected internal override void Calculate(double[] x)
        {
            // the log prob of the sequence given the model, which is the negation of value at this point
            double prob = 0.0;
            Quadruple <double[][], double[][], double[][], double[][]> allParams = SeparateWeights(x);

            double[][] W4Edge = allParams.First();   // inputLayerWeights4Edge
            double[][] U4Edge = allParams.Second();  // outputLayerWeights4Edge
            double[][] W      = allParams.Third();   // inputLayerWeights
            double[][] U      = allParams.Fourth();  // outputLayerWeights

            // Row-wise softmax of the output weights; both stay null unless flags.softmaxOutputLayer is set.
            double[][] Y4Edge = null;
            double[][] Y      = null;
            if (flags.softmaxOutputLayer)
            {
                Y4Edge = new double[U4Edge.Length][];
                for (int i = 0; i < U4Edge.Length; i++)
                {
                    Y4Edge[i] = ArrayMath.Softmax(U4Edge[i]);
                }
                Y = new double[U.Length][];
                for (int i = 0; i < U.Length; i++)
                {
                    Y[i] = ArrayMath.Softmax(U[i]);
                }
            }

            // the empirical counts (accumulated from the gold labels)
            double[][] What4Edge = EmptyW4Edge();
            double[][] Uhat4Edge = EmptyU4Edge();
            double[][] What      = EmptyW();
            double[][] Uhat      = EmptyU();
            // the expectations over counts
            // first index is feature index, second index is of possible labeling
            double[][] eW4Edge = EmptyW4Edge();
            double[][] eU4Edge = EmptyU4Edge();
            double[][] eW      = EmptyW();
            double[][] eU      = EmptyU();

            // With a sparse or tied output layer, hidden unit q only feeds output (q % outputSize).
            bool tiedOrSparse = flags.sparseOutputLayer || flags.tieOutputLayer;

            // iterate over all the documents
            for (int m = 0; m < data.Length; m++)
            {
                int[][][] docData   = data[m];
                int[]     docLabels = labels[m];
                NonLinearSecondOrderCliquePotentialFunction cliquePotentialFunction = new NonLinearSecondOrderCliquePotentialFunction(W4Edge, U4Edge, W, U, flags);
                // make a clique tree for this document
                CRFCliqueTree <string> cliqueTree = CRFCliqueTree.GetCalibratedCliqueTree(docData, labelIndices, numClasses, classIndex, backgroundSymbol, cliquePotentialFunction, null);
                // compute the log probability of the document given the model with the parameters x
                int[] given = new int[window - 1];
                Arrays.Fill(given, classIndex.IndexOf(backgroundSymbol));
                int[] windowLabels = new int[window];
                Arrays.Fill(windowLabels, classIndex.IndexOf(backgroundSymbol));
                if (docLabels.Length > docData.Length)
                {
                    // only true for self-training
                    // fill the given array with the extra docLabels
                    System.Array.Copy(docLabels, 0, given, 0, given.Length);
                    System.Array.Copy(docLabels, 0, windowLabels, 0, windowLabels.Length);
                    // shift the docLabels array left
                    int[] newDocLabels = new int[docData.Length];
                    System.Array.Copy(docLabels, docLabels.Length - newDocLabels.Length, newDocLabels, 0, newDocLabels.Length);
                    docLabels = newDocLabels;
                }
                // iterate over the positions in this document
                for (int i = 0; i < docData.Length; i++)
                {
                    int    label = docLabels[i];
                    double p     = cliqueTree.CondLogProbGivenPrevious(i, label, given);
                    if (Verbose)
                    {
                        log.Info("P(" + label + "|" + ArrayMath.ToString(given) + ")=" + p);
                    }
                    prob += p;
                    // slide the conditioning window one position to the left and append the current label
                    System.Array.Copy(given, 1, given, 0, given.Length - 1);
                    given[given.Length - 1] = label;
                }
                // compute the expected counts for this document, which we will need to compute the derivative
                // iterate over the positions in this document
                for (int pos = 0; pos < docData.Length; pos++)
                {
                    // for each possible clique at this position
                    System.Array.Copy(windowLabels, 1, windowLabels, 0, window - 1);
                    windowLabels[window - 1] = docLabels[pos];
                    for (int j = 0; j < docData[pos].Length; j++)
                    {
                        IIndex <CRFLabel> labelIndex     = labelIndices[j];
                        int[]             cliqueFeatures = docData[pos][j];

                        // j == 0 uses the node parameters; every other clique uses the "4Edge" parameters.
                        bool       isNode     = (j == 0);
                        int        inputSize  = isNode ? inputLayerSize : inputLayerSize4Edge;
                        int        outputSize = isNode ? outputLayerSize : outputLayerSize4Edge;
                        double[][] cliqueU    = isNode ? U : U4Edge;
                        double[][] cliqueY    = isNode ? Y : Y4Edge;        // null unless softmaxOutputLayer
                        double[][] cliqueWhat = isNode ? What : What4Edge;
                        double[][] cliqueUhat = isNode ? Uhat : Uhat4Edge;
                        double[][] cliqueEW   = isNode ? eW : eW4Edge;
                        double[][] cliqueEU   = isNode ? eU : eU4Edge;
                        double[]   As         = cliquePotentialFunction.HiddenLayerOutput(isNode ? W : W4Edge, cliqueFeatures, flags, null, j + 1);

                        // Derivative of the hidden activation expressed via its output a:
                        // a * (1 - a) when useSigmoid, otherwise 1 - a^2 (presumably tanh -- confirm).
                        double[] fDeriv = new double[inputSize];
                        for (int q = 0; q < inputSize; q++)
                        {
                            if (useSigmoid)
                            {
                                fDeriv[q] = As[q] * (1 - As[q]);
                            }
                            else
                            {
                                fDeriv[q] = 1 - As[q] * As[q];
                            }
                        }

                        // calculating yTimesA for softmax
                        double[][] yTimesA      = null;
                        double[]   sumOfYTimesA = null;
                        if (flags.softmaxOutputLayer)
                        {
                            yTimesA = new double[outputSize][];
                            for (int ii = 0; ii < outputSize; ii++)
                            {
                                yTimesA[ii] = new double[numHiddenUnits];
                            }
                            sumOfYTimesA = new double[outputSize];
                            for (int k = 0; k < outputSize; k++)
                            {
                                // a tied output layer shares row 0 across all labels
                                double[] Yk  = cliqueY[flags.tieOutputLayer ? 0 : k];
                                double   sum = 0;
                                for (int q = 0; q < inputSize; q++)
                                {
                                    if (q % outputSize == k)
                                    {
                                        int    hiddenUnitNo = q / outputSize;
                                        double val          = As[q] * Yk[hiddenUnitNo];
                                        yTimesA[k][hiddenUnitNo] = val;
                                        sum += val;
                                    }
                                }
                                sumOfYTimesA[k] = sum;
                            }
                        }

                        // calculating Uhat What (empirical counts for the gold labeling of this clique)
                        int[] cliqueLabel = new int[j + 1];
                        System.Array.Copy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
                        CRFLabel crfLabel        = new CRFLabel(cliqueLabel);
                        int      givenLabelIndex = labelIndex.IndexOf(crfLabel);
                        int      givenRow        = flags.tieOutputLayer ? 0 : givenLabelIndex;
                        double[] Uk              = cliqueU[givenRow];
                        double[] UhatK           = cliqueUhat[givenRow];
                        double[] YkGiven         = null;
                        double[] yTimesAK        = null;
                        double   sumOfYTimesAK   = 0;
                        if (flags.softmaxOutputLayer)
                        {
                            YkGiven       = cliqueY[givenRow];
                            // yTimesA is always indexed by the actual label, even when the output layer is tied
                            yTimesAK      = yTimesA[givenLabelIndex];
                            sumOfYTimesAK = sumOfYTimesA[givenLabelIndex];
                        }
                        for (int q = 0; q < inputSize; q++)
                        {
                            double deltaK = 1;
                            if (tiedOrSparse)
                            {
                                if (q % outputSize == givenLabelIndex)
                                {
                                    int hiddenUnitNo = q / outputSize;
                                    if (flags.softmaxOutputLayer)
                                    {
                                        UhatK[hiddenUnitNo] += (yTimesAK[hiddenUnitNo] - YkGiven[hiddenUnitNo] * sumOfYTimesAK);
                                        deltaK *= YkGiven[hiddenUnitNo];
                                    }
                                    else
                                    {
                                        UhatK[hiddenUnitNo] += As[q];
                                        deltaK *= Uk[hiddenUnitNo];
                                    }
                                }
                            }
                            else
                            {
                                UhatK[q] += As[q];
                                if (useOutputLayer)
                                {
                                    deltaK *= Uk[q];
                                }
                            }
                            if (useHiddenLayer)
                            {
                                deltaK *= fDeriv[q];
                            }

                            // Decide whether hidden unit q contributes to the empirical input-weight counts.
                            bool updatesWhat;
                            if (useOutputLayer)
                            {
                                updatesWhat = !tiedOrSparse || (q % outputSize == givenLabelIndex);
                            }
                            else
                            {
                                updatesWhat = (q == givenLabelIndex);
                            }
                            if (updatesWhat)
                            {
                                double[] WhatK = cliqueWhat[q];
                                foreach (int cliqueFeature in cliqueFeatures)
                                {
                                    WhatK[cliqueFeature] += deltaK;
                                }
                            }
                        }

                        // expected counts: weight every possible labeling of this clique by its probability
                        for (int k = 0; k < labelIndex.Size(); k++)
                        {
                            // labelIndex.size() == numClasses
                            int[]  label = labelIndex.Get(k).GetLabel();
                            double p     = cliqueTree.Prob(pos, label);
                            // probability of these labels occurring in this clique with these features
                            int      row = flags.tieOutputLayer ? 0 : k;
                            double[] Uk2 = cliqueU[row];
                            double[] eUK = cliqueEU[row];
                            double[] Yk2 = flags.softmaxOutputLayer ? cliqueY[row] : null;
                            if (useOutputLayer)
                            {
                                for (int q = 0; q < inputSize; q++)
                                {
                                    double deltaQ = 1;
                                    if (tiedOrSparse)
                                    {
                                        if (q % outputSize == k)
                                        {
                                            int hiddenUnitNo = q / outputSize;
                                            if (flags.softmaxOutputLayer)
                                            {
                                                eUK[hiddenUnitNo] += (yTimesA[k][hiddenUnitNo] - Yk2[hiddenUnitNo] * sumOfYTimesA[k]) * p;
                                                deltaQ             = Yk2[hiddenUnitNo];
                                            }
                                            else
                                            {
                                                eUK[hiddenUnitNo] += As[q] * p;
                                                deltaQ             = Uk2[hiddenUnitNo];
                                            }
                                        }
                                    }
                                    else
                                    {
                                        eUK[q] += As[q] * p;
                                        deltaQ  = Uk2[q];
                                    }
                                    if (useHiddenLayer)
                                    {
                                        deltaQ *= fDeriv[q];
                                    }
                                    if (!tiedOrSparse || (q % outputSize == k))
                                    {
                                        double[] eWq = cliqueEW[q];
                                        foreach (int cliqueFeature in cliqueFeatures)
                                        {
                                            eWq[cliqueFeature] += deltaQ * p;
                                        }
                                    }
                                }
                            }
                            else
                            {
                                double deltaK = 1;
                                if (useHiddenLayer)
                                {
                                    deltaK *= fDeriv[k];
                                }
                                double[] eWK = cliqueEW[k];
                                foreach (int cliqueFeature in cliqueFeatures)
                                {
                                    eWK[cliqueFeature] += deltaK * p;
                                }
                            }
                        }
                    }
                }
            }
            if (double.IsNaN(prob))
            {
                // shouldn't be the case
                throw new Exception("Got NaN for prob in CRFNonLinearSecondOrderLogConditionalObjectiveFunction.calculate()");
            }
            value = -prob;
            if (Verbose)
            {
                log.Info("value is " + value);
            }
            // compute the partial derivative for each feature by comparing expected counts to empirical counts
            int index = 0;

            for (int i = 0; i < eW4Edge.Length; i++)
            {
                for (int j = 0; j < eW4Edge[i].Length; j++)
                {
                    derivative[index++] = (eW4Edge[i][j] - What4Edge[i][j]);
                    if (Verbose)
                    {
                        log.Info("inputLayerWeights4Edge deriv(" + i + "," + j + ") = " + eW4Edge[i][j] + " - " + What4Edge[i][j] + " = " + derivative[index - 1]);
                    }
                }
            }
            for (int i = 0; i < eW.Length; i++)
            {
                for (int j = 0; j < eW[i].Length; j++)
                {
                    derivative[index++] = (eW[i][j] - What[i][j]);
                    if (Verbose)
                    {
                        log.Info("inputLayerWeights deriv(" + i + "," + j + ") = " + eW[i][j] + " - " + What[i][j] + " = " + derivative[index - 1]);
                    }
                }
            }
            if (index != beforeOutputWeights)
            {
                throw new Exception("after W derivative, index(" + index + ") != beforeOutputWeights(" + beforeOutputWeights + ")");
            }
            if (useOutputLayer)
            {
                // Fixed: these loops previously declared `i` but tested and incremented an out-of-scope index.
                for (int i = 0; i < eU4Edge.Length; i++)
                {
                    for (int j = 0; j < eU4Edge[i].Length; j++)
                    {
                        derivative[index++] = (eU4Edge[i][j] - Uhat4Edge[i][j]);
                        if (Verbose)
                        {
                            log.Info("outputLayerWeights4Edge deriv(" + i + "," + j + ") = " + eU4Edge[i][j] + " - " + Uhat4Edge[i][j] + " = " + derivative[index - 1]);
                        }
                    }
                }
                for (int i = 0; i < eU.Length; i++)
                {
                    for (int j = 0; j < eU[i].Length; j++)
                    {
                        derivative[index++] = (eU[i][j] - Uhat[i][j]);
                        if (Verbose)
                        {
                            log.Info("outputLayerWeights deriv(" + i + "," + j + ") = " + eU[i][j] + " - " + Uhat[i][j] + " = " + derivative[index - 1]);
                        }
                    }
                }
            }
            if (index != x.Length)
            {
                throw new Exception("after W derivative, index(" + index + ") != x.length(" + x.Length + ")");
            }

            // Number of leading weights the prior applies to; output weights are excluded when requested
            // or when the output layer goes through a softmax.
            int regSize = x.Length;

            if (flags.skipOutputRegularization || flags.softmaxOutputLayer)
            {
                regSize = beforeOutputWeights;
            }
            // incorporate priors
            if (prior == QuadraticPrior)
            {
                double sigmaSq = sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double k = 1.0;
                    double w = x[i];
                    value         += k * w * w / 2.0 / sigmaSq;
                    derivative[i] += k * w / sigmaSq;
                }
            }
            else if (prior == HuberPrior)
            {
                double sigmaSq = sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double w    = x[i];
                    double wabs = System.Math.Abs(w);
                    if (wabs < epsilon)
                    {
                        // quadratic region close to zero
                        value         += w * w / 2.0 / epsilon / sigmaSq;
                        derivative[i] += w / epsilon / sigmaSq;
                    }
                    else
                    {
                        // linear region away from zero
                        value         += (wabs - epsilon / 2) / sigmaSq;
                        derivative[i] += ((w < 0.0) ? -1.0 : 1.0) / sigmaSq;
                    }
                }
            }
            else if (prior == QuarticPrior)
            {
                double sigmaQu = sigma * sigma * sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double k = 1.0;
                    double w = x[i];
                    value += k * w * w * w * w / 2.0 / sigmaQu;
                    // NOTE(review): this gradient term is linear in w, whereas differentiating the w^4
                    // penalty above would give a cubic term. Kept as in the original -- confirm intent.
                    derivative[i] += k * w / sigmaQu;
                }
            }
        }
        /// <summary>Calculates both value and partial derivatives at the point x, and saves them internally.</summary>
        /// <remarks>
        /// Every index of <c>totalData</c> is submitted to a <c>MulticoreWrapper</c>; indices at or
        /// beyond <c>unsupDropoutStartIndex</c> are flagged as unsupervised. Each result that comes
        /// back is folded into the running log-probability, the dropout prior gradient total and the
        /// expected counts <c>E</c>. Afterwards <c>value</c> is set to the negated log-probability and
        /// <c>derivative</c> is filled, in row-major order over <c>E</c>, with
        /// <c>E - Ehat + dropoutScale * dropoutPriorGradTotal</c>.
        /// </remarks>
        /// <param name="x">Flat weight vector; it is unpacked into <c>weights</c> via <c>To2D</c>.</param>
        /// <exception cref="Exception">Thrown when the accumulated log-probability is NaN.</exception>
        protected internal override void Calculate(double[] x)
        {
            // the log prob of the sequence given the model, which is the negation of value at this point
            double prob = 0.0;

            // unpack the flat vector into the reusable 2D weight buffer and install it
            To2D(x, weights);
            SetWeights(weights);

            // the expectations over counts
            // first index is feature index, second index is of possible labeling
            // (both accumulators are reused across calls, so they must be zeroed first)
            Clear2D(E);
            Clear2D(dropoutPriorGradTotal);

            MulticoreWrapper<Pair<int, bool>, Quadruple<int, double, IDictionary<int, double[]>, IDictionary<int, double[]>>> wrapper =
                new MulticoreWrapper<Pair<int, bool>, Quadruple<int, double, IDictionary<int, double[]>, IDictionary<int, double[]>>>(multiThreadGrad, dropoutPriorThreadProcessor);

            // submit every document (supervised and unsupervised alike), draining
            // whatever results are already available after each submission
            for (int m = 0; m < totalData.Length; m++)
            {
                bool submitIsUnsup = (m >= unsupDropoutStartIndex);
                wrapper.Put(new Pair<int, bool>(m, submitIsUnsup));
                while (wrapper.Peek())
                {
                    prob += AccumulateDropoutResult(wrapper.Poll());
                }
            }

            // presumably blocks until all submitted work has finished -- confirm against MulticoreWrapper;
            // then drain the results that were still pending
            wrapper.Join();
            while (wrapper.Peek())
            {
                prob += AccumulateDropoutResult(wrapper.Poll());
            }

            if (double.IsNaN(prob))
            {
                // shouldn't be the case
                throw new Exception("Got NaN for prob in CRFLogConditionalObjectiveFunctionWithDropout.calculate()" + " - this may well indicate numeric underflow due to overly long documents.");
            }

            // because we minimize -L(\theta)
            value = -prob;
            if (Verbose)
            {
                log.Info("value is " + System.Math.Exp(-value));
            }

            // compute the partial derivative for each feature by comparing expected counts to empirical counts;
            // 'index' walks the flat derivative vector in the same row-major order as E
            int index = 0;
            for (int i = 0; i < E.Length; i++)
            {
                for (int j = 0; j < E[i].Length; j++)
                {
                    // because we minimize -L(\theta)
                    derivative[index]  = (E[i][j] - Ehat[i][j]);
                    derivative[index] += dropoutScale * dropoutPriorGradTotal[i][j];
                    if (Verbose)
                    {
                        log.Info("deriv(" + i + ',' + j + ") = " + E[i][j] + " - " + Ehat[i][j] + " = " + derivative[index]);
                    }
                    index++;
                }
            }
        }

        /// <summary>
        /// Folds one per-document worker result into the shared accumulators used by
        /// <c>Calculate</c> and returns that document's contribution to the log-probability.
        /// </summary>
        /// <remarks>
        /// The result's first component is the document index; a document counts as unsupervised when
        /// that index is at or beyond <c>unsupDropoutStartIndex</c>. For unsupervised documents the
        /// log-probability and the dropout gradient are scaled by <c>unsupDropoutScale</c>, and the
        /// expected counts in the third component are ignored.
        /// </remarks>
        /// <param name="result">
        /// Worker output: (document index, log-probability, partial expected counts, partial dropout
        /// prior gradient). The two dictionaries may be null, in which case they are skipped.
        /// </param>
        /// <returns>The (possibly scaled) log-probability contribution of this document.</returns>
        private double AccumulateDropoutResult(Quadruple<int, double, IDictionary<int, double[]>, IDictionary<int, double[]>> result)
        {
            int  docIndex = result.First();
            bool isUnsup  = docIndex >= unsupDropoutStartIndex;

            IDictionary<int, double[]> partialDropout = result.Fourth();
            if (partialDropout != null)
            {
                if (isUnsup)
                {
                    Combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
                }
                else
                {
                    Combine2DArr(dropoutPriorGradTotal, partialDropout);
                }
            }

            if (isUnsup)
            {
                // unsupervised documents contribute no expected counts, only a scaled log-probability
                return unsupDropoutScale * result.Second();
            }

            IDictionary<int, double[]> partialE = result.Third();
            if (partialE != null)
            {
                Combine2DArr(E, partialE);
            }
            return result.Second();
        }