예제 #1
0
 /// <summary>
 /// Rebuilds <c>Ehat</c> from scratch by counting, for every document position and every clique
 /// at that position, how often each feature co-occurs with the observed clique labelling.
 /// </summary>
 /// <param name="data">Feature indices, indexed as [document][position][clique][feature].</param>
 /// <param name="labels">Observed label per position, indexed as [document][position].</param>
 private void EmpiricalCounts(int[][][][] data, int[][] labels)
 {
     Ehat = Empty2D();
     for (int doc = 0; doc < data.Length; doc++)
     {
         int[][][] positions = data[doc];
         int[]     gold      = labels[doc];
         // Sliding window over the most recent labels, initially all background.
         int[] history = new int[window];
         Arrays.Fill(history, classIndex.IndexOf(backgroundSymbol));
         for (int pos = 0; pos < positions.Length; pos++)
         {
             // Shift the window left by one and append the label of the current position.
             System.Array.Copy(history, 1, history, 0, window - 1);
             history[window - 1] = gold[pos];
             int[][] cliques = positions[pos];
             for (int order = 0; order < cliques.Length; order++)
             {
                 // A clique of index `order` spans the last (order + 1) labels of the window.
                 int   span        = order + 1;
                 int[] cliqueLabel = new int[span];
                 System.Array.Copy(history, window - span, cliqueLabel, 0, span);
                 int labelIndex = labelIndices[order].IndexOf(new CRFLabel(cliqueLabel));
                 foreach (int feature in cliques[order])
                 {
                     Ehat[feature][labelIndex]++;
                 }
             }
         }
     }
 }
 /// <summary>
 /// Rebuilds <c>Ehat</c> with one count table per LOP expert: for each observed (feature, clique label)
 /// pair in <c>data</c>/<c>labels</c>, the count is incremented in every expert whose feature set
 /// (<c>featureIndicesSetArray</c>) contains that feature.
 /// </summary>
 private void ComputeEHat()
 {
     Ehat = Empty2D();
     for (int doc = 0; doc < data.Length; doc++)
     {
         int[][][] positions = data[doc];
         int[]     gold      = labels[doc];
         // Sliding window over the most recent labels, initially all background.
         int[] history = new int[window];
         Arrays.Fill(history, classIndex.IndexOf(backgroundSymbol));
         int surplus = gold.Length - positions.Length;
         if (surplus > 0)
         {
             // only true for self-training
             // seed the window with the leading labels ...
             System.Array.Copy(gold, 0, history, 0, history.Length);
             // ... and keep only the trailing labels, one per data position
             int[] aligned = new int[positions.Length];
             System.Array.Copy(gold, surplus, aligned, 0, aligned.Length);
             gold = aligned;
         }
         for (int pos = 0; pos < positions.Length; pos++)
         {
             // Shift the window left by one and append the label of the current position.
             System.Array.Copy(history, 1, history, 0, window - 1);
             history[window - 1] = gold[pos];
             int[][] cliques = positions[pos];
             for (int order = 0; order < cliques.Length; order++)
             {
                 int[] features = cliques[order];
                 // A clique of index `order` spans the last (order + 1) labels of the window.
                 int   span        = order + 1;
                 int[] cliqueLabel = new int[span];
                 System.Array.Copy(history, window - span, cliqueLabel, 0, span);
                 IIndex <CRFLabel> cliqueLabelIndex = labelIndices[order];
                 int observed = cliqueLabelIndex.IndexOf(new CRFLabel(cliqueLabel));
                 for (int expert = 0; expert < numLopExpert; expert++)
                 {
                     double[][]        expertCounts = Ehat[expert];
                     ICollection <int> ownedByExpert = featureIndicesSetArray[expert];
                     foreach (int feature in features)
                     {
                         // Count the feature only for experts that own it.
                         if (ownedByExpert.Contains(feature))
                         {
                             expertCounts[feature][observed]++;
                         }
                     }
                 }
             }
         }
     }
 }
예제 #3
0
 /// <summary>
 /// Adds the empirical feature counts of a single document into <paramref name="eHat"/>.
 /// </summary>
 /// <param name="eHat">Accumulator indexed as [feature][clique label index]; updated in place.</param>
 /// <param name="docIndex">Index of the document in <c>data</c>, <c>labels</c> and (when set) <c>featureVal</c>.</param>
 protected internal virtual void EmpiricalCountsForADoc(double[][] eHat, int docIndex)
 {
     int[][][] positions = data[docIndex];
     int[]     gold      = labels[docIndex];
     // Sliding window over the most recent labels, initially all background.
     int[] history = new int[window];
     Arrays.Fill(history, classIndex.IndexOf(backgroundSymbol));
     // Optional per-feature values for this document; null means every feature counts as 1.0.
     double[][][] values = (featureVal != null) ? featureVal[docIndex] : null;
     int surplus = gold.Length - positions.Length;
     if (surplus > 0)
     {
         // only true for self-training
         // seed the window with the leading labels ...
         System.Array.Copy(gold, 0, history, 0, history.Length);
         // ... and keep only the trailing labels, one per data position
         int[] aligned = new int[positions.Length];
         System.Array.Copy(gold, surplus, aligned, 0, aligned.Length);
         gold = aligned;
     }
     for (int pos = 0; pos < positions.Length; pos++)
     {
         // Shift the window left by one and append the label of the current position.
         System.Array.Copy(history, 1, history, 0, window - 1);
         history[window - 1] = gold[pos];
         for (int order = 0; order < positions[pos].Length; order++)
         {
             // A clique of index `order` spans the last (order + 1) labels of the window.
             int   span        = order + 1;
             int[] cliqueLabel = new int[span];
             System.Array.Copy(history, window - span, cliqueLabel, 0, span);
             int   labelIndex = labelIndices[order].IndexOf(new CRFLabel(cliqueLabel));
             int[] features   = positions[pos][order];
             for (int n = 0; n < features.Length; n++)
             {
                 // order == 0 because only node features gets feature values
                 bool   hasValue = values != null && order == 0;
                 double fVal     = hasValue ? values[pos][order][n] : 1.0;
                 eHat[features[n]][labelIndex] += fVal;
             }
         }
     }
 }
        // todo [cdm]: Below data[m] --> docData
        /// <summary>Calculates both value and partial derivatives at the point x, and save them internally.</summary>
        /// <remarks>
        /// For every document in <c>data</c> this builds a calibrated clique tree, adds the conditional
        /// log-probability of the observed labels to a running total, and accumulates empirical
        /// ("hat") and expected ("e") statistics for the four weight matrices returned by
        /// <c>SeparateWeights</c>. Afterwards <c>value</c> is set to the negated log-probability and
        /// <c>derivative</c> is filled with (expected - empirical) in the order W4Edge, W and, when
        /// <c>useOutputLayer</c> is set, U4Edge, U. Finally the configured prior is added to both.
        /// </remarks>
        /// <param name="x">Flat weight vector at which the objective is evaluated.</param>
        /// <exception cref="Exception">
        /// Thrown when the accumulated log-probability is NaN, or when the number of derivative entries
        /// written does not match <c>beforeOutputWeights</c> (after W) or <c>x.Length</c> (at the end).
        /// </exception>
        protected internal override void Calculate(double[] x)
        {
            // the log prob of the sequence given the model, which is the negation of value at this point
            double prob = 0.0;
            Quadruple <double[][], double[][], double[][], double[][]> allParams = SeparateWeights(x);

            double[][] W4Edge = allParams.First();   // inputLayerWeights4Edge
            double[][] U4Edge = allParams.Second();  // outputLayerWeights4Edge
            double[][] W      = allParams.Third();   // inputLayerWeights
            double[][] U      = allParams.Fourth();  // outputLayerWeights
            // Row-wise softmax of the output weights; only materialized for a softmax output layer.
            double[][] Y4Edge = null;
            double[][] Y      = null;
            if (flags.softmaxOutputLayer)
            {
                Y4Edge = new double[U4Edge.Length][];
                for (int i = 0; i < U4Edge.Length; i++)
                {
                    Y4Edge[i] = ArrayMath.Softmax(U4Edge[i]);
                }
                Y = new double[U.Length][];
                for (int i = 0; i < U.Length; i++)
                {
                    Y[i] = ArrayMath.Softmax(U[i]);
                }
            }
            // statistics accumulated for the observed labelling
            double[][] What4Edge = EmptyW4Edge();
            double[][] Uhat4Edge = EmptyU4Edge();
            double[][] What      = EmptyW();
            double[][] Uhat      = EmptyU();
            // the expectations over counts
            // first index is feature index, second index is of possible labeling
            double[][] eW4Edge = EmptyW4Edge();
            double[][] eU4Edge = EmptyU4Edge();
            double[][] eW      = EmptyW();
            double[][] eU      = EmptyU();
            // iterate over all the documents
            for (int m = 0; m < data.Length; m++)
            {
                int[][][] docData   = data[m];
                int[]     docLabels = labels[m];
                NonLinearSecondOrderCliquePotentialFunction cliquePotentialFunction = new NonLinearSecondOrderCliquePotentialFunction(W4Edge, U4Edge, W, U, flags);
                // make a clique tree for this document
                CRFCliqueTree <string> cliqueTree = CRFCliqueTree.GetCalibratedCliqueTree(docData, labelIndices, numClasses, classIndex, backgroundSymbol, cliquePotentialFunction, null);
                // compute the log probability of the document given the model with the parameters x
                int[] given = new int[window - 1];
                Arrays.Fill(given, classIndex.IndexOf(backgroundSymbol));
                int[] windowLabels = new int[window];
                Arrays.Fill(windowLabels, classIndex.IndexOf(backgroundSymbol));
                if (docLabels.Length > docData.Length)
                {
                    // only true for self-training
                    // fill the given array with the extra docLabels
                    System.Array.Copy(docLabels, 0, given, 0, given.Length);
                    System.Array.Copy(docLabels, 0, windowLabels, 0, windowLabels.Length);
                    // shift the docLabels array left
                    int[] newDocLabels = new int[docData.Length];
                    System.Array.Copy(docLabels, docLabels.Length - newDocLabels.Length, newDocLabels, 0, newDocLabels.Length);
                    docLabels = newDocLabels;
                }
                // iterate over the positions in this document
                for (int i = 0; i < docData.Length; i++)
                {
                    int    label = docLabels[i];
                    double p     = cliqueTree.CondLogProbGivenPrevious(i, label, given);
                    if (Verbose)
                    {
                        log.Info("P(" + label + "|" + ArrayMath.ToString(given) + ")=" + p);
                    }
                    prob += p;
                    // slide the conditioning context one position to the right
                    System.Array.Copy(given, 1, given, 0, given.Length - 1);
                    given[given.Length - 1] = label;
                }
                // compute the expected counts for this document, which we will need to compute the derivative
                // iterate over the positions in this document
                for (int i_1 = 0; i_1 < docData.Length; i_1++)
                {
                    // for each possible clique at this position
                    System.Array.Copy(windowLabels, 1, windowLabels, 0, window - 1);
                    windowLabels[window - 1] = docLabels[i_1];
                    for (int j = 0; j < docData[i_1].Length; j++)
                    {
                        IIndex <CRFLabel> labelIndex = labelIndices[j];
                        int[] cliqueFeatures = docData[i_1][j];
                        // j == 0 selects the node parameters (W, U, Y); any other j the edge ones (*4Edge).
                        bool     isNode = j == 0;
                        double[] As;
                        int      inputSize;
                        int      outputSize;
                        if (isNode)
                        {
                            inputSize  = inputLayerSize;
                            outputSize = outputLayerSize;
                            As         = cliquePotentialFunction.HiddenLayerOutput(W, cliqueFeatures, flags, null, j + 1);
                        }
                        else
                        {
                            inputSize  = inputLayerSize4Edge;
                            outputSize = outputLayerSize4Edge;
                            As         = cliquePotentialFunction.HiddenLayerOutput(W4Edge, cliqueFeatures, flags, null, j + 1);
                        }
                        bool sparseOrTied = flags.sparseOutputLayer || flags.tieOutputLayer;
                        // derivative of the hidden activation, expressed through the activation value itself
                        double[] fDeriv = new double[inputSize];
                        for (int q = 0; q < inputSize; q++)
                        {
                            fDeriv[q] = useSigmoid ? As[q] * (1 - As[q]) : 1 - As[q] * As[q];
                        }
                        // calculating yTimesA for softmax
                        double[][] yTimesA      = null;
                        double[]   sumOfYTimesA = null;
                        if (flags.softmaxOutputLayer)
                        {
                            yTimesA = new double[outputSize][];
                            for (int ii = 0; ii < outputSize; ii++)
                            {
                                yTimesA[ii] = new double[numHiddenUnits];
                            }
                            sumOfYTimesA = new double[outputSize];
                            for (int k = 0; k < outputSize; k++)
                            {
                                // a tied output layer shares row 0 across all labels
                                int      yRow = flags.tieOutputLayer ? 0 : k;
                                double[] Yk   = isNode ? Y[yRow] : Y4Edge[yRow];
                                double   sum  = 0;
                                for (int q_1 = 0; q_1 < inputSize; q_1++)
                                {
                                    if (q_1 % outputSize == k)
                                    {
                                        int    hiddenUnitNo = q_1 / outputSize;
                                        double val          = As[q_1] * Yk[hiddenUnitNo];
                                        yTimesA[k][hiddenUnitNo] = val;
                                        sum += val;
                                    }
                                }
                                sumOfYTimesA[k] = sum;
                            }
                        }
                        // calculating Uhat What
                        int[] cliqueLabel = new int[j + 1];
                        System.Array.Copy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
                        CRFLabel crfLabel        = new CRFLabel(cliqueLabel);
                        int      givenLabelIndex = labelIndex.IndexOf(crfLabel);
                        // a tied output layer shares row 0 across all labels
                        int      givenRow      = flags.tieOutputLayer ? 0 : givenLabelIndex;
                        double[] Uk            = isNode ? U[givenRow] : U4Edge[givenRow];
                        double[] UhatK         = isNode ? Uhat[givenRow] : Uhat4Edge[givenRow];
                        double[] Yk_1          = null;
                        double[] yTimesAK      = null;
                        double   sumOfYTimesAK = 0;
                        if (flags.softmaxOutputLayer)
                        {
                            Yk_1          = isNode ? Y[givenRow] : Y4Edge[givenRow];
                            yTimesAK      = yTimesA[givenLabelIndex];
                            sumOfYTimesAK = sumOfYTimesA[givenLabelIndex];
                        }
                        for (int k_1 = 0; k_1 < inputSize; k_1++)
                        {
                            double deltaK = 1;
                            if (sparseOrTied)
                            {
                                // only input units wired to the observed label contribute
                                if (k_1 % outputSize == givenLabelIndex)
                                {
                                    int hiddenUnitNo = k_1 / outputSize;
                                    if (flags.softmaxOutputLayer)
                                    {
                                        UhatK[hiddenUnitNo] += (yTimesAK[hiddenUnitNo] - Yk_1[hiddenUnitNo] * sumOfYTimesAK);
                                        deltaK *= Yk_1[hiddenUnitNo];
                                    }
                                    else
                                    {
                                        UhatK[hiddenUnitNo] += As[k_1];
                                        deltaK *= Uk[hiddenUnitNo];
                                    }
                                }
                            }
                            else
                            {
                                UhatK[k_1] += As[k_1];
                                if (useOutputLayer)
                                {
                                    deltaK *= Uk[k_1];
                                }
                            }
                            if (useHiddenLayer)
                            {
                                deltaK *= fDeriv[k_1];
                            }
                            // Which rows of What receive deltaK:
                            //  - with an output layer: every row, or only the rows wired to the observed
                            //    label when the output layer is sparse/tied;
                            //  - without an output layer: only the row of the observed label itself.
                            bool touchesWhat = useOutputLayer
                                ? (!sparseOrTied || k_1 % outputSize == givenLabelIndex)
                                : k_1 == givenLabelIndex;
                            if (touchesWhat)
                            {
                                double[] WhatK = isNode ? What[k_1] : What4Edge[k_1];
                                foreach (int cliqueFeature in cliqueFeatures)
                                {
                                    WhatK[cliqueFeature] += deltaK;
                                }
                            }
                        }
                        // for each possible labeling for that clique
                        for (int k_2 = 0; k_2 < labelIndex.Size(); k_2++)
                        {
                            // labelIndex.size() == numClasses
                            int[]  label = labelIndex.Get(k_2).GetLabel();
                            double p     = cliqueTree.Prob(i_1, label);
                            // probability of these labels occurring in this clique with these features
                            int      row = flags.tieOutputLayer ? 0 : k_2;
                            double[] Uk2 = isNode ? U[row] : U4Edge[row];
                            double[] eUK = isNode ? eU[row] : eU4Edge[row];
                            double[] Yk2 = null;
                            if (flags.softmaxOutputLayer)
                            {
                                Yk2 = isNode ? Y[row] : Y4Edge[row];
                            }
                            if (useOutputLayer)
                            {
                                for (int q_1 = 0; q_1 < inputSize; q_1++)
                                {
                                    double deltaQ       = 1;
                                    bool   wiredToLabel = q_1 % outputSize == k_2;
                                    if (sparseOrTied)
                                    {
                                        if (wiredToLabel)
                                        {
                                            int hiddenUnitNo = q_1 / outputSize;
                                            if (flags.softmaxOutputLayer)
                                            {
                                                eUK[hiddenUnitNo] += (yTimesA[k_2][hiddenUnitNo] - Yk2[hiddenUnitNo] * sumOfYTimesA[k_2]) * p;
                                                deltaQ             = Yk2[hiddenUnitNo];
                                            }
                                            else
                                            {
                                                eUK[hiddenUnitNo] += As[q_1] * p;
                                                deltaQ             = Uk2[hiddenUnitNo];
                                            }
                                        }
                                    }
                                    else
                                    {
                                        eUK[q_1] += As[q_1] * p;
                                        deltaQ    = Uk2[q_1];
                                    }
                                    if (useHiddenLayer)
                                    {
                                        deltaQ *= fDeriv[q_1];
                                    }
                                    // dense layers update every row; sparse/tied ones only the wired rows
                                    if (!sparseOrTied || wiredToLabel)
                                    {
                                        double[] eWq = isNode ? eW[q_1] : eW4Edge[q_1];
                                        foreach (int cliqueFeature in cliqueFeatures)
                                        {
                                            eWq[cliqueFeature] += deltaQ * p;
                                        }
                                    }
                                }
                            }
                            else
                            {
                                double deltaK = 1;
                                if (useHiddenLayer)
                                {
                                    deltaK *= fDeriv[k_2];
                                }
                                double[] eWK = isNode ? eW[k_2] : eW4Edge[k_2];
                                foreach (int cliqueFeature in cliqueFeatures)
                                {
                                    eWK[cliqueFeature] += deltaK * p;
                                }
                            }
                        }
                    }
                }
            }
            if (double.IsNaN(prob))
            {
                // shouldn't be the case
                throw new Exception("Got NaN for prob in CRFNonLinearSecondOrderLogConditionalObjectiveFunction.calculate()");
            }
            value = -prob;
            if (Verbose)
            {
                log.Info("value is " + value);
            }
            // compute the partial derivative for each feature by comparing expected counts to empirical counts
            int index = 0;

            for (int i = 0; i < eW4Edge.Length; i++)
            {
                for (int j = 0; j < eW4Edge[i].Length; j++)
                {
                    derivative[index++] = (eW4Edge[i][j] - What4Edge[i][j]);
                    if (Verbose)
                    {
                        log.Info("inputLayerWeights4Edge deriv(" + i + "," + j + ") = " + eW4Edge[i][j] + " - " + What4Edge[i][j] + " = " + derivative[index - 1]);
                    }
                }
            }
            for (int i = 0; i < eW.Length; i++)
            {
                for (int j = 0; j < eW[i].Length; j++)
                {
                    derivative[index++] = (eW[i][j] - What[i][j]);
                    if (Verbose)
                    {
                        log.Info("inputLayerWeights deriv(" + i + "," + j + ") = " + eW[i][j] + " - " + What[i][j] + " = " + derivative[index - 1]);
                    }
                }
            }
            if (index != beforeOutputWeights)
            {
                throw new Exception("after W derivative, index(" + index + ") != beforeOutputWeights(" + beforeOutputWeights + ")");
            }
            if (useOutputLayer)
            {
                // Each loop below owns its counter and starts at row 0; the previous code tested and
                // incremented a counter belonging to an earlier, already-closed loop.
                for (int i = 0; i < eU4Edge.Length; i++)
                {
                    for (int j = 0; j < eU4Edge[i].Length; j++)
                    {
                        derivative[index++] = (eU4Edge[i][j] - Uhat4Edge[i][j]);
                        if (Verbose)
                        {
                            log.Info("outputLayerWeights4Edge deriv(" + i + "," + j + ") = " + eU4Edge[i][j] + " - " + Uhat4Edge[i][j] + " = " + derivative[index - 1]);
                        }
                    }
                }
                for (int i = 0; i < eU.Length; i++)
                {
                    for (int j = 0; j < eU[i].Length; j++)
                    {
                        derivative[index++] = (eU[i][j] - Uhat[i][j]);
                        if (Verbose)
                        {
                            log.Info("outputLayerWeights deriv(" + i + "," + j + ") = " + eU[i][j] + " - " + Uhat[i][j] + " = " + derivative[index - 1]);
                        }
                    }
                }
            }
            if (index != x.Length)
            {
                throw new Exception("after W derivative, index(" + index + ") != x.length(" + x.Length + ")");
            }
            // Number of leading weights the prior applies to; the output-layer weights (everything
            // from beforeOutputWeights on) are excluded when requested or when using softmax.
            int regSize = x.Length;

            if (flags.skipOutputRegularization || flags.softmaxOutputLayer)
            {
                regSize = beforeOutputWeights;
            }
            // incorporate priors
            if (prior == QuadraticPrior)
            {
                double sigmaSq = sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double k = 1.0;
                    double w = x[i];
                    value         += k * w * w / 2.0 / sigmaSq;
                    derivative[i] += k * w / sigmaSq;
                }
            }
            else if (prior == HuberPrior)
            {
                double sigmaSq = sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double w    = x[i];
                    double wabs = System.Math.Abs(w);
                    if (wabs < epsilon)
                    {
                        // quadratic region around zero
                        value         += w * w / 2.0 / epsilon / sigmaSq;
                        derivative[i] += w / epsilon / sigmaSq;
                    }
                    else
                    {
                        // linear region
                        value         += (wabs - epsilon / 2) / sigmaSq;
                        derivative[i] += ((w < 0.0) ? -1.0 : 1.0) / sigmaSq;
                    }
                }
            }
            else if (prior == QuarticPrior)
            {
                double sigmaQu = sigma * sigma * sigma * sigma;
                for (int i = 0; i < regSize; i++)
                {
                    double k = 1.0;
                    double w = x[i];
                    value += k * w * w * w * w / 2.0 / sigmaQu;
                    // NOTE(review): this gradient term is linear in w while the value term is quartic
                    // (d/dw of w^4 / 2 / sigma^4 would be 2 * w^3 / sigma^4). Left unchanged to keep
                    // existing optimization behavior - confirm whether this is intentional.
                    derivative[i] += k * w / sigmaQu;
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Evaluates the objective at <paramref name="x"/> and stores the result in the
        /// <c>value</c> field and its gradient in the <c>derivative</c> field.
        /// </summary>
        /// <remarks>
        /// Each clique at each position is scored on its own: the score of a labeling is the
        /// sum of the weights of the clique's active features, and the scores are normalized
        /// with a log-sum over all labelings of that clique. <c>value</c> accumulates the
        /// negative log-probability of the observed labeling; the gradient is the expected
        /// feature count minus the empirical count held in <c>Ehat</c>. Finally the penalty
        /// selected by <c>prior</c> (quadratic, Huber or quartic) is added to both.
        /// All arithmetic is kept in <c>float</c>, so the prior terms use float literals.
        /// </remarks>
        /// <param name="x">
        /// Flat weight vector; <c>To2D</c> turns it into the table indexed here as
        /// <c>[feature][label]</c>. The priors are applied to every entry of this vector.
        /// </param>
        public virtual void CalculateWeird1(float[] x)
        {
            float[][] weights = To2D(x);
            float[][] E       = Empty2D();
            value = 0.0f;
            Arrays.Fill(derivative, 0.0f);
            // Scratch buffers, one row per clique size, one column per labeling of that clique.
            float[][] sums  = new float[labelIndices.Count][];
            float[][] probs = new float[labelIndices.Count][];
            for (int i = 0; i < sums.Length; i++)
            {
                int size = labelIndices[i].Size();
                sums[i]  = new float[size];
                probs[i] = new float[size];
            }
            for (int d = 0; d < data.Length; d++)
            {
                int[] llabels = labels[d];
                for (int e = 0; e < data[d].Length; e++)
                {
                    int[][] ddata = this.data[d][e];
                    // Activation: unnormalized score of every labeling of every clique.
                    for (int cl = 0; cl < ddata.Length; cl++)
                    {
                        int[] features = ddata[cl];
                        Arrays.Fill(sums[cl], 0.0f);
                        int numClasses = labelIndices[cl].Size();
                        for (int c = 0; c < numClasses; c++)
                        {
                            foreach (int feature in features)
                            {
                                sums[cl][c] += weights[feature][c];
                            }
                        }
                    }
                    // Normalize each clique and charge the observed labeling to the objective.
                    for (int cl = 0; cl < ddata.Length; cl++)
                    {
                        // Observed labels for the clique ending at position e; slots that
                        // would fall before the start of the document keep the background label.
                        int[] label = new int[cl + 1];
                        Arrays.Fill(label, classIndex.IndexOf(backgroundSymbol));
                        int index1 = label.Length - 1;
                        for (int pos = e; pos >= 0 && index1 >= 0; pos--)
                        {
                            label[index1--] = llabels[pos];
                        }
                        CRFLabel crfLabel      = new CRFLabel(label);
                        int      observedIndex = labelIndices[cl].IndexOf(crfLabel);
                        float    total         = ArrayMath.LogSum(sums[cl]);
                        int      numClasses    = labelIndices[cl].Size();
                        for (int c = 0; c < numClasses; c++)
                        {
                            probs[cl][c] = (float)System.Math.Exp(sums[cl][c] - total);
                        }
                        value -= sums[cl][observedIndex] - total;
                    }
                    // Expected counts: every active feature of a clique receives the
                    // probability of each possible labeling of that clique.
                    for (int j = 0; j < ddata.Length; j++)
                    {
                        int numLabelings = labelIndices[j].Size();
                        for (int k = 0; k < numLabelings; k++)
                        {
                            float p = probs[j][k];
                            foreach (int feature in ddata[j])
                            {
                                E[feature][k] += p;
                            }
                        }
                    }
                }
            }
            // Partial derivative for each weight: expected minus empirical count,
            // written in the row-major order of E.
            int index = 0;
            for (int i = 0; i < E.Length; i++)
            {
                for (int j = 0; j < E[i].Length; j++)
                {
                    derivative[index++] = E[i][j] - Ehat[i][j];
                }
            }
            // Priors. Float literals are used on purpose: a double-typed right-hand side
            // cannot be compound-assigned to a float in C#.
            if (prior == QuadraticPrior)
            {
                float sigmaSq = sigma * sigma;
                for (int i = 0; i < x.Length; i++)
                {
                    float w = x[i];
                    value         += w * w / 2.0f / sigmaSq;
                    derivative[i] += w / sigmaSq;
                }
            }
            else if (prior == HuberPrior)
            {
                float sigmaSq = sigma * sigma;
                for (int i = 0; i < x.Length; i++)
                {
                    float w    = x[i];
                    float wabs = System.Math.Abs(w);
                    if (wabs < epsilon)
                    {
                        // Quadratic region close to zero.
                        value         += w * w / 2.0f / epsilon / sigmaSq;
                        derivative[i] += w / epsilon / sigmaSq;
                    }
                    else
                    {
                        // Linear region: the slope has constant magnitude 1 / sigma^2.
                        value         += (wabs - epsilon / 2) / sigmaSq;
                        derivative[i] += ((w < 0.0f) ? -1.0f : 1.0f) / sigmaSq;
                    }
                }
            }
            else if (prior == QuarticPrior)
            {
                float sigmaQu = sigma * sigma * sigma * sigma;
                for (int i = 0; i < x.Length; i++)
                {
                    float w = x[i];
                    value         += w * w * w * w / 2.0f / sigmaQu;
                    // d/dw [ w^4 / (2 sigma^4) ] = 2 w^3 / sigma^4, matching the penalty above.
                    derivative[i] += 2.0f * w * w * w / sigmaQu;
                }
            }
        }
 /// <summary>
 /// Rebuilds <c>sumOfObservedLogPotential</c> and <c>sumOfExpectedLogPotential</c> from the
 /// supplied expert weights.
 /// </summary>
 /// <remarks>
 /// For every document, position, clique and expert, the weights of the clique's features
 /// that belong to that expert (per <c>featureIndicesSetArray</c>) are summed per labeling
 /// and stored at <c>sumOfExpectedLogPotential[doc][position][clique][expert][labeling]</c>.
 /// The weight of the observed labeling is additionally accumulated, per expert, into
 /// <c>sumOfObservedLogPotential</c>.
 /// </remarks>
 /// <param name="learnedLopExpertWeights2D">
 /// Weights indexed as <c>[expert][feature][labeling]</c>.
 /// </param>
 private void LogPotential(double[][][] learnedLopExpertWeights2D)
 {
     sumOfExpectedLogPotential = new double[data.Length][][][][];
     sumOfObservedLogPotential = new double[numLopExpert];
     for (int docIdx = 0; docIdx < data.Length; docIdx++)
     {
         int[][][] tokens     = data[docIdx];
         int[]     goldLabels = labels[docIdx];
         // Label history for the current window, initially all background.
         int[] history = new int[window];
         Arrays.Fill(history, classIndex.IndexOf(backgroundSymbol));
         double[][][][] perDoc = new double[tokens.Length][][][];
         int surplus = goldLabels.Length - tokens.Length;
         if (surplus > 0)
         {
             // only true for self-training:
             // seed the history with the leading labels, then keep only the
             // trailing labels that line up with the tokens
             System.Array.Copy(goldLabels, 0, history, 0, history.Length);
             int[] trimmed = new int[tokens.Length];
             System.Array.Copy(goldLabels, surplus, trimmed, 0, trimmed.Length);
             goldLabels = trimmed;
         }
         for (int pos = 0; pos < tokens.Length; pos++)
         {
             // Slide the window one step left and append the label at this position.
             System.Array.Copy(history, 1, history, 0, window - 1);
             history[window - 1] = goldLabels[pos];
             int[][]      cliques = tokens[pos];
             double[][][] perPos  = new double[cliques.Length][][];
             for (int order = 0; order < cliques.Length; order++)
             {
                 // The clique at index `order` covers the last order + 1 labels of the window.
                 int[] feats    = cliques[order];
                 int[] observed = new int[order + 1];
                 System.Array.Copy(history, window - 1 - order, observed, 0, order + 1);
                 CRFLabel          observedLabel = new CRFLabel(observed);
                 IIndex <CRFLabel> cliqueIndex   = labelIndices[order];
                 double[][]        perClique     = new double[numLopExpert][];
                 int               goldIdx       = cliqueIndex.IndexOf(observedLabel);
                 for (int expert = 0; expert < numLopExpert; expert++)
                 {
                     double[]          scores = new double[cliqueIndex.Size()];
                     ICollection <int> owned  = featureIndicesSetArray[expert];
                     for (int f = 0; f < feats.Length; f++)
                     {
                         int featureIdx = feats[f];
                         if (!owned.Contains(featureIdx))
                         {
                             // this feature does not belong to the current expert
                             continue;
                         }
                         double[] row = learnedLopExpertWeights2D[expert][featureIdx];
                         sumOfObservedLogPotential[expert] += row[goldIdx];
                         // potential of this clique under every possible labeling,
                         // used later in calculating expected counts
                         for (int l = 0; l < cliqueIndex.Size(); l++)
                         {
                             scores[l] += row[l];
                         }
                     }
                     perClique[expert] = scores;
                 }
                 perPos[order] = perClique;
             }
             perDoc[pos] = perPos;
         }
         sumOfExpectedLogPotential[docIdx] = perDoc;
     }
 }