/// <summary>
/// Trains a <see cref="IClassifier{L, F}"/> on a <see cref="Dataset{L, F}"/>.
/// </summary>
/// <param name="data">Dataset supplying the label and feature indices and the training examples.</param>
/// <returns>
/// A <see cref="IClassifier{L, F}"/> trained on the data.
/// </returns>
public virtual LinearClassifier<L, F> TrainClassifier(GeneralDataset<L, F> data)
{
    // Capture the dataset's indices so the resulting classifier can map
    // labels/features back to their indexed positions.
    labelIndex   = data.LabelIndex();
    featureIndex = data.FeatureIndex();
    return new LinearClassifier<L, F>(TrainWeights(data), featureIndex, labelIndex);
}
// Example #2 (score: 0)
        /// <summary>
        /// Evaluates the objective and its gradient at the weight vector
        /// <paramref name="x"/>, writing results into the inherited
        /// <c>value</c> and <c>derivative</c> fields (side effects only, no return).
        /// For each GE (generalized expectation) feature, the model's class
        /// distribution is averaged over the unlabeled datums where that feature
        /// is active, and a cross-entropy penalty against the empirical
        /// distribution is accumulated (the in-line notes below call this the
        /// KL-divergence term).
        /// </summary>
        /// <param name="x">Flattened weight vector; reshaped into a 2-D weight matrix via <c>To2D</c>.</param>
        protected internal override void Calculate(double[] x)
        {
            // Push the candidate weights into the classifier so GetModelProbs
            // below reflects the point being evaluated.
            classifier.SetWeights(To2D(x));
            // Lazily allocate the gradient buffer on the first call; on later
            // calls just zero it so contributions can be re-accumulated.
            if (derivative == null)
            {
                derivative = new double[x.Length];
            }
            else
            {
                Arrays.Fill(derivative, 0.0);
            }
            // Per-(feature, class, class') derivative contributions, filled by
            // UpdateDerivative for each active datum.
            // NOTE(review): this counter is never reset inside the n-loop, so
            // contributions accumulate across GE features — confirm this is the
            // intended gradient formulation.
            ICounter <Triple <int, int, int> > feature2classPairDerivatives = new ClassicCounter <Triple <int, int, int> >();

            value = 0.0;
            for (int n = 0; n < geFeatures.Count; n++)
            {
                //F feature = geFeatures.get(n);
                // Model's expected class distribution over the datums where GE
                // feature n fires (summed here, normalized below).
                double[] modelDist = new double[numClasses];
                Arrays.Fill(modelDist, 0);
                //go over the unlabeled active data to compute expectations
                IList <int> activeData = geFeature2DatumList[n];
                foreach (int activeDatum in activeData)
                {
                    IDatum <L, F> datum = unlabeledDataList[activeDatum];
                    double[]      probs = GetModelProbs(datum);
                    for (int c = 0; c < numClasses; c++)
                    {
                        modelDist[c] += probs[c];
                    }
                    // Accumulate this datum's contribution to the pairwise
                    // derivative terms.
                    UpdateDerivative(datum, probs, feature2classPairDerivatives);
                }
                //computes p(y_d)*(1-p(y_d))*f_d for all active features.
                //now  compute the value (KL-divergence) and the final value of the derivative.
                if (activeData.Count > 0)
                {
                    // Normalize the summed probabilities into an average distribution.
                    for (int c = 0; c < numClasses; c++)
                    {
                        modelDist[c] /= activeData.Count;
                    }
                    // Smooth to avoid Math.Log(0) and division by zero below.
                    SmoothDistribution(modelDist);
                    // Cross-entropy of the empirical distribution against the
                    // (smoothed) model distribution for GE feature n.
                    for (int c_1 = 0; c_1 < numClasses; c_1++)
                    {
                        value += -geFeature2EmpiricalDist[n][c_1] * Math.Log(modelDist[c_1]);
                    }
                    // Chain rule: combine the cached pairwise derivative terms with
                    // the empirical/model distribution ratio for every labeled
                    // feature/class weight.
                    // NOTE(review): derivative[wtIndex] /= activeData.Count divides
                    // the running total (including earlier n iterations'
                    // contributions), not just this iteration's increment — verify
                    // against the reference implementation.
                    for (int f = 0; f < labeledDataset.FeatureIndex().Size(); f++)
                    {
                        for (int c_2 = 0; c_2 < numClasses; c_2++)
                        {
                            int wtIndex = IndexOf(f, c_2);
                            for (int cPrime = 0; cPrime < numClasses; cPrime++)
                            {
                                derivative[wtIndex] += feature2classPairDerivatives.GetCount(new Triple <int, int, int>(f, c_2, cPrime)) * geFeature2EmpiricalDist[n][cPrime] / modelDist[cPrime];
                            }
                            derivative[wtIndex] /= activeData.Count;
                        }
                    }
                }
            }
        }
// Example #3 (score: 0)
        /// <summary>
        /// Trains one binary classifier per label in <paramref name="trainLabels"/>
        /// (that label as the positive class, everything else negative) and bundles
        /// them into a one-vs-all classifier over the dataset's full label set.
        /// </summary>
        /// <param name="classifierFactory">Factory used to train each binary (positive/negative) classifier.</param>
        /// <param name="dataset">Multi-class dataset providing the label and feature indices.</param>
        /// <param name="trainLabels">Labels for which a binary classifier is trained.</param>
        /// <returns>A one-vs-all classifier composed of the trained binary classifiers.</returns>
        public static Edu.Stanford.Nlp.Classify.OneVsAllClassifier <L, F> Train <L, F>(IClassifierFactory <string, F, IClassifier <string, F> > classifierFactory, GeneralDataset <L, F> dataset, ICollection <L> trainLabels)
        {
            IIndex <L> labelIndex   = dataset.LabelIndex();
            IIndex <F> featureIndex = dataset.FeatureIndex();
            IDictionary <L, IClassifier <string, F> > labelToBinary = Generics.NewHashMap();

            foreach (L label in trainLabels)
            {
                int labelId = labelIndex.IndexOf(label);
                logger.Info("Training " + label + " = " + labelId + ", posIndex = " + posIndex);
                // Relabel the dataset: this label maps to the positive class,
                // every other label collapses to the negative class.
                IDictionary <L, string> positiveMap = new ArrayMap <L, string>();
                positiveMap[label] = PosLabel;
                GeneralDataset <string, F> binaryData = dataset.MapDataset(dataset, binaryIndex, positiveMap, NegLabel);
                labelToBinary[label] = classifierFactory.TrainClassifier(binaryData);
            }
            return new Edu.Stanford.Nlp.Classify.OneVsAllClassifier <L, F>(featureIndex, labelIndex, labelToBinary);
        }