public virtual double Score<F>(IClassifier<L, F> classifier, GeneralDataset<L, F> data)
        {
            IList<L> guesses = new List<L>();
            IList<L> labels  = new List<L>();

            // Collect the classifier's prediction for every datum.
            for (int i = 0; i < data.Size(); i++)
            {
                IDatum<L, F> d     = data.GetRVFDatum(i);
                L            guess = classifier.ClassOf(d);
                guesses.Add(guess);
            }
            // Recover the gold labels through the dataset's label index.
            int[] labelsArr = data.GetLabelsArray();
            labelIndex = data.labelIndex;
            for (int i = 0; i < data.Size(); i++)
            {
                labels.Add(labelIndex.Get(labelsArr[i]));
            }
            // Rebuild the label index so it covers both dataset and classifier labels.
            labelIndex = new HashIndex<L>();
            labelIndex.AddAll(data.LabelIndex().ObjectsList());
            labelIndex.AddAll(classifier.Labels());
            int numClasses = labelIndex.Size();

            tpCount  = new int[numClasses];
            fpCount  = new int[numClasses];
            fnCount  = new int[numClasses];
            negIndex = labelIndex.IndexOf(negLabel);
            // Tally per-class true positives, false positives, and false negatives,
            // skipping the designated negative (background) label.
            for (int i = 0; i < guesses.Count; ++i)
            {
                L   guess      = guesses[i];
                int guessIndex = labelIndex.IndexOf(guess);
                L   label      = labels[i];
                int trueIndex  = labelIndex.IndexOf(label);
                if (guessIndex == trueIndex)
                {
                    if (guessIndex != negIndex)
                    {
                        tpCount[guessIndex]++;
                    }
                }
                else
                {
                    if (guessIndex != negIndex)
                    {
                        fpCount[guessIndex]++;
                    }
                    if (trueIndex != negIndex)
                    {
                        fnCount[trueIndex]++;
                    }
                }
            }
            return GetFMeasure();
        }
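
The method above only fills the per-class tpCount/fpCount/fnCount arrays and delegates the final number to GetFMeasure(). As a rough illustration of what that last step computes, here is a minimal micro-averaged F1 sketch over the same arrays; the helper name and the micro-averaging choice are assumptions, not the class's actual implementation.

// Hypothetical helper: micro-averaged F1 over the per-class count arrays.
// Illustrative sketch only; GetFMeasure() in the real class may differ.
private static double MicroF1(int[] tpCount, int[] fpCount, int[] fnCount)
{
    int tp = 0, fp = 0, fn = 0;
    for (int c = 0; c < tpCount.Length; c++)
    {
        tp += tpCount[c];
        fp += fpCount[c];
        fn += fnCount[c];
    }
    double precision = tp + fp == 0 ? 0.0 : (double)tp / (tp + fp);
    double recall    = tp + fn == 0 ? 0.0 : (double)tp / (tp + fn);
    return precision + recall == 0.0 ? 0.0 : 2 * precision * recall / (precision + recall);
}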
Example #2
 public override double Score<F>(IClassifier<L, F> classifier, GeneralDataset<L, F> data)
 {
     // Build a label index that covers both classifier and dataset labels.
     labelIndex = new HashIndex<L>();
     labelIndex.AddAll(classifier.Labels());
     labelIndex.AddAll(data.labelIndex.ObjectsList());
     ClearCounts();
     // Classify every datum and accumulate guess/gold counts incrementally.
     int[] labelsArr = data.GetLabelsArray();
     for (int i = 0; i < data.Size(); i++)
     {
         IDatum<L, F> d     = data.GetRVFDatum(i);
         L            guess = classifier.ClassOf(d);
         AddGuess(guess, labelIndex.Get(labelsArr[i]));
     }
     FinalizeCounts();
     return GetFMeasure();
 }
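
This variant accumulates the same statistics incrementally through ClearCounts, AddGuess, and FinalizeCounts instead of first materializing guess and label lists. A minimal sketch of that accumulation pattern, using hypothetical counter fields and omitting the negative-label handling of the real class:

using System.Collections.Generic;

// Illustrative accumulator sketch; the class, fields, and Bump helper are
// hypothetical and simplified relative to the real scorer.
public class GuessCounter<L>
{
    private readonly Dictionary<L, int> tp = new Dictionary<L, int>();
    private readonly Dictionary<L, int> fp = new Dictionary<L, int>();
    private readonly Dictionary<L, int> fn = new Dictionary<L, int>();

    public void ClearCounts()
    {
        tp.Clear(); fp.Clear(); fn.Clear();
    }

    public void AddGuess(L guess, L gold)
    {
        if (EqualityComparer<L>.Default.Equals(guess, gold))
        {
            Bump(tp, guess);   // correct prediction: true positive for that class
        }
        else
        {
            Bump(fp, guess);   // predicted class was wrong: false positive
            Bump(fn, gold);    // gold class was missed: false negative
        }
    }

    private static void Bump(Dictionary<L, int> counts, L key)
    {
        counts.TryGetValue(key, out int n);
        counts[key] = n + 1;
    }
}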
Example #3
        /// <summary>A helper function for dumping the accuracy of the trained classifier.</summary>
        /// <param name="classifier">The classifier to evaluate.</param>
        /// <param name="dataset">The dataset to evaluate the classifier on.</param>
        public static void DumpAccuracy(IClassifier<ClauseSplitter.ClauseClassifierLabel, string> classifier, GeneralDataset<ClauseSplitter.ClauseClassifierLabel, string> dataset)
        {
            DecimalFormat df = new DecimalFormat("0.00%");

            Redwood.Log("size:         " + dataset.Size());
            Redwood.Log("split count:  " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);
            Redwood.Log("interm count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);
            Pair <double, double> pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseSplit);

            Redwood.Log("p  (split):   " + df.Format(pr.first));
            Redwood.Log("r  (split):   " + df.Format(pr.second));
            Redwood.Log("f1 (split):   " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second)));
            pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseInterm);
            Redwood.Log("p  (interm):  " + df.Format(pr.first));
            Redwood.Log("r  (interm):  " + df.Format(pr.second));
            Redwood.Log("f1 (interm):  " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second)));
        }
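
The F1 expressions above divide by zero when precision and recall are both zero. A small guarded helper (hypothetical, not part of the original code) makes the intended harmonic mean explicit:

// Hypothetical helper: harmonic mean of precision and recall with a zero guard.
private static double F1(double precision, double recall)
{
    double denom = precision + recall;
    return denom == 0.0 ? 0.0 : 2 * precision * recall / denom;
}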
Example #4
        public virtual void InitMC<F>(IProbabilisticClassifier<L, F> classifier, GeneralDataset<L, F> data)
        {
            //if (!(gData instanceof Dataset)) {
            //  throw new UnsupportedOperationException("Can only handle Datasets, not "+gData.getClass().getName());
            //}
            //
            //Dataset data = (Dataset)gData;
            IPriorityQueue<Pair<int, Pair<double, bool>>> q = new BinaryHeapPriorityQueue<Pair<int, Pair<double, bool>>>();

            total         = 0;
            correct       = 0;
            logLikelihood = 0.0;
            // Score every datum, track accuracy and log-likelihood, and queue each datum
            // keyed by its (negated) guess confidence for later sorting.
            for (int i = 0; i < data.Size(); i++)
            {
                IDatum<L, F> d            = data.GetRVFDatum(i);
                ICounter<L>  scores       = classifier.LogProbabilityOf(d);
                L            guess        = Counters.Argmax(scores);
                L            correctLab   = d.Label();
                double       guessScore   = scores.GetCount(guess);
                double       correctScore = scores.GetCount(correctLab);
                int          guessInd     = data.LabelIndex().IndexOf(guess);
                int          correctInd   = data.LabelIndex().IndexOf(correctLab);
                total++;
                if (guessInd == correctInd)
                {
                    correct++;
                }
                logLikelihood += correctScore;
                q.Add(new Pair<int, Pair<double, bool>>(i, new Pair<double, bool>(guessScore, guessInd == correctInd)), -guessScore);
            }
            accuracy = (double)correct / (double)total;
            IList<Pair<int, Pair<double, bool>>> sorted = q.ToSortedList();

            scores    = new double[sorted.Count];
            isCorrect = new bool[sorted.Count];
            // Unpack the confidence-sorted (score, correct?) pairs into parallel arrays.
            for (int i = 0; i < sorted.Count; i++)
            {
                Pair<double, bool> next = sorted[i].Second();
                scores[i]    = next.First();
                isCorrect[i] = next.Second();
            }
        }
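
InitMC leaves scores and isCorrect holding one entry per datum, ordered by the priority queue on classifier confidence. As an illustration of what that ordering is typically used for, here is a hedged sketch of an accuracy-at-coverage computation; the helper is assumed, not part of the class, and it presumes the array runs from most to least confident prediction (reverse it first if the queue ordering runs the other way).

// Hypothetical helper: accuracy over the k most confident predictions.
// Assumes isCorrect is ordered from most to least confident prediction.
private static double[] AccuracyAtCoverage(bool[] isCorrect)
{
    double[] acc = new double[isCorrect.Length];
    int correctSoFar = 0;
    for (int k = 0; k < isCorrect.Length; k++)
    {
        if (isCorrect[k])
        {
            correctSoFar++;
        }
        acc[k] = (double)correctSoFar / (k + 1);
    }
    return acc;
}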
Example #5
        public virtual double Score<F>(IProbabilisticClassifier<L, F> classifier, GeneralDataset<L, F> data)
        {
            List<Pair<double, int>> dataScores = new List<Pair<double, int>>();

            // Collect one (positive-class probability, binary gold label) pair per datum.
            for (int i = 0; i < data.Size(); i++)
            {
                IDatum<L, F> d      = data.GetRVFDatum(i);
                ICounter<L>  scores = classifier.LogProbabilityOf(d);
                int          labelD = d.Label().Equals(posLabel) ? 1 : 0;
                dataScores.Add(new Pair<double, int>(Math.Exp(scores.GetCount(posLabel)), labelD));
            }
            PRCurve prc = new PRCurve(dataScores);

            confWeightedAccuracy    = prc.Cwa();
            accuracy                = prc.Accuracy();
            optAccuracy             = prc.OptimalAccuracy();
            optConfWeightedAccuracy = prc.OptimalCwa();
            logLikelihood           = prc.LogLikelihood();
            accrecall               = prc.CwaArray();
            optaccrecall            = prc.OptimalCwaArray();
            return accuracy;
        }
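
The scorer above hands the (probability, gold label) pairs to PRCurve and reads accuracy and confidence-weighted accuracy off the curve. As a rough sanity check on those pairs, here is a hedged sketch that computes plain accuracy by thresholding the positive-class probability at 0.5, reusing the Pair type from above; it is only an approximation, not PRCurve's own Accuracy() computation.

// Hypothetical sanity check: accuracy from (probability, gold) pairs at a 0.5 threshold.
// PRCurve.Accuracy() may handle ties and calibration differently; illustrative only.
private static double ThresholdAccuracy(IList<Pair<double, int>> dataScores)
{
    if (dataScores.Count == 0)
    {
        return 0.0;
    }
    int correct = 0;
    foreach (Pair<double, int> p in dataScores)
    {
        int predicted = p.First() >= 0.5 ? 1 : 0;
        if (predicted == p.Second())
        {
            correct++;
        }
    }
    return (double)correct / dataScores.Count;
}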