// Example 1
 /// <summary>Constructs a new RVFDatum taking the data from a Datum.</summary>
 /// <remarks>
 /// Every feature of the source Datum receives a count of 1.0 in a freshly
 /// allocated counter. <i>Implementation note:</i> because only references are
 /// copied, the result is only guaranteed correct while the label and feature
 /// names remain immutable.
 /// </remarks>
 /// <param name="m">The Datum to copy.</param>
 public RVFDatum(IDatum<L, F> m)
 {
     ClassicCounter<F> counts = new ClassicCounter<F>();
     foreach (F feature in m.AsFeatures())
     {
         counts.IncrementCount(feature, 1.0);
     }
     this.features = counts;
     SetLabel(m.Label());
 }
// Example 2
        /// <summary>
        /// Computes, for each GE (generalized-expectation) feature, its smoothed empirical label
        /// distribution over the labeled data, and builds an inverted index from each GE feature
        /// to the unlabeled datums that contain it.
        /// </summary>
        /// <param name="geFeatures">The GE features to gather statistics for.</param>
        private void ComputeEmpiricalStatistics(IList<F> geFeatures)
        {
            // Allocate the containers.
            // BUG FIX: the original allocated an empty outer array (new double[][] { }), so
            // geFeature2EmpiricalDist[n] below threw IndexOutOfRangeException whenever
            // geFeatures was non-empty. Allocate one row per GE feature instead.
            geFeature2EmpiricalDist = new double[geFeatures.Count][];
            geFeature2DatumList     = new List<IList<int>>(geFeatures.Count);
            IDictionary<F, int> geFeatureMap            = Generics.NewHashMap();
            ICollection<int>    activeUnlabeledExamples = Generics.NewHashSet();

            for (int n = 0; n < geFeatures.Count; n++)
            {
                F geFeature = geFeatures[n];
                geFeature2DatumList.Add(new List<int>());
                // One slot per label; C# zero-initializes new arrays, so no explicit Fill is needed.
                geFeature2EmpiricalDist[n] = new double[labeledDataset.labelIndex.Size()];
                geFeatureMap[geFeature] = n;
            }
            // Compute the empirical label distribution for each GE feature
            // (raw counts here; normalized and smoothed below).
            for (int i = 0; i < labeledDataset.Size(); i++)
            {
                IDatum<L, F> datum   = labeledDataset.GetDatum(i);
                int           labelID = labeledDataset.labelIndex.IndexOf(datum.Label());
                foreach (F feature in datum.AsFeatures())
                {
                    // TryGetValue replaces the Java-style Contains + indexer (single lookup,
                    // and IDictionary<F,int> has no Contains(F) overload in standard C#).
                    if (geFeatureMap.TryGetValue(feature, out int geFnum))
                    {
                        geFeature2EmpiricalDist[geFnum][labelID]++;
                    }
                }
            }
            // Now normalize and smooth the label distribution for each feature.
            for (int n_1 = 0; n_1 < geFeatures.Count; n_1++)
            {
                ArrayMath.Normalize(geFeature2EmpiricalDist[n_1]);
                SmoothDistribution(geFeature2EmpiricalDist[n_1]);
            }
            // Now build the inverted index from each GE feature to unlabeled datums that contain it.
            for (int i_1 = 0; i_1 < unlabeledDataList.Count; i_1++)
            {
                IDatum<L, F> datum = unlabeledDataList[i_1];
                foreach (F feature in datum.AsFeatures())
                {
                    if (geFeatureMap.TryGetValue(feature, out int geFnum))
                    {
                        geFeature2DatumList[geFnum].Add(i_1);
                        activeUnlabeledExamples.Add(i_1);
                    }
                }
            }
            System.Console.Out.WriteLine("Number of active unlabeled examples:" + activeUnlabeledExamples.Count);
        }
// Example 3
        /// <summary>Maps a Datum's label through the given mapping, producing a new Datum of the same flavor.</summary>
        /// <param name="d">The source Datum (features are carried over unchanged).</param>
        /// <param name="labelMapping">Mapping from old to new labels.</param>
        /// <param name="defaultLabel">Label used when the mapping has no (non-null) entry for the Datum's label.</param>
        /// <returns>An RVFDatum preserving real-valued feature counts if <paramref name="d"/> is one, otherwise a BasicDatum.</returns>
        public static IDatum<L2, F> MapDatum<L, L2, F>(IDatum<L, F> d, IDictionary<L, L2> labelMapping, L2 defaultLabel)
        {
            // TODO: How to copy datum?
            // BUG FIX: the C# IDictionary indexer throws KeyNotFoundException for a missing key
            // (unlike Java's Map.get, which returns null), so the defaultLabel fallback was
            // unreachable for unmapped labels. Use TryGetValue instead.
            L2 newLabel;
            if (!labelMapping.TryGetValue(d.Label(), out newLabel) || newLabel == null)
            {
                newLabel = defaultLabel;
            }
            if (d is RVFDatum<L, F> rvf)
            {
                return new RVFDatum<L2, F>(rvf.AsFeaturesCounter(), newLabel);
            }
            return new BasicDatum<L2, F>(d.AsFeatures(), newLabel);
        }
        /// <summary>
        /// Scores every datum in <paramref name="data"/> with <paramref name="classifier"/> and caches
        /// accuracy, log-likelihood, and the per-datum (guess score, correct?) pairs sorted by
        /// descending classifier confidence, for later confidence-weighted evaluation.
        /// </summary>
        /// <param name="classifier">Classifier producing log-probability scores over labels.</param>
        /// <param name="data">Dataset to evaluate on.</param>
        public virtual void InitMC<F>(IProbabilisticClassifier<L, F> classifier, GeneralDataset<L, F> data)
        {
            // Priority queue of (datum index, (guess score, guess was correct)), ordered so that
            // the most confident guesses come out first (priority = -guessScore).
            IPriorityQueue<Pair<int, Pair<double, bool>>> q = new BinaryHeapPriorityQueue<Pair<int, Pair<double, bool>>>();

            total         = 0;
            correct       = 0;
            logLikelihood = 0.0;
            for (int i = 0; i < data.Size(); i++)
            {
                IDatum<L, F> d            = data.GetRVFDatum(i);
                ICounter<L>  scores       = classifier.LogProbabilityOf(d);
                L            guess        = Counters.Argmax(scores);
                L            correctLab   = d.Label();
                double       guessScore   = scores.GetCount(guess);
                double       correctScore = scores.GetCount(correctLab);
                // Compare via label indices so L need not implement Equals consistently.
                int guessInd   = data.LabelIndex().IndexOf(guess);
                int correctInd = data.LabelIndex().IndexOf(correctLab);
                total++;
                if (guessInd == correctInd)
                {
                    correct++;
                }
                logLikelihood += correctScore;
                // BUG FIX: the original called int.Parse(i) and bool.ValueOf(...) — invalid
                // Java->C# translations of Integer.valueOf/Boolean.valueOf that do not compile.
                // The int and bool values can be used directly.
                q.Add(new Pair<int, Pair<double, bool>>(i, new Pair<double, bool>(guessScore, guessInd == correctInd)), -guessScore);
            }
            accuracy = (double)correct / (double)total;
            IList<Pair<int, Pair<double, bool>>> sorted = q.ToSortedList();

            scores    = new double[sorted.Count];
            isCorrect = new bool[sorted.Count];
            for (int i_1 = 0; i_1 < sorted.Count; i_1++)
            {
                Pair<double, bool> next = sorted[i_1].Second();
                scores[i_1]    = next.First();
                isCorrect[i_1] = next.Second();
            }
        }
// Example 5
        /// <summary>
        /// Evaluates the classifier on the dataset, treating <c>posLabel</c> as the positive
        /// class, and caches precision/recall statistics derived from the resulting PR curve.
        /// </summary>
        /// <param name="classifier">Classifier producing log-probability scores over labels.</param>
        /// <param name="data">Dataset to evaluate on.</param>
        /// <returns>The classifier's accuracy on the data.</returns>
        public virtual double Score<F>(IProbabilisticClassifier<L, F> classifier, GeneralDataset<L, F> data)
        {
            // Collect (probability of positive class, binary gold label) pairs for the PR curve.
            List<Pair<double, int>> probsAndLabels = new List<Pair<double, int>>();

            int size = data.Size();
            for (int idx = 0; idx < size; idx++)
            {
                IDatum<L, F> example  = data.GetRVFDatum(idx);
                ICounter<L>  logProbs = classifier.LogProbabilityOf(example);
                double posProb    = Math.Exp(logProbs.GetCount(posLabel));
                int    goldBinary = example.Label().Equals(posLabel) ? 1 : 0;
                probsAndLabels.Add(new Pair<double, int>(posProb, goldBinary));
            }
            PRCurve curve = new PRCurve(probsAndLabels);

            // Cache the derived statistics on this scorer before returning.
            confWeightedAccuracy    = curve.Cwa();
            accuracy                = curve.Accuracy();
            optAccuracy             = curve.OptimalAccuracy();
            optConfWeightedAccuracy = curve.OptimalCwa();
            logLikelihood           = curve.LogLikelihood();
            accrecall               = curve.CwaArray();
            optaccrecall            = curve.OptimalCwaArray();
            return accuracy;
        }
// Example 6
 /// <summary>
 /// Tallies true-positive, false-positive, and false-negative counts for
 /// <paramref name="positiveClass"/> by classifying every datum in the dataset.
 /// True negatives are not tracked.
 /// </summary>
 /// <param name="classifier">Classifier used to predict each datum's label.</param>
 /// <param name="data">Dataset to evaluate on.</param>
 /// <param name="positiveClass">The label treated as the positive class.</param>
 public PrecisionRecallStats(IClassifier<L, F> classifier, Dataset<L, F> data, L positiveClass)
 {
     for (int idx = 0; idx < data.Size(); ++idx)
     {
         IDatum<L, F> example = data.GetDatum(idx);
         bool predictedPositive = classifier.ClassOf(example).Equals(positiveClass);
         bool actuallyPositive  = example.Label().Equals(positiveClass);
         if (predictedPositive)
         {
             if (actuallyPositive)
             {
                 tpCount++;
             }
             else
             {
                 fpCount++;
             }
         }
         else if (actuallyPositive)
         {
             fnCount++;
         }
     }
 }
// Example 7
 /// <summary>Adds a Datum to this dataset with the given weight, delegating to the feature/label overload.</summary>
 /// <param name="d">The Datum whose features and label are added.</param>
 /// <param name="weight">Weight to associate with the datum.</param>
 public virtual void Add(IDatum<L, F> d, float weight)
 {
     var feats = d.AsFeatures();
     var label = d.Label();
     Add(feats, label, weight);
 }
 /// <summary>Adds a Datum to this dataset, delegating to the feature/label overload.</summary>
 /// <param name="d">The Datum whose features and label are added.</param>
 public override void Add(IDatum<L, F> d)
 {
     var feats = d.AsFeatures();
     var label = d.Label();
     Add(feats, label);
 }