Beispiel #1
0
        /// <summary>The examples are assumed to be a list of RFVDatum.</summary>
        /// <remarks>
        /// The examples are assumed to be a list of RFVDatum.
        /// The datums are assumed to not contain the zeroes and then they are added to each instance.
        /// </remarks>
        public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
        {
            int numFeatures = featureSet.Count;

            int[][] data   = new int[][] {  };
            int[]   labels = new int[examples.Size()];
            labelIndex   = new HashIndex <L>();
            featureIndex = new HashIndex <F>();
            foreach (F feat in featureSet)
            {
                featureIndex.Add(feat);
            }
            for (int d = 0; d < examples.Size(); d++)
            {
                RVFDatum <L, F> datum = examples.GetRVFDatum(d);
                ICounter <F>    c     = datum.AsFeaturesCounter();
                foreach (F feature in c.KeySet())
                {
                    int fNo   = featureIndex.IndexOf(feature);
                    int value = (int)c.GetCount(feature);
                    data[d][fNo] = value;
                }
                labelIndex.Add(datum.Label());
                labels[d] = labelIndex.IndexOf(datum.Label());
            }
            int numClasses = labelIndex.Size();

            return(TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex));
        }
        public virtual RVFDatum <L, F> ScaleDatumGaussian(RVFDatum <L, F> datum)
        {
            // scale this dataset before scaling the datum
            if (means == null || stdevs == null)
            {
                ScaleFeaturesGaussian();
            }
            ICounter <F> scaledFeatures = new ClassicCounter <F>();

            foreach (F feature in datum.AsFeatures())
            {
                int fID = this.featureIndex.IndexOf(feature);
                if (fID >= 0)
                {
                    double oldVal = datum.AsFeaturesCounter().GetCount(feature);
                    double newVal;
                    if (stdevs[fID] != 0)
                    {
                        newVal = (oldVal - means[fID]) / stdevs[fID];
                    }
                    else
                    {
                        newVal = oldVal;
                    }
                    scaledFeatures.IncrementCount(feature, newVal);
                }
            }
            return(new RVFDatum <L, F>(scaledFeatures, datum.Label()));
        }
Beispiel #3
0
        /// <summary>Builds a sigmoid model to turn the classifier outputs into probabilities.</summary>
        private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
        {
            RVFDataset <L, L> plattDataset = new RVFDataset <L, L>();

            for (int i = 0; i < dataset.Size(); i++)
            {
                RVFDatum <L, F> d      = dataset.GetRVFDatum(i);
                ICounter <L>    scores = classifier.ScoresOf((IDatum <L, F>)d);
                scores.IncrementCount(null);
                plattDataset.Add(new RVFDatum <L, L>(scores, d.Label()));
            }
            LinearClassifierFactory <L, L> factory = new LinearClassifierFactory <L, L>();

            factory.SetPrior(new LogPrior(LogPrior.LogPriorType.Null));
            return(factory.TrainClassifier(plattDataset));
        }
Beispiel #4
0
        public virtual float Accuracy(IEnumerator <RVFDatum <L, F> > exampleIterator)
        {
            int correct = 0;
            int total   = 0;

            for (; exampleIterator.MoveNext();)
            {
                RVFDatum <L, F> next = exampleIterator.Current;
                L guess = ClassOf(next);
                if (guess.Equals(next.Label()))
                {
                    correct++;
                }
                total++;
            }
            logger.Info("correct " + correct + " out of " + total);
            return(correct / (float)total);
        }
Beispiel #5
0
 private double ProbabilityOfRVFDatum(RVFDatum <L, F> example)
 {
     return(ProbabilityOf(example.AsFeaturesCounter(), example.Label()));
 }