Exemple #1
0
        /// <summary>Trains a naive Bayes classifier from a dataset whose examples are read as RVFDatum.</summary>
        /// <remarks>
        /// The examples are assumed to be a list of RVFDatum.
        /// The datums are assumed to not contain the zeroes: every example gets a full row of
        /// <c>featureSet.Count</c> integer counts, and features a datum does not mention keep a count of zero.
        /// This method replaces <c>labelIndex</c> and <c>featureIndex</c> (declared outside this method)
        /// with freshly built indices.
        /// </remarks>
        /// <param name="examples">Dataset to train on; each example is fetched with <c>GetRVFDatum</c>.</param>
        /// <param name="featureSet">Features that define the columns of the count matrix.</param>
        /// <returns>The classifier returned by the array-based <c>TrainClassifier</c> overload.</returns>
        public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
        {
            int numFeatures = featureSet.Count;
            int numExamples = examples.Size();

            // One row per example. Rows are allocated inside the loop below; a jagged array
            // created without rows would make the first data[d][fNo] write throw.
            int[][] data   = new int[numExamples][];
            int[]   labels = new int[numExamples];
            labelIndex   = new HashIndex <L>();
            featureIndex = new HashIndex <F>();
            foreach (F feat in featureSet)
            {
                featureIndex.Add(feat);
            }
            for (int d = 0; d < numExamples; d++)
            {
                // Elements default to zero, so features absent from this datum count as 0.
                data[d] = new int[numFeatures];

                RVFDatum <L, F> datum = examples.GetRVFDatum(d);
                ICounter <F>    c     = datum.AsFeaturesCounter();
                foreach (F feature in c.KeySet())
                {
                    // NOTE(review): a feature missing from featureSet presumably yields a negative
                    // index here and makes the write below throw -- confirm against HashIndex.
                    int fNo   = featureIndex.IndexOf(feature);
                    // The count is cast to int before being stored in the matrix.
                    int value = (int)c.GetCount(feature);
                    data[d][fNo] = value;
                }
                labelIndex.Add(datum.Label());
                labels[d] = labelIndex.IndexOf(datum.Label());
            }
            int numClasses = labelIndex.Size();

            return(TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex));
        }
Exemple #2
0
        /// <summary>Builds a sigmoid model to turn the classifier outputs into probabilities.</summary>
        /// <param name="classifier">Classifier whose scores for each example become the features of the training data.</param>
        /// <param name="dataset">Examples to score; each example's label is reused as the label of the derived datum.</param>
        /// <returns>The linear classifier trained on the derived score dataset.</returns>
        private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
        {
            RVFDataset <L, L> scoreDataset = new RVFDataset <L, L>();

            int index = 0;
            while (index < dataset.Size())
            {
                RVFDatum <L, F> example        = dataset.GetRVFDatum(index);
                IDatum <L, F>   exampleAsDatum = (IDatum <L, F>)example;
                ICounter <L>    labelScores    = classifier.ScoresOf(exampleAsDatum);

                // Increment the count stored under the null key before the scores are used as features.
                // NOTE(review): presumably this acts as a constant bias feature -- confirm.
                labelScores.IncrementCount(null);

                RVFDatum <L, L> scoredExample = new RVFDatum <L, L>(labelScores, example.Label());
                scoreDataset.Add(scoredExample);
                index++;
            }

            // Train with the Null prior type selected on the factory.
            LinearClassifierFactory <L, L> trainer = new LinearClassifierFactory <L, L>();
            LogPrior nullPrior = new LogPrior(LogPrior.LogPriorType.Null);
            trainer.SetPrior(nullPrior);

            LinearClassifier <L, L> sigmoidModel = trainer.TrainClassifier(scoreDataset);
            return sigmoidModel;
        }