/// <summary>The examples are assumed to be a list of RVFDatum.</summary>
/// <remarks>
/// The examples are assumed to be a list of RVFDatum.
/// The datums are assumed not to contain explicit zero counts; zero values for missing
/// features are filled in for each instance here.
/// </remarks>
public virtual NaiveBayesClassifier<L, F> TrainClassifier(GeneralDataset<L, F> examples, ICollection<F> featureSet)
{
    int numFeatures = featureSet.Count;
    int[][] data = new int[examples.Size()][];
    int[] labels = new int[examples.Size()];
    labelIndex = new HashIndex<L>();
    featureIndex = new HashIndex<F>();
    foreach (F feat in featureSet)
    {
        featureIndex.Add(feat);
    }
    for (int d = 0; d < examples.Size(); d++)
    {
        // Allocate one row of feature counts per example (missing features stay zero).
        data[d] = new int[numFeatures];
        RVFDatum<L, F> datum = examples.GetRVFDatum(d);
        ICounter<F> c = datum.AsFeaturesCounter();
        foreach (F feature in c.KeySet())
        {
            int fNo = featureIndex.IndexOf(feature);
            int value = (int)c.GetCount(feature);
            data[d][fNo] = value;
        }
        labelIndex.Add(datum.Label());
        labels[d] = labelIndex.IndexOf(datum.Label());
    }
    int numClasses = labelIndex.Size();
    return TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
}
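// Usage sketch (illustrative, not part of the factory source): builds a tiny RVFDataset and
// trains a classifier with the method above. The parameterless NaiveBayesClassifierFactory
// construction is an assumption; the real class may require configuration arguments.
public static NaiveBayesClassifier<string, string> TrainToyNaiveBayes()
{
    var examples = new RVFDataset<string, string>();
    ICounter<string> spamFeats = new ClassicCounter<string>();
    spamFeats.IncrementCount("exclamations", 4.0);
    spamFeats.IncrementCount("hasDigit", 1.0);
    examples.Add(new RVFDatum<string, string>(spamFeats, "spam"));
    ICounter<string> hamFeats = new ClassicCounter<string>();
    hamFeats.IncrementCount("exclamations", 1.0);
    examples.Add(new RVFDatum<string, string>(hamFeats, "ham"));
    ICollection<string> featureSet = new HashSet<string> { "exclamations", "hasDigit" };
    var factory = new NaiveBayesClassifierFactory<string, string>();  // assumed constructor
    return factory.TrainClassifier(examples, featureSet);
}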
public virtual RVFDatum<L, F> ScaleDatumGaussian(RVFDatum<L, F> datum)
{
    // Scale this dataset first so the per-feature means and standard deviations are available.
    if (means == null || stdevs == null)
    {
        ScaleFeaturesGaussian();
    }
    ICounter<F> scaledFeatures = new ClassicCounter<F>();
    foreach (F feature in datum.AsFeatures())
    {
        int fID = this.featureIndex.IndexOf(feature);
        if (fID >= 0)
        {
            double oldVal = datum.AsFeaturesCounter().GetCount(feature);
            double newVal;
            if (stdevs[fID] != 0)
            {
                // Standardize to zero mean and unit variance.
                newVal = (oldVal - means[fID]) / stdevs[fID];
            }
            else
            {
                // Constant feature: leave the value unchanged to avoid dividing by zero.
                newVal = oldVal;
            }
            scaledFeatures.IncrementCount(feature, newVal);
        }
    }
    return new RVFDatum<L, F>(scaledFeatures, datum.Label());
}
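// Usage sketch (illustrative): scale a held-out datum with the training set's Gaussian
// statistics. Assumes the method above lives on an RVFDataset-like class; as the code shows,
// the per-feature means and standard deviations are computed lazily on first use.
public static RVFDatum<string, string> ScaleHeldOutExample(
    RVFDataset<string, string> trainData, RVFDatum<string, string> heldOut)
{
    // Features unseen at training time (IndexOf < 0) are dropped by the scaling method.
    return trainData.ScaleDatumGaussian(heldOut);
}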
/// <summary>Builds a sigmoid model (Platt scaling) to turn the classifier outputs into probabilities.</summary>
private LinearClassifier<L, L> FitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset)
{
    RVFDataset<L, L> plattDataset = new RVFDataset<L, L>();
    for (int i = 0; i < dataset.Size(); i++)
    {
        RVFDatum<L, F> d = dataset.GetRVFDatum(i);
        ICounter<L> scores = classifier.ScoresOf((IDatum<L, F>)d);
        // The null key serves as a constant bias feature for the sigmoid fit.
        scores.IncrementCount(null);
        plattDataset.Add(new RVFDatum<L, L>(scores, d.Label()));
    }
    LinearClassifierFactory<L, L> factory = new LinearClassifierFactory<L, L>();
    factory.SetPrior(new LogPrior(LogPrior.LogPriorType.Null));
    return factory.TrainClassifier(plattDataset);
}
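// Conceptual sketch (self-contained, not library code): the sigmoid fit above is Platt
// scaling, which maps a raw margin score s to a probability 1 / (1 + exp(A*s + B)), with
// A and B learned from held-out scores; here that role is played by the trained LinearClassifier.
public static double PlattProbability(double score, double a, double b)
{
    // Logistic transform of the raw classifier score.
    return 1.0 / (1.0 + System.Math.Exp(a * score + b));
}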
public virtual float Accuracy(IEnumerator<RVFDatum<L, F>> exampleIterator)
{
    int correct = 0;
    int total = 0;
    while (exampleIterator.MoveNext())
    {
        RVFDatum<L, F> next = exampleIterator.Current;
        L guess = ClassOf(next);
        if (guess.Equals(next.Label()))
        {
            correct++;
        }
        total++;
    }
    logger.Info("correct " + correct + " out of " + total);
    return correct / (float)total;
}
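// Usage sketch (illustrative): evaluate a trained classifier on a held-out list of
// RVFDatum examples. List<T>.GetEnumerator() satisfies the IEnumerator<T> parameter;
// the classifier type here is an assumption about which class exposes Accuracy(...).
public static float EvaluateHeldOut(
    NaiveBayesClassifier<string, string> classifier, List<RVFDatum<string, string>> heldOut)
{
    using (IEnumerator<RVFDatum<string, string>> it = heldOut.GetEnumerator())
    {
        return classifier.Accuracy(it);
    }
}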
private double ProbabilityOfRVFDatum(RVFDatum<L, F> example)
{
    return ProbabilityOf(example.AsFeaturesCounter(), example.Label());
}