/// <summary>Trains a naive Bayes classifier from a dataset whose examples are read as RVFDatum instances.</summary>
/// <remarks>
/// The examples are assumed to be a list of RVFDatum.
/// The datums are assumed to be sparse (features with a zero count are not listed);
/// this method expands each of them into a dense row of length <c>featureSet.Count</c>
/// in which every feature that the datum does not mention keeps the value 0.
/// Feature counts are truncated to <c>int</c> when copied into the dense matrix.
/// As a side effect, the <c>labelIndex</c> and <c>featureIndex</c> members are replaced
/// with freshly built indices.
/// </remarks>
/// <param name="examples">The training examples; each one is fetched with <c>GetRVFDatum</c>.</param>
/// <param name="featureSet">The features to index; its size determines the width of every data row.</param>
/// <returns>The classifier produced by the array-based <c>TrainClassifier</c> overload.</returns>
public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
{
    int numFeatures = featureSet.Count;
    int numExamples = examples.Size();

    // One row per example. Rows are allocated inside the loop below; a jagged array
    // created without rows would make every data[d][fNo] write throw.
    int[][] data = new int[numExamples][];
    int[] labels = new int[numExamples];

    labelIndex = new HashIndex <L>();
    featureIndex = new HashIndex <F>();
    foreach (F feat in featureSet)
    {
        featureIndex.Add(feat);
    }

    for (int d = 0; d < numExamples; d++)
    {
        // A new int[] is zero-filled, which supplies the zeroes the sparse datum omits.
        data[d] = new int[numFeatures];

        RVFDatum <L, F> datum = examples.GetRVFDatum(d);
        ICounter <F> c = datum.AsFeaturesCounter();
        foreach (F feature in c.KeySet())
        {
            // NOTE(review): presumably IndexOf yields a negative index for a feature that is
            // not in featureSet, in which case the write below throws - confirm that callers
            // always pass a featureSet covering every feature in the examples.
            int fNo = featureIndex.IndexOf(feature);
            int value = (int)c.GetCount(feature);
            data[d][fNo] = value;
        }

        // Register the label, then store the index it is known under.
        labelIndex.Add(datum.Label());
        labels[d] = labelIndex.IndexOf(datum.Label());
    }

    int numClasses = labelIndex.Size();
    return TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
}
/// <summary>Builds a sigmoid model to turn the classifier outputs into probabilities.</summary>
/// <remarks>
/// For every example in <paramref name="dataset"/> the classifier's per-label scores are
/// collected, the count stored under the <c>null</c> key is incremented, and the resulting
/// counter is added - paired with the example's label - to a new dataset whose features
/// are labels. A linear classifier is then trained on that dataset with a
/// <c>LogPriorType.Null</c> prior.
/// </remarks>
/// <param name="classifier">The classifier whose scores are to be calibrated.</param>
/// <param name="dataset">The examples that are scored to build the calibration dataset.</param>
/// <returns>The linear classifier trained on the score dataset.</returns>
private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
{
    RVFDataset<L, L> scoreDataset = new RVFDataset<L, L>();

    int index = 0;
    while (index < dataset.Size())
    {
        RVFDatum<L, F> example = dataset.GetRVFDatum(index);

        // The explicit cast is kept so that the same ScoresOf overload is selected.
        IDatum<L, F> exampleAsDatum = (IDatum<L, F>)example;
        ICounter<L> labelScores = classifier.ScoresOf(exampleAsDatum);

        // NOTE(review): the null key presumably acts as a constant bias feature - confirm.
        labelScores.IncrementCount(null);

        RVFDatum<L, L> scoredExample = new RVFDatum<L, L>(labelScores, example.Label());
        scoreDataset.Add(scoredExample);

        index++;
    }

    LinearClassifierFactory<L, L> sigmoidFactory = new LinearClassifierFactory<L, L>();
    LogPrior nullPrior = new LogPrior(LogPrior.LogPriorType.Null);
    sigmoidFactory.SetPrior(nullPrior);

    return sigmoidFactory.TrainClassifier(scoreDataset);
}