/// <summary>Scores every known label for a real-valued-feature datum.</summary>
/// <remarks>
/// Each label starts from its entry in <c>priors</c> (plus <c>priorZero</c> when
/// <c>addZeroValued</c> is set). For every feature of the example, the feature count is
/// truncated to an int and the weight for that (label, feature, value) triple is added;
/// when <c>addZeroValued</c> is set, the weight for the <c>zero</c> value of the same
/// feature is subtracted.
/// </remarks>
/// <param name="example">The datum whose feature counter is scored.</param>
/// <returns>A counter holding the accumulated score of each label.</returns>
public virtual ClassicCounter <L> ScoresOf(RVFDatum <L, F> example)
{
    ClassicCounter <L> scores = new ClassicCounter <L>();
    Counters.AddInPlace(scores, priors);
    if (addZeroValued)
    {
        Counters.AddInPlace(scores, priorZero);
    }

    // The feature counter does not depend on the label, so fetch it once.
    ICounter <F> features = example.AsFeaturesCounter();
    foreach (L l in labels)
    {
        double score = 0.0;
        foreach (F f in features.KeySet())
        {
            // Counts are stored as doubles; the cast truncates them to an integer value.
            int value = (int)features.GetCount(f);
            // Pass the integer itself: int.Parse only accepts text, not an int.
            score += Weight(l, f, value);
            if (addZeroValued)
            {
                score -= Weight(l, f, zero);
            }
        }
        scores.IncrementCount(l, score);
    }
    return scores;
}
/// <summary>
/// Builds a new datum whose feature values are standardised with this dataset's
/// per-feature means and standard deviations.
/// </summary>
/// <remarks>
/// If either <c>means</c> or <c>stdevs</c> is still null, <c>ScaleFeaturesGaussian()</c> is
/// called first. Features that are not in <c>featureIndex</c> are left out of the result;
/// features whose standard deviation is zero keep their original value.
/// </remarks>
/// <param name="datum">The datum to scale.</param>
/// <returns>A new datum with the scaled features and the label of <paramref name="datum"/>.</returns>
public virtual RVFDatum <L, F> ScaleDatumGaussian(RVFDatum <L, F> datum)
{
    // Scale this dataset before scaling the datum.
    bool statsReady = means != null && stdevs != null;
    if (!statsReady)
    {
        ScaleFeaturesGaussian();
    }

    ICounter <F> rescaled = new ClassicCounter <F>();
    foreach (F feat in datum.AsFeatures())
    {
        int idx = this.featureIndex.IndexOf(feat);
        if (idx < 0)
        {
            // Unknown feature: nothing to scale against, so it is skipped.
            continue;
        }

        double raw = datum.AsFeaturesCounter().GetCount(feat);
        double scaled = (stdevs[idx] != 0)
            ? (raw - means[idx]) / stdevs[idx]
            : raw;
        rescaled.IncrementCount(feat, scaled);
    }

    return new RVFDatum <L, F>(rescaled, datum.Label());
}
/// <summary>Trains a classifier from a dataset of RVFDatum examples.</summary>
/// <remarks>
/// The examples are assumed to be a list of RVFDatum.
/// The datums are assumed to not contain the zeroes and then they are added to each instance:
/// every example gets a row with one cell per feature, initialised to zero, and only the
/// features present in the datum overwrite their cell (with the count truncated to an int).
/// This method replaces <c>labelIndex</c> and <c>featureIndex</c> with fresh indices.
/// </remarks>
/// <param name="examples">The training examples.</param>
/// <param name="featureSet">The features to index; its size fixes the row width.</param>
/// <returns>The classifier produced by the array-based <c>TrainClassifier</c> overload.</returns>
public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
{
    int numFeatures = featureSet.Count;
    int numExamples = examples.Size();

    // One row per example and one column per feature. The rows must be allocated
    // explicitly; an empty jagged array would make the first data[d][fNo] write throw.
    int[][] data = new int[numExamples][];
    for (int row = 0; row < numExamples; row++)
    {
        data[row] = new int[numFeatures];
    }
    int[] labels = new int[numExamples];

    labelIndex = new HashIndex <L>();
    featureIndex = new HashIndex <F>();
    foreach (F feat in featureSet)
    {
        featureIndex.Add(feat);
    }

    for (int d = 0; d < numExamples; d++)
    {
        RVFDatum <L, F> datum = examples.GetRVFDatum(d);
        ICounter <F> c = datum.AsFeaturesCounter();
        foreach (F feature in c.KeySet())
        {
            int fNo = featureIndex.IndexOf(feature);
            int value = (int)c.GetCount(feature);
            data[d][fNo] = value;
        }
        labelIndex.Add(datum.Label());
        labels[d] = labelIndex.IndexOf(datum.Label());
    }

    int numClasses = labelIndex.Size();
    return TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
}
/// <summary>
/// Scores the example's feature counter with <c>ScoreOf</c> and spreads the result over the
/// two classes: <c>classes[0]</c> receives the negated score, <c>classes[1]</c> the score itself.
/// </summary>
/// <param name="example">The datum to score.</param>
/// <returns>A counter with one entry for each of the two classes.</returns>
private ICounter <L> ScoresOfRVFDatum(RVFDatum <L, F> example)
{
    ICounter <F> featureCounts = example.AsFeaturesCounter();
    double score = ScoreOf(featureCounts);

    ICounter <L> result = new ClassicCounter <L>();
    result.SetCount(classes[0], -score);
    result.SetCount(classes[1], score);
    return result;
}
// todo: Fix javadoc, have unit tested
/// <summary>Print SVM Light Format file.</summary>
/// <remarks>
/// Writes one line per data item: the mapped label, then a space-separated list of
/// <c>index:value</c> pairs in ascending feature-index order, where the printed index is the
/// feature's position in <c>featureIndex</c> plus one.
/// History, kept from earlier comments: Ramesh (12/17/2009) switched this method to printing
/// the exact label id. mihai (08/31/2010) restored the label map from
/// <c>MakeSvmLabelMap()</c>, because svm_light has special conventions not supported by our
/// labels, e.g. in a multiclass setting it assumes that labels start at 1 whereas our labels
/// start at 0. The older description (label index + 1 for more than 2 classes, +1/-1 for
/// 2 classes) referred to that convention.
/// </remarks>
/// <param name="pw">The writer that receives one line per data item.</param>
public virtual void PrintSVMLightFormat(PrintWriter pw)
{
    // Assumes each data item has a few features on, and sorts the feature keys
    // while collecting the values in a counter.
    string[] svmLabels = MakeSvmLabelMap();

    for (int row = 0; row < size; row++)
    {
        ICounter <F> counts = GetRVFDatum(row).AsFeaturesCounter();

        // Re-key the values by feature index so they can be sorted numerically.
        ClassicCounter <int> byIndex = new ClassicCounter <int>();
        foreach (F feat in counts.KeySet())
        {
            int featIndex = featureIndex.IndexOf(feat);
            byIndex.SetCount(featIndex, counts.GetCount(feat));
        }

        int[] sortedIndices = Sharpen.Collections.ToArray(byIndex.KeySet(), new int[byIndex.KeySet().Count]);
        Arrays.Sort(sortedIndices);

        StringBuilder line = new StringBuilder();
        // Printing labels[row] directly would break svm_light conventions (mihai).
        line.Append(svmLabels[labels[row]]);
        line.Append(' ');

        // The old code looked the values up in the F-keyed counter, which assumed F was an
        // integer type; the index-keyed counter is used instead (Sarah Spikes: "I think this
        // is what was meant, but not sure").
        foreach (int featIndex in sortedIndices)
        {
            line.Append((featIndex + 1));
            line.Append(':');
            line.Append(byIndex.GetCount(featIndex));
            line.Append(' ');
        }

        pw.Println(line.ToString());
    }
}
/// <summary>
/// Given an instance to classify, scores and returns
/// score by class.
/// </summary>
/// <remarks>
/// NOTE: supports only RVFDatums; any other datum (including null) yields null.
/// The datum's feature counter (L2-normalised first when <c>l2Normalize</c> is set) is
/// compared by cosine similarity against every stored instance. The instances are then
/// sorted by that score and the first <c>k</c> of them vote for their class: each vote counts
/// 1.0, or the instance's cosine score when <c>weightedVotes</c> is set.
/// </remarks>
/// <param name="datum">The instance to classify.</param>
/// <returns>
/// A counter of votes per class, or null when <paramref name="datum"/> is not an
/// <c>RVFDatum&lt;K, V&gt;</c>.
/// </returns>
public virtual ClassicCounter <K> ScoresOf(IDatum <K, V> datum)
{
    // Test against the closed type RVFDatum<K, V>. Generic classes are invariant, so a check
    // against RVFDatum<object, object> fails for every other choice of K and V.
    RVFDatum <K, V> vec = datum as RVFDatum <K, V>;
    if (vec == null)
    {
        return null;
    }

    if (l2Normalize)
    {
        // Normalise a copy so the caller's datum is left untouched.
        ClassicCounter <V> featVec = new ClassicCounter <V>(vec.AsFeaturesCounter());
        Counters.Normalize(featVec);
        vec = new RVFDatum <K, V>(featVec);
    }

    // Set entry, for given instance and score.
    ClassicCounter <ICounter <V> > scores = new ClassicCounter <ICounter <V> >();
    foreach (ICounter <V> instance in instances.AllValues())
    {
        scores.SetCount(instance, Counters.Cosine(vec.AsFeaturesCounter(), instance));
    }

    IList <ICounter <V> > sorted = Counters.ToSortedList(scores);
    ClassicCounter <K> classScores = new ClassicCounter <K>();
    for (int i = 0; i < k && i < sorted.Count; i++)
    {
        K label = classLookup[sorted[i]];
        double count = 1.0;
        if (weightedVotes)
        {
            count = scores.GetCount(sorted[i]);
        }
        classScores.IncrementCount(label, count);
    }
    return classScores;
}
/// <summary>
/// Returns the result of <c>ProbabilityOfTrue</c> applied to the datum's feature counter.
/// </summary>
/// <param name="datum">The datum whose feature counter is evaluated.</param>
/// <returns>The value produced for the datum's feature counter.</returns>
public virtual double ProbabilityOfTrue(RVFDatum <bool, string> datum)
{
    var featureCounts = datum.AsFeaturesCounter();
    return ProbabilityOfTrue(featureCounts);
}
/// <summary>
/// Returns <c>ProbabilityOf</c> evaluated on the example's feature counter and the
/// example's own label.
/// </summary>
/// <param name="example">The datum supplying both the features and the label.</param>
/// <returns>The value produced by <c>ProbabilityOf</c> for that pair.</returns>
private double ProbabilityOfRVFDatum(RVFDatum <L, F> example)
{
    var featureCounts = example.AsFeaturesCounter();
    var ownLabel = example.Label();
    return ProbabilityOf(featureCounts, ownLabel);
}
/// <summary>
/// Returns the class chosen by <c>ClassOf</c> for the example's feature counter.
/// </summary>
/// <param name="example">The datum whose feature counter is classified.</param>
/// <returns>The label returned by <c>ClassOf</c>.</returns>
private L ClassOfRVFDatum(RVFDatum <L, F> example)
{
    var featureCounts = example.AsFeaturesCounter();
    return ClassOf(featureCounts);
}
/// <summary>
/// Returns the class chosen by <c>ClassOf</c> for the example's feature counter.
/// </summary>
/// <remarks>Use classOf(Datum) instead.</remarks>
/// <param name="example">The datum whose feature counter is classified.</param>
/// <returns>The label returned for the feature counter.</returns>
public virtual L ClassOf(RVFDatum <L, F> example)
{
    var featureCounts = example.AsFeaturesCounter();
    return ClassOf(featureCounts);
}