Ejemplo n.º 1
0
        /// <summary>Computes a score for every known label given a real-valued datum.</summary>
        /// <remarks>
        /// The result starts from <c>priors</c> (plus <c>priorZero</c> when <c>addZeroValued</c> is set).
        /// For each label, the <c>Weight</c> of every feature present in the datum is then added, using
        /// the feature's count truncated to an integer as the value. When <c>addZeroValued</c> is set,
        /// the weight of the zero value is subtracted again for each present feature.
        /// </remarks>
        /// <param name="example">The datum whose feature counter is scored.</param>
        /// <returns>A counter mapping each label to its accumulated score.</returns>
        public virtual ClassicCounter <L> ScoresOf(RVFDatum <L, F> example)
        {
            ClassicCounter <L> scores = new ClassicCounter <L>();

            Counters.AddInPlace(scores, priors);
            if (addZeroValued)
            {
                Counters.AddInPlace(scores, priorZero);
            }
            // The feature counter does not depend on the label, so fetch it once.
            ICounter <F> features = example.AsFeaturesCounter();
            foreach (L l in labels)
            {
                double score = 0.0;
                foreach (F f in features.KeySet())
                {
                    // Counts are truncated to integers and passed straight through;
                    // int.Parse only accepts strings and must not be applied to an int.
                    int value = (int)features.GetCount(f);
                    score += Weight(l, f, value);
                    if (addZeroValued)
                    {
                        score -= Weight(l, f, zero);
                    }
                }
                scores.IncrementCount(l, score);
            }
            return(scores);
        }
        /// <summary>Builds a new datum whose feature values are rescaled with this dataset's per-feature means and standard deviations.</summary>
        /// <remarks>
        /// If <c>means</c> or <c>stdevs</c> is not yet set, <c>ScaleFeaturesGaussian()</c> is invoked on this dataset first.
        /// Features that are not present in <c>featureIndex</c> are left out of the result; features whose
        /// standard deviation is 0 keep their original value.
        /// </remarks>
        /// <param name="datum">The datum to rescale; it is not modified by this method.</param>
        /// <returns>A new datum with the rescaled features and the label of <paramref name="datum"/>.</returns>
        public virtual RVFDatum <L, F> ScaleDatumGaussian(RVFDatum <L, F> datum)
        {
            // The dataset itself has to be scaled before a single datum can be.
            bool statsMissing = (means == null || stdevs == null);
            if (statsMissing)
            {
                ScaleFeaturesGaussian();
            }

            ICounter <F> rescaled = new ClassicCounter <F>();
            foreach (F key in datum.AsFeatures())
            {
                int index = this.featureIndex.IndexOf(key);
                if (index < 0)
                {
                    // Not in the feature index: no statistics available, so the feature is skipped.
                    continue;
                }
                double raw    = datum.AsFeaturesCounter().GetCount(key);
                double scaled = (stdevs[index] != 0) ? (raw - means[index]) / stdevs[index] : raw;
                rescaled.IncrementCount(key, scaled);
            }
            return(new RVFDatum <L, F>(rescaled, datum.Label()));
        }
Ejemplo n.º 3
0
        /// <summary>Trains a classifier from a dataset of RVFDatum examples over the given feature set.</summary>
        /// <remarks>
        /// The examples are assumed to be a list of RVFDatum.
        /// The datums are assumed to not contain the zeroes and then they are added to each instance:
        /// every example is expanded into a dense row of <c>featureSet.Count</c> integer values in which
        /// features absent from the datum stay 0. Feature counts are truncated to integers.
        /// This method replaces <c>labelIndex</c> and <c>featureIndex</c> with freshly built indices.
        /// </remarks>
        /// <param name="examples">The dataset to train on.</param>
        /// <param name="featureSet">The features to index; each one becomes a column of the data matrix.</param>
        /// <returns>The classifier returned by the array-based <c>TrainClassifier</c> overload.</returns>
        public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
        {
            int numFeatures = featureSet.Count;
            int numExamples = examples.Size();

            // One zero-initialized row per example. A jagged array needs each row allocated
            // explicitly; an empty outer array would make every write below fail.
            int[][] data = new int[numExamples][];
            for (int row = 0; row < numExamples; row++)
            {
                data[row] = new int[numFeatures];
            }
            int[] labels = new int[numExamples];
            labelIndex   = new HashIndex <L>();
            featureIndex = new HashIndex <F>();
            foreach (F feat in featureSet)
            {
                featureIndex.Add(feat);
            }
            for (int d = 0; d < numExamples; d++)
            {
                RVFDatum <L, F> datum = examples.GetRVFDatum(d);
                ICounter <F>    c     = datum.AsFeaturesCounter();
                foreach (F feature in c.KeySet())
                {
                    // NOTE(review): a datum feature missing from featureSet presumably yields a
                    // negative index and an out-of-range write here - confirm callers guarantee coverage.
                    int fNo   = featureIndex.IndexOf(feature);
                    int value = (int)c.GetCount(feature);
                    data[d][fNo] = value;
                }
                labelIndex.Add(datum.Label());
                labels[d] = labelIndex.IndexOf(datum.Label());
            }
            int numClasses = labelIndex.Size();

            return(TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex));
        }
Ejemplo n.º 4
0
        /// <summary>Builds the two-entry score counter for a real-valued datum.</summary>
        /// <remarks>
        /// <c>classes[1]</c> receives the value of <c>ScoreOf</c> on the datum's feature counter and
        /// <c>classes[0]</c> receives its negation.
        /// </remarks>
        /// <param name="example">The datum to score.</param>
        /// <returns>A counter holding the scores of <c>classes[0]</c> and <c>classes[1]</c>.</returns>
        private ICounter <L> ScoresOfRVFDatum(RVFDatum <L, F> example)
        {
            double       margin  = ScoreOf(example.AsFeaturesCounter());
            ICounter <L> byClass = new ClassicCounter <L>();

            byClass.SetCount(classes[0], -margin);
            byClass.SetCount(classes[1], margin);
            return(byClass);
        }
Ejemplo n.º 5
0
        // todo: have unit tested
        /// <summary>Prints this dataset in SVM Light format, one line per example.</summary>
        /// <remarks>
        /// Each line consists of the mapped label followed by space-separated <c>index:value</c> pairs,
        /// where <c>index</c> is the feature's position in <c>featureIndex</c> plus one and the pairs
        /// appear in ascending index order.
        /// Labels are written through <c>MakeSvmLabelMap()</c> rather than as raw label ids because
        /// svm_light has special conventions not supported by default by our labels, e.g. in a
        /// multiclass setting it assumes that labels start at 1 whereas our labels start at 0.
        /// Numbers are formatted with the invariant culture so the decimal separator is always '.',
        /// regardless of the current thread culture.
        /// </remarks>
        /// <param name="pw">The writer that receives one line per example.</param>
        public virtual void PrintSVMLightFormat(PrintWriter pw)
        {
            string[] labelMap = MakeSvmLabelMap();
            // Fully qualified so that this method does not depend on an extra using directive.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            for (int i = 0; i < size; i++)
            {
                RVFDatum <L, F>      d      = GetRVFDatum(i);
                ICounter <F>         c      = d.AsFeaturesCounter();
                // Re-key the values by feature index so that the keys can be sorted numerically.
                ClassicCounter <int> printC = new ClassicCounter <int>();
                foreach (F f in c.KeySet())
                {
                    printC.SetCount(featureIndex.IndexOf(f), c.GetCount(f));
                }
                int[] features = Sharpen.Collections.ToArray(printC.KeySet(), new int[printC.KeySet().Count]);
                Arrays.Sort(features);
                StringBuilder sb = new StringBuilder();
                // Use the mapped label: the raw labels[i] breaks svm_light conventions.
                sb.Append(labelMap[labels[i]]).Append(' ');
                foreach (int f_1 in features)
                {
                    // Values are read from printC (keyed by feature index), since c is keyed by F.
                    // Culture-sensitive formatting could emit a ',' decimal separator, which would
                    // not be valid SVM Light input.
                    sb.Append((f_1 + 1).ToString(invariant)).Append(':').Append(printC.GetCount(f_1).ToString(invariant)).Append(' ');
                }
                pw.Println(sb.ToString());
            }
        }
 /// <summary>
 /// Given an instance to classify, scores and returns
 /// score by class.
 /// </summary>
 /// <remarks>
 /// NOTE: supports only RVFDatums; any other datum (or null) yields a null result.
 /// Every stored instance is scored by <c>Counters.Cosine</c> against the datum's feature counter
 /// (a normalized copy when <c>l2Normalize</c> is set). The first <c>k</c> entries of the list
 /// returned by <c>Counters.ToSortedList</c> then vote for their class: each vote counts 1.0, or
 /// the instance's cosine score when <c>weightedVotes</c> is set.
 /// </remarks>
 /// <param name="datum">The instance to classify.</param>
 /// <returns>A counter of votes per class, or null when <paramref name="datum"/> is not an RVFDatum.</returns>
 public virtual ClassicCounter <K> ScoresOf(IDatum <K, V> datum)
 {
     // Generic classes are invariant, so the check has to use the method's own type arguments;
     // testing against RVFDatum<object, object> would reject every other closed type.
     RVFDatum <K, V> vec = datum as RVFDatum <K, V>;
     if (vec == null)
     {
         return(null);
     }
     if (l2Normalize)
     {
         // Normalize a copy so that the caller's datum is left untouched.
         ClassicCounter <V> featVec = new ClassicCounter <V>(vec.AsFeaturesCounter());
         Counters.Normalize(featVec);
         vec = new RVFDatum <K, V>(featVec);
     }
     // The query counter is the same for every stored instance, so fetch it once.
     ICounter <V>                   query  = vec.AsFeaturesCounter();
     ClassicCounter <ICounter <V> > scores = new ClassicCounter <ICounter <V> >();
     foreach (ICounter <V> instance in instances.AllValues())
     {
         // set entry, for given instance and score
         scores.SetCount(instance, Counters.Cosine(query, instance));
     }
     // NOTE(review): presumably sorted by descending score, so the best matches vote first - confirm.
     IList <ICounter <V> > sorted      = Counters.ToSortedList(scores);
     ClassicCounter <K>    classScores = new ClassicCounter <K>();
     for (int i = 0; i < k && i < sorted.Count; i++)
     {
         K      label = classLookup[sorted[i]];
         double count = 1.0;
         if (weightedVotes)
         {
             count = scores.GetCount(sorted[i]);
         }
         classScores.IncrementCount(label, count);
     }
     return(classScores);
 }
Ejemplo n.º 7
0
 /// <summary>Evaluates <c>ProbabilityOfTrue</c> on the feature counter of the given datum.</summary>
 /// <param name="datum">The datum whose feature counter is passed on.</param>
 /// <returns>The value returned by the counter-based <c>ProbabilityOfTrue</c> overload.</returns>
 public virtual double ProbabilityOfTrue(RVFDatum <bool, string> datum)
 {
     ICounter <string> features = datum.AsFeaturesCounter();

     return(ProbabilityOfTrue(features));
 }
Ejemplo n.º 8
0
 /// <summary>Evaluates <c>ProbabilityOf</c> for a datum's feature counter and its own label.</summary>
 /// <param name="example">The datum supplying both the feature counter and the label.</param>
 /// <returns>The value returned by <c>ProbabilityOf</c> for that counter and label.</returns>
 private double ProbabilityOfRVFDatum(RVFDatum <L, F> example)
 {
     ICounter <F> features = example.AsFeaturesCounter();
     L            label    = example.Label();

     return(ProbabilityOf(features, label));
 }
Ejemplo n.º 9
0
 /// <summary>Classifies a real-valued datum by passing its feature counter to <c>ClassOf</c>.</summary>
 /// <param name="example">The datum to classify.</param>
 /// <returns>The class returned by the counter-based <c>ClassOf</c> overload.</returns>
 private L ClassOfRVFDatum(RVFDatum <L, F> example)
 {
     ICounter <F> features = example.AsFeaturesCounter();

     return(ClassOf(features));
 }
Ejemplo n.º 10
0
 /// <summary>Classifies a real-valued datum by passing its feature counter to <c>ClassOf</c>.</summary>
 /// <remarks>Callers are advised to use the <c>ClassOf</c> overload that takes a datum instead.</remarks>
 /// <param name="example">The datum to classify.</param>
 /// <returns>The class returned by the counter-based <c>ClassOf</c> overload.</returns>
 public virtual L ClassOf(RVFDatum <L, F> example)
 {
     ICounter <F> features = example.AsFeaturesCounter();

     return(ClassOf(features));
 }