コード例 #1
0
        /// <summary>
        /// Checks that <c>AddAll</c> copies the data of two source datasets into a third
        /// dataset and that enumerating the result yields the data in insertion order.
        /// </summary>
        public virtual void TestCombiningDatasets()
        {
            RVFDatum <string, string> datum1 = NewRVFDatum(null, "a", "b", "a");
            RVFDatum <string, string> datum2 = NewRVFDatum(null, "c", "c", "b");

            RVFDataset <string, string> data1 = new RVFDataset <string, string>();
            data1.Add(datum1);

            // Each source dataset holds exactly one datum so that both AddAll calls
            // below actually contribute something to the combined dataset.
            RVFDataset <string, string> data2 = new RVFDataset <string, string>();
            data2.Add(datum2);

            RVFDataset <string, string> data = new RVFDataset <string, string>();
            data.AddAll(data1);
            data.AddAll(data2);

            using (IEnumerator <RVFDatum <string, string> > iterator = data.GetEnumerator())
            {
                // An enumerator starts positioned before the first element, so Current
                // is only meaningful after MoveNext() has returned true.
                NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
                NUnit.Framework.Assert.AreEqual(datum1, iterator.Current);
                NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
                NUnit.Framework.Assert.AreEqual(datum2, iterator.Current);
                // Nothing may follow the two expected data.
                NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
            }
        }
コード例 #2
0
        /// <summary>
        /// Produces an RVFDataset in which every datum of this dataset has been replaced
        /// by its L1-normalized TF-IDF counterpart.
        /// </summary>
        /// <returns>
        /// A new RVFDataset constructed from this dataset's size, feature index and label
        /// index, holding one converted datum per datum of this dataset, in the same order.
        /// </returns>
        public virtual RVFDataset <L, F> GetL1NormalizedTFIDFDataset()
        {
            RVFDataset <L, F> result = new RVFDataset <L, F>(this.Size(), this.featureIndex, this.labelIndex);

            // The feature counter is computed once and handed to every per-datum conversion.
            ICounter <F> counts = GetFeatureCounter();

            for (int index = 0; index < this.Size(); index++)
            {
                result.Add(GetL1NormalizedTFIDFDatum(this.GetDatum(index), counts));
            }

            return result;
        }
コード例 #3
0
        /// <summary>
        /// Round-trips a bool/int dataset through an SVMLight-format temp file. Reading into
        /// a dataset built with the parameterless constructor is expected to throw; reading
        /// into a dataset built from the original's size, feature index and label index is
        /// expected to reproduce the original data.
        /// </summary>
        public virtual void TestSVMLightIntegerFormat()
        {
            RVFDataset <bool, int> dataset = new RVFDataset <bool, int>();

            dataset.Add(NewRVFDatum(true, 1, 2, 1, 0));
            dataset.Add(NewRVFDatum(false, 2, 2, 0, 0));
            dataset.Add(NewRVFDatum(true, 0, 1, 2, 2));
            File tempFile = File.CreateTempFile("testSVMLightIntegerFormat", ".svm");

            dataset.WriteSVMLightFormat(tempFile);
            RVFDataset <bool, int> newDataset = new RVFDataset <bool, int>();

            // Record whether the read threw and assert afterwards. Calling Assert.Fail inside
            // the try block would be useless here: NUnit signals failure with an exception
            // that derives from Exception, so the catch below would silently swallow it.
            bool readFailed = false;

            try
            {
                newDataset.ReadSVMLightFormat(tempFile);
            }
            catch (Exception)
            {
                readFailed = true;
            }
            NUnit.Framework.Assert.IsTrue(readFailed, "expected failure with empty indexes");

            newDataset = new RVFDataset <bool, int>(dataset.Size(), dataset.FeatureIndex(), dataset.LabelIndex());
            newDataset.ReadSVMLightFormat(tempFile);
            NUnit.Framework.Assert.AreEqual(CollectionUtils.ToList(dataset), CollectionUtils.ToList(newDataset));
        }
コード例 #4
0
        /// <summary>
        /// Fits a sigmoid model that maps the classifier's raw outputs to probabilities.
        /// </summary>
        /// <remarks>
        /// For every datum in <paramref name="dataset"/> the classifier's score counter is
        /// turned into a new datum (scores as features, original label as label); a linear
        /// classifier is then trained on those data with a Null-type log prior.
        /// </remarks>
        /// <param name="classifier">Classifier whose <c>ScoresOf</c> output supplies the features.</param>
        /// <param name="dataset">Data to score; each datum's label is carried over unchanged.</param>
        /// <returns>The linear classifier trained on the score data.</returns>
        private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
        {
            RVFDataset <L, L> scoreData = new RVFDataset <L, L>();
            int               index     = 0;

            while (index < dataset.Size())
            {
                RVFDatum <L, F> example     = dataset.GetRVFDatum(index);
                ICounter <L>    labelScores = classifier.ScoresOf((IDatum <L, F>)example);

                // NOTE(review): the extra null-keyed count presumably acts as a bias/intercept
                // feature for the sigmoid fit -- confirm against the classifier factory.
                labelScores.IncrementCount(null);
                scoreData.Add(new RVFDatum <L, L>(labelScores, example.Label()));
                index++;
            }

            LinearClassifierFactory <L, L> sigmoidFactory = new LinearClassifierFactory <L, L>();
            sigmoidFactory.SetPrior(new LogPrior(LogPrior.LogPriorType.Null));

            return sigmoidFactory.TrainClassifier(scoreData);
        }