/// <summary>
/// Verifies that adding two single-datum datasets to a third one via
/// <c>AddAll</c> yields both datums, in insertion order, and nothing else.
/// </summary>
public virtual void TestCombiningDatasets()
{
    RVFDatum <string, string> datum1 = NewRVFDatum(null, "a", "b", "a");
    RVFDatum <string, string> datum2 = NewRVFDatum(null, "c", "c", "b");

    RVFDataset <string, string> data1 = new RVFDataset <string, string>();
    data1.Add(datum1);

    // The second datum belongs in the second dataset; otherwise data2 stays
    // empty and the test never combines two populated datasets.
    RVFDataset <string, string> data2 = new RVFDataset <string, string>();
    data2.Add(datum2);

    RVFDataset <string, string> data = new RVFDataset <string, string>();
    data.AddAll(data1);
    data.AddAll(data2);

    using (IEnumerator <RVFDatum <string, string> > iterator = data.GetEnumerator())
    {
        // An enumerator starts positioned before the first element, so
        // MoveNext() must be called before every read of Current.
        NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
        NUnit.Framework.Assert.AreEqual(datum1, iterator.Current);
        NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
        NUnit.Framework.Assert.AreEqual(datum2, iterator.Current);
        NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
    }
}
/// <summary>
/// Converts this dataset into an <c>RVFDataset</c> whose datums carry
/// L1-normalized TF-IDF feature values.
/// </summary>
/// <remarks>
/// The result is constructed with this dataset's size, feature index and label
/// index. The feature counter is obtained once from <c>GetFeatureCounter()</c>
/// and reused for every datum.
/// </remarks>
/// <returns>The newly built <c>RVFDataset</c>, one datum per datum of this dataset.</returns>
public virtual RVFDataset <L, F> GetL1NormalizedTFIDFDataset()
{
    RVFDataset <L, F> converted = new RVFDataset <L, F>(this.Size(), this.featureIndex, this.labelIndex);
    ICounter <F> docCounts = GetFeatureCounter();

    for (int index = 0; index < this.Size(); index++)
    {
        // Convert each datum in order and append it to the result.
        IDatum <L, F> source = this.GetDatum(index);
        converted.Add(GetL1NormalizedTFIDFDatum(source, docCounts));
    }

    return converted;
}
/// <summary>
/// Round-trips a dataset with boolean labels and integer features through the
/// SVMLight file format: reading into a dataset with empty indexes must throw,
/// while reading into a dataset that shares the original indexes must
/// reproduce the original data.
/// </summary>
public virtual void TestSVMLightIntegerFormat()
{
    RVFDataset <bool, int> dataset = new RVFDataset <bool, int>();
    dataset.Add(NewRVFDatum(true, 1, 2, 1, 0));
    dataset.Add(NewRVFDatum(false, 2, 2, 0, 0));
    dataset.Add(NewRVFDatum(true, 0, 1, 2, 2));

    // NOTE(review): the temp file is never removed; confirm the File API's
    // delete method and clean up in a finally block.
    File tempFile = File.CreateTempFile("testSVMLightIntegerFormat", ".svm");
    dataset.WriteSVMLightFormat(tempFile);

    RVFDataset <bool, int> newDataset = new RVFDataset <bool, int>();

    // Record the failure with a flag and assert outside the try block.
    // Calling Assert.Fail inside the try would be caught by catch (Exception)
    // as well, so the check could never fail.
    bool readFailed = false;
    try
    {
        newDataset.ReadSVMLightFormat(tempFile);
    }
    catch (Exception)
    {
        readFailed = true;
    }
    NUnit.Framework.Assert.IsTrue(readFailed, "expected failure with empty indexes");

    newDataset = new RVFDataset <bool, int>(dataset.Size(), dataset.FeatureIndex(), dataset.LabelIndex());
    newDataset.ReadSVMLightFormat(tempFile);
    NUnit.Framework.Assert.AreEqual(CollectionUtils.ToList(dataset), CollectionUtils.ToList(newDataset));
}
/// <summary>
/// Trains a linear classifier over the SVM classifier's scores so that its
/// outputs can be turned into probabilities (a sigmoid fit).
/// </summary>
/// <remarks>
/// For every datum of <paramref name="dataset"/>, the classifier's scores
/// become the features of a new datum that keeps the original label. The
/// count for the <c>null</c> key is incremented on each score counter
/// (presumably acting as a bias feature — TODO confirm). The resulting
/// dataset is trained with a <c>LogPrior</c> of type <c>Null</c>.
/// </remarks>
/// <param name="classifier">Classifier whose scores are used as features.</param>
/// <param name="dataset">Data that is scored and whose labels are reused.</param>
/// <returns>The linear classifier trained on the score dataset.</returns>
private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
{
    LinearClassifierFactory <L, L> sigmoidFactory = new LinearClassifierFactory <L, L>();
    sigmoidFactory.SetPrior(new LogPrior(LogPrior.LogPriorType.Null));

    RVFDataset <L, L> scoreData = new RVFDataset <L, L>();
    for (int index = 0; index < dataset.Size(); index++)
    {
        RVFDatum <L, F> example = dataset.GetRVFDatum(index);

        // Score through the IDatum view of the example, as the original cast did.
        IDatum <L, F> exampleAsDatum = (IDatum <L, F>)example;
        ICounter <L> labelScores = classifier.ScoresOf(exampleAsDatum);
        labelScores.IncrementCount(null);

        RVFDatum <L, L> scoreDatum = new RVFDatum <L, L>(labelScores, example.Label());
        scoreData.Add(scoreDatum);
    }

    return sigmoidFactory.TrainClassifier(scoreData);
}