/// <summary>
/// Fits a sigmoid model that maps the raw outputs of an SVMLight classifier onto probabilities.
/// </summary>
/// <remarks>
/// Every datum of <paramref name="dataset"/> is scored by <paramref name="classifier"/>; the
/// resulting per-label scores become the features of a new datum that keeps the original label.
/// A linear classifier is then trained on those re-encoded datums with a Null-type log prior.
/// </remarks>
/// <param name="classifier">Classifier whose ScoresOf output is used as the feature counter.</param>
/// <param name="dataset">Dataset whose datums are scored one by one, in index order.</param>
/// <returns>The linear classifier trained on the score-based dataset.</returns>
private LinearClassifier<L, L> FitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset)
{
    RVFDataset<L, L> scoreData = new RVFDataset<L, L>();

    int index = 0;
    while (index < dataset.Size())
    {
        RVFDatum<L, F> example = dataset.GetRVFDatum(index);

        // The classifier's label scores serve as the feature vector of the new datum.
        ICounter<L> labelScores = classifier.ScoresOf((IDatum<L, F>)example);

        // NOTE(review): the extra count under the null key presumably acts as a
        // constant bias/intercept feature -- confirm against the classifier docs.
        labelScores.IncrementCount(null);

        RVFDatum<L, L> rescored = new RVFDatum<L, L>(labelScores, example.Label());
        scoreData.Add(rescored);

        index++;
    }

    LinearClassifierFactory<L, L> sigmoidFactory = new LinearClassifierFactory<L, L>();
    LogPrior nullPrior = new LogPrior(LogPrior.LogPriorType.Null);
    sigmoidFactory.SetPrior(nullPrior);

    LinearClassifier<L, L> sigmoid = sigmoidFactory.TrainClassifier(scoreData);
    return sigmoid;
}
/// <summary>
/// Draws a random sample of this dataset's datums into a new dataset of the same kind
/// (an RVFDataset when this instance is one, otherwise a Dataset).
/// </summary>
/// <param name="randomSeed">Seed handed to the random number generator.</param>
/// <param name="sampleFrac">
/// Fraction of this dataset to draw; the sample holds <c>(int)(Size() * sampleFrac)</c> datums.
/// Must not yield more datums than the dataset contains when sampling without replacement.
/// </param>
/// <param name="sampleWithReplacement">
/// When true the same datum index may be drawn several times; when false every index is
/// drawn at most once.
/// </param>
/// <returns>A newly created dataset holding the sampled datums.</returns>
/// <exception cref="System.NotSupportedException">
/// This instance is neither an RVFDataset nor a Dataset.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Sampling without replacement was requested for more datums than the dataset contains.
/// </exception>
public virtual Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> SampleDataset(long randomSeed, double sampleFrac, bool sampleWithReplacement)
{
    int total = this.Size();
    int sampleSize = (int)(total * sampleFrac);
    Random rand = new Random(randomSeed);

    Edu.Stanford.Nlp.Classify.GeneralDataset<L, F> subset;
    if (this is RVFDataset)
    {
        subset = new RVFDataset<L, F>();
    }
    else if (this is Dataset)
    {
        subset = new Dataset<L, F>();
    }
    else
    {
        // NotSupportedException derives from Exception, so existing catch blocks still match.
        throw new System.NotSupportedException("Can't handle this type of GeneralDataset.");
    }

    if (sampleWithReplacement)
    {
        for (int i = 0; i < sampleSize; i++)
        {
            int datumNum = rand.NextInt(total);
            subset.Add(this.GetDatum(datumNum));
        }
        return subset;
    }

    // Without replacement only `total` distinct indices exist; asking for more would
    // make the rejection loop below spin forever, so fail fast instead.
    if (sampleSize > total)
    {
        throw new System.ArgumentOutOfRangeException(
            "sampleFrac",
            "Cannot sample " + sampleSize + " datums without replacement from a dataset of size " + total + ".");
    }

    // Rejection sampling: redraw whenever an index has already been used.
    ICollection<int> sampledIndices = Generics.NewHashSet();
    while (subset.Size() < sampleSize)
    {
        int datumNum = rand.NextInt(total);
        if (!sampledIndices.Contains(datumNum))
        {
            subset.Add(this.GetDatum(datumNum));
            sampledIndices.Add(datumNum);
        }
    }
    return subset;
}
/// <summary>
/// Checks that adding two datasets to a third via AddAll yields their datums in insertion order.
/// </summary>
public virtual void TestCombiningDatasets()
{
    RVFDatum<string, string> datum1 = NewRVFDatum(null, "a", "b", "a");
    RVFDatum<string, string> datum2 = NewRVFDatum(null, "c", "c", "b");

    RVFDataset<string, string> data1 = new RVFDataset<string, string>();
    data1.Add(datum1);

    // Each source dataset gets its own datum so that both AddAll calls contribute something.
    RVFDataset<string, string> data2 = new RVFDataset<string, string>();
    data2.Add(datum2);

    RVFDataset<string, string> data = new RVFDataset<string, string>();
    data.AddAll(data1);
    data.AddAll(data2);

    // An enumerator starts before the first element: MoveNext must be called
    // before every read of Current, otherwise Current is not advanced.
    IEnumerator<RVFDatum<string, string>> iterator = data.GetEnumerator();
    NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
    NUnit.Framework.Assert.AreEqual(datum1, iterator.Current);
    NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
    NUnit.Framework.Assert.AreEqual(datum2, iterator.Current);
    NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
}
/// <summary>
/// Round-trips a dataset with integer features through the SVMLight file format.
/// </summary>
/// <remarks>
/// Reading into a dataset created with the default constructor is expected to throw; reading
/// into a dataset constructed with the original feature and label indexes is expected to
/// reproduce the original datums.
/// </remarks>
public virtual void TestSVMLightIntegerFormat()
{
    RVFDataset<bool, int> dataset = new RVFDataset<bool, int>();
    dataset.Add(NewRVFDatum(true, 1, 2, 1, 0));
    dataset.Add(NewRVFDatum(false, 2, 2, 0, 0));
    dataset.Add(NewRVFDatum(true, 0, 1, 2, 2));

    // NOTE(review): the temporary file is never removed -- consider deleting it once the
    // test finishes (the File API is not visible from this block).
    File tempFile = File.CreateTempFile("testSVMLightIntegerFormat", ".svm");
    dataset.WriteSVMLightFormat(tempFile);

    RVFDataset<bool, int> newDataset = new RVFDataset<bool, int>();

    // Record the failure in a flag instead of calling Assert.Fail inside the try block:
    // NUnit signals failures with an exception, which catch (Exception) would swallow,
    // making the check impossible to fail.
    bool readFailed = false;
    try
    {
        newDataset.ReadSVMLightFormat(tempFile);
    }
    catch (Exception)
    {
        readFailed = true;
    }
    NUnit.Framework.Assert.IsTrue(readFailed, "expected failure with empty indexes");

    newDataset = new RVFDataset<bool, int>(dataset.Size(), dataset.FeatureIndex(), dataset.LabelIndex());
    newDataset.ReadSVMLightFormat(tempFile);
    NUnit.Framework.Assert.AreEqual(CollectionUtils.ToList(dataset), CollectionUtils.ToList(newDataset));
}
/// <summary>
/// Creates the instance and remembers the dataset it was created for.
/// </summary>
/// <param name="_enclosing">The RVFDataset stored in this object's <c>_enclosing</c> field.</param>
public _IEnumerator_980(RVFDataset<L, F> _enclosing)
{
    this._enclosing = _enclosing;
}