Пример #1
0
        /// <summary>
        /// Fits a sigmoid model over the classifier's raw scores so they can be turned into probabilities.
        /// </summary>
        /// <param name="classifier">Classifier whose <c>ScoresOf</c> output supplies the features of each training datum.</param>
        /// <param name="dataset">Dataset whose data are scored one by one and whose labels are reused as targets.</param>
        /// <returns>The classifier produced by a <c>LinearClassifierFactory</c> trained on the score dataset.</returns>
        private LinearClassifier <L, L> FitSigmoid(SVMLightClassifier <L, F> classifier, GeneralDataset <L, F> dataset)
        {
            RVFDataset <L, L> scoreData = new RVFDataset <L, L>();

            int index = 0;
            while (index < dataset.Size())
            {
                RVFDatum <L, F> example = dataset.GetRVFDatum(index);
                ICounter <L> labelScores = classifier.ScoresOf((IDatum <L, F>)example);

                // Bumps the count stored under the null key; presumably this acts as a
                // constant (bias) feature for the fitted model -- TODO confirm.
                labelScores.IncrementCount(null);

                // The per-label scores become the features; the original label stays the target.
                scoreData.Add(new RVFDatum <L, L>(labelScores, example.Label()));
                index++;
            }

            LinearClassifierFactory <L, L> sigmoidFactory = new LinearClassifierFactory <L, L>();
            sigmoidFactory.SetPrior(new LogPrior(LogPrior.LogPriorType.Null));

            return sigmoidFactory.TrainClassifier(scoreData);
        }
Пример #2
0
        /// <summary>
        /// Draws a random sample of this dataset's data into a new dataset of the same kind.
        /// </summary>
        /// <param name="randomSeed">Seed passed to the random number generator.</param>
        /// <param name="sampleFrac">Fraction of <c>Size()</c> to draw; the sample size is <c>(int)(Size() * sampleFrac)</c>.</param>
        /// <param name="sampleWithReplacement">
        /// If true, indices are drawn independently and may repeat; otherwise each index is used at most once.
        /// </param>
        /// <returns>A new <c>RVFDataset</c> or <c>Dataset</c> holding the sampled data.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// Sampling without replacement was requested for more data than this dataset holds.
        /// </exception>
        /// <exception cref="System.Exception">This instance is neither an <c>RVFDataset</c> nor a <c>Dataset</c>.</exception>
        public virtual Edu.Stanford.Nlp.Classify.GeneralDataset <L, F> SampleDataset(long randomSeed, double sampleFrac, bool sampleWithReplacement)
        {
            int    total      = this.Size();
            int    sampleSize = (int)(total * sampleFrac);
            Random rand       = new Random(randomSeed);

            Edu.Stanford.Nlp.Classify.GeneralDataset <L, F> subset;
            if (this is RVFDataset)
            {
                subset = new RVFDataset <L, F>();
            }
            else if (this is Dataset)
            {
                subset = new Dataset <L, F>();
            }
            else
            {
                throw new Exception("Can't handle this type of GeneralDataset.");
            }

            if (sampleWithReplacement)
            {
                for (int i = 0; i < sampleSize; i++)
                {
                    int datumNum = rand.NextInt(total);
                    subset.Add(this.GetDatum(datumNum));
                }
            }
            else
            {
                // Only `total` distinct indices exist, so asking for more could never be
                // satisfied: the rejection loop below would spin forever. Fail fast instead.
                if (sampleSize > total)
                {
                    throw new System.ArgumentOutOfRangeException(
                              "sampleFrac",
                              sampleFrac,
                              "Cannot sample more data than the dataset contains when sampling without replacement.");
                }

                // Rejection sampling: redraw whenever the index has already been taken.
                ICollection <int> sampledIndices = Generics.NewHashSet();
                while (subset.Size() < sampleSize)
                {
                    int datumNum = rand.NextInt(total);
                    if (!sampledIndices.Contains(datumNum))
                    {
                        subset.Add(this.GetDatum(datumNum));
                        sampledIndices.Add(datumNum);
                    }
                }
            }
            return subset;
        }
Пример #3
0
        /// <summary>
        /// Checks that <c>AddAll</c> merges two datasets into a third and that enumeration
        /// yields the data in the order they were added.
        /// </summary>
        public virtual void TestCombiningDatasets()
        {
            RVFDatum <string, string> datum1 = NewRVFDatum(null, "a", "b", "a");
            RVFDatum <string, string> datum2 = NewRVFDatum(null, "c", "c", "b");

            RVFDataset <string, string> data1 = new RVFDataset <string, string>();
            data1.Add(datum1);

            // Put the second datum into the second dataset so that both sources are
            // non-empty when combined (previously it was added to data1, leaving data2 empty).
            RVFDataset <string, string> data2 = new RVFDataset <string, string>();
            data2.Add(datum2);

            RVFDataset <string, string> data = new RVFDataset <string, string>();
            data.AddAll(data1);
            data.AddAll(data2);

            IEnumerator <RVFDatum <string, string> > iterator = data.GetEnumerator();

            // An enumerator starts positioned before the first element, so MoveNext()
            // must succeed before each read of Current.
            NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
            NUnit.Framework.Assert.AreEqual(datum1, iterator.Current);
            NUnit.Framework.Assert.IsTrue(iterator.MoveNext());
            NUnit.Framework.Assert.AreEqual(datum2, iterator.Current);
            NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
        }
Пример #4
0
        /// <summary>
        /// Round-trips a dataset with integer features through the SVMLight file format:
        /// reading into a dataset built without indexes must fail, while reading into one
        /// built with the original feature and label indexes must reproduce the data.
        /// </summary>
        public virtual void TestSVMLightIntegerFormat()
        {
            RVFDataset <bool, int> dataset = new RVFDataset <bool, int>();

            dataset.Add(NewRVFDatum(true, 1, 2, 1, 0));
            dataset.Add(NewRVFDatum(false, 2, 2, 0, 0));
            dataset.Add(NewRVFDatum(true, 0, 1, 2, 2));

            // NOTE(review): the temp file is never removed; consider deleting it once the
            // File API available in this port is confirmed.
            File tempFile = File.CreateTempFile("testSVMLightIntegerFormat", ".svm");
            dataset.WriteSVMLightFormat(tempFile);

            // Record the outcome and assert outside the try block: a failing assertion is
            // itself reported by throwing, so asserting inside the try would be swallowed
            // by the catch and the check could never fail.
            RVFDataset <bool, int> newDataset = new RVFDataset <bool, int>();
            bool readFailed = false;
            try
            {
                newDataset.ReadSVMLightFormat(tempFile);
            }
            catch (Exception)
            {
                readFailed = true;
            }
            NUnit.Framework.Assert.IsTrue(readFailed, "expected failure with empty indexes");

            // With the original indexes supplied, the file must read back to equal data.
            newDataset = new RVFDataset <bool, int>(dataset.Size(), dataset.FeatureIndex(), dataset.LabelIndex());
            newDataset.ReadSVMLightFormat(tempFile);
            NUnit.Framework.Assert.AreEqual(CollectionUtils.ToList(dataset), CollectionUtils.ToList(newDataset));
        }
 /// <summary>Creates the enumerator and stores the given dataset in the <c>_enclosing</c> field.</summary>
 /// <param name="_enclosing">The <c>RVFDataset</c> instance to keep a reference to.</param>
 // NOTE(review): the class header is not visible here; the name suggests a generated
 // enumerator type nested inside RVFDataset -- confirm against the full file.
 public _IEnumerator_980(RVFDataset <L, F> _enclosing)
 {
     this._enclosing = _enclosing;
 }