Пример #1
0
        /// <summary>
        /// Builds a small bag of four labelled token sequences, converts it to sparse vectors
        /// and then applies TF-IDF, checking the number of classifications and the size of the
        /// first classification's data at each stage.
        /// </summary>
        public void TestTFIDF()
        {
            // Each distinct token is mapped to an index through a shared string table
            var stringTableBuilder = new StringTableBuilder();
            var bag = new ClassificationBag {
                Classification = new[] {
                    Tuple.Create(new[] { "Chinese", "Beijing", "Chinese" }, true),
                    Tuple.Create(new[] { "Chinese", "Chinese", "Shanghai" }, true),
                    Tuple.Create(new[] { "Chinese", "Macao" }, true),
                    Tuple.Create(new[] { "Tokyo", "Japan", "Chinese" }, false),
                }.Select(d => new IndexedClassification {
                    // The boolean flag selects the label: true -> "china", false -> "japan"
                    Name = d.Item2 ? "china" : "japan",
                    Data = d.Item1.Select(s => stringTableBuilder.GetIndex(s)).ToArray()
                }).ToArray()
            };

            // Expected value goes first so that a failure reports expected/actual correctly
            Assert.AreEqual(4, bag.Classification.Length);
            Assert.AreEqual(3, bag.Classification[0].Data.Length);

            // NOTE(review): the counts asserted below suggest that entries sharing a label are
            // merged (two labels -> two classifications, four distinct "china" tokens) - confirm
            // against ConvertToSparseVectors
            var set = bag.ConvertToSparseVectors(true);

            Assert.AreEqual(2, set.Classification.Length);
            Assert.AreEqual(4, set.Classification[0].Data.Length);

            var tfidf = set.TFIDF();

            // The TF-IDF output is expected to have the same sizes as the sparse vector set
            Assert.AreEqual(2, tfidf.Classification.Length);
            Assert.AreEqual(4, tfidf.Classification[0].Data.Length);
        }
Пример #2
0
        /// <summary>
        /// Trains a multinomial naive bayes model, which keeps the count of each feature within the model
        /// (useful for long documents). Every classification in the bag is added to the trainer by name
        /// and data before the model is trained.
        /// </summary>
        /// <param name="data">The training data</param>
        /// <returns>A model that can be used for classification</returns>
        public static MultinomialNaiveBayes TrainMultinomialNaiveBayes(this ClassificationBag data)
        {
            var bayesTrainer = new MultinomialNaiveBayesTrainer();
            foreach (var item in data.Classification)
            {
                var label = item.Name;
                var features = item.Data;
                bayesTrainer.AddClassification(label, features);
            }

            var model = bayesTrainer.Train();
            return model;
        }
Пример #3
0
        ///// <summary>
        ///// Random projections allow you to reduce the dimensions of a matrix while still preserving significant information
        ///// </summary>
        ///// <param name="lap">Linear algebra provider</param>
        ///// <param name="inputSize">The vector size to reduce from</param>
        ///// <returns></returns>
        //public static IRandomProjection CreateRandomProjection(this ILinearAlgebraProvider lap, int inputSize)
        //{
        //    var reducedSize = RandomProjection.MinDim(inputSize);
        //    return CreateRandomProjection(lap, inputSize, reducedSize);
        //}

        ///// <summary>
        ///// Markov models summarise sequential data (over a window of size 2)
        ///// </summary>
        ///// <typeparam name="T">The data type within the model</typeparam>
        ///// <param name="data">An enumerable of sequences of type T</param>
        ///// <returns>A sequence of markov model observations</returns>
        //public static MarkovModel2<T> TrainMarkovModel2<T>(this IEnumerable<IEnumerable<T>> data)
        //{
        //    var trainer = new MarkovModelTrainer2<T>();
        //    foreach (var sequence in data)
        //        trainer.Add(sequence);
        //    return trainer.Build();
        //}

        ///// <summary>
        ///// Markov models summarise sequential data (over a window of size 3)
        ///// </summary>
        ///// <typeparam name="T">The data type within the model</typeparam>
        ///// <param name="data">An enumerable of sequences of type T</param>
        ///// <returns>A sequence of markov model observations</returns>
        //public static MarkovModel3<T> TrainMarkovModel3<T>(this IEnumerable<IEnumerable<T>> data)
        //{
        //    var trainer = new MarkovModelTrainer3<T>();
        //    foreach (var sequence in data)
        //        trainer.Add(sequence);
        //    return trainer.Build();
        //}

        /// <summary>
        /// Trains a bernoulli naive bayes model, which treats each feature as either 1 or 0 and discards
        /// all feature counts (useful for short documents). Every classification in the bag is added to
        /// the trainer by name and data before the model is trained.
        /// </summary>
        /// <param name="data">The training data</param>
        /// <returns>A model that can be used for classification</returns>
        public static BernoulliNaiveBayes TrainBernoulliNaiveBayes(this ClassificationBag data)
        {
            var bayesTrainer = new BernoulliNaiveBayesTrainer();
            foreach (var item in data.Classification)
            {
                var label = item.Name;
                var features = item.Data;
                bayesTrainer.AddClassification(label, features);
            }

            var model = bayesTrainer.Train();
            return model;
        }