/// <summary>
/// Builds the category map for naive Bayes: every category in the source
/// collection gets a fresh, empty <c>NaiveBayesCategory</c> entry.
/// </summary>
/// <param name="categoryCollection">The categories to index.</param>
public NaiveBayesCategoryCollection(CategoryCollection categoryCollection)
        {
            m_CategoryMap = new SortedDictionary<Category, NaiveBayesCategory>();

            // One empty per-category model per known category.
            foreach (var category in categoryCollection.Collection)
            {
                m_CategoryMap.Add(category, new NaiveBayesCategory());
            }
        }
        /// <summary>
        /// Creates a cost-sensitive naive Bayes classifier over the given categories,
        /// caching each category's name by its position in the collection.
        /// </summary>
        /// <param name="collect">The categories this classifier distinguishes.</param>
        public CostSensitiveNaiveBayes(CategoryCollection collect)
            : base(collect)
        {
            // NOTE(review): className is presumably allocated by the base class or a
            // field initializer to at least collect.Collection's size -- confirm.
            int index = 0;

            foreach (var category in collect.Collection)
            {
                className[index] = category.Name;
                index++;
            }
        }
        /// <summary>
        /// Builds a two-dimensional binary classification problem (labels +1 / -1)
        /// with randomly generated chess-board training and validation sets.
        /// </summary>
        /// <returns>The assembled classification problem.</returns>
        private static ClassificationProblem CreateChessBoard()
        {
            var categories = new CategoryCollection();
            categories.Add(new Category(+1, "+1"));
            categories.Add(new Category(-1, "-1"));

            var problem = new ClassificationProblem
            {
                Dimension = 2,
                CategoryCollection = categories,
                // Training and validation sets are drawn independently.
                TrainingSet = GetExamples(categories),
                ValidationSet = GetExamples(categories),
            };

            return problem;
        }
        /// <summary>
        /// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
        /// </summary>
        /// <returns></returns>
        /// <summary>
        /// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z"
        /// in the "data" folder.
        /// Builds a binary text-classification problem: each sub-folder of the data
        /// directory is one category (folder 0 maps to id -1, folder 1 to id +1), and
        /// its files are split into training/validation sets by Constants.TrainingSetRatio.
        /// </summary>
        /// <returns>The assembled classification problem.</returns>
        private static ClassificationProblem CreateText()
        {
            const string DataFolder = @"..\data\2_newsgroups";

            ClassificationProblem problem = new ClassificationProblem();

            ExampleSet t_set = new ExampleSet();
            ExampleSet v_set = new ExampleSet();

            CategoryCollection collect = new CategoryCollection();
            collect.Add(new Category(+1, "+1"));
            collect.Add(new Category(-1, "-1"));

            problem.Dimension = 2;
            problem.CategoryCollection = collect;

            DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);
            DirectoryInfo[] subfolders = dataFolder.GetDirectories();

            for (int i = 0; i < subfolders.Length; i++)
            {
                // Map folder index to the category ids declared above: 0 -> -1, 1 -> +1.
                int cat = i * 2 - 1;
                // All text files in this category's folder.
                FileInfo[] files = subfolders[i].GetFiles();

                // First TrainingSetRatio fraction of the files goes to training.
                int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);
                int count = 0;
                for (int j = 0; j < files.Length; j++)
                {
                    Example e = new Example();
                    // BUGFIX: examples were previously created unlabeled; attach the
                    // category corresponding to this folder (same mechanism GetExamples uses).
                    e.Label = collect.GetCategoryById(cat);
                    // NOTE(review): the file contents (files[j]) are still never turned
                    // into features here -- presumably done elsewhere, or TODO; confirm.

                    // BUGFIX: was "++count < trainSetCount", which put only
                    // trainSetCount - 1 files into the training set (off-by-one).
                    if (++count <= trainSetCount)
                    {
                        t_set.AddExample(e);
                    }
                    else
                    {
                        v_set.AddExample(e);
                    }
                }
            }

            problem.TrainingSet = t_set;
            problem.ValidationSet = v_set;

            return problem;
        }
        /// <summary>
        /// foamliu, 2009/04/15, 生成样本.
        /// </summary>
        /// <param name="set"></param>
        private static ExampleSet GetExamples(CategoryCollection collect)
        {
            const int Rows = 4;
            const int Columns = 4;
            const int CellWidth = 100;
            const int CellHeight = 100;
            const int ExampleNumber = 640;

            ExampleSet set = new ExampleSet();
            set.Examples.Clear();
            Random rand = new Random();

            for (int i = 0; i < ExampleNumber; i++)
            {
                int x = (int)(rand.NextDouble() * Columns * CellWidth);
                int y = (int)(rand.NextDouble() * Rows * CellHeight);

                Example e = new Example();
                e.X = new SparseVector(2);
                e.X[0] = x;
                e.X[1] = y;
                e.Label = collect.GetCategoryById(
                    GetCat(x, y, CellWidth, CellHeight));

                set.AddExample(e);
            }

            return set;
        }
 /// <summary>
 /// Creates a naive Bayes classifier over the given category collection.
 /// </summary>
 /// <param name="collect">Categories the classifier will distinguish.</param>
 public NaiveBayesClassifier(CategoryCollection collect) => m_CategoryCollection = collect;
// Example #7 (artifact from the code-sharing page this snippet was copied from: "Exemple #7" / vote count "0")
 /// <summary>
 /// Creates a gradient-descent binary SVM over the given categories.
 /// </summary>
 /// <param name="collect">The categories this classifier distinguishes.</param>
 public Binary_SVM_GradientDescent(CategoryCollection collect)
 {
     // A linear kernel is said to be the best choice for text classification,
     // because the feature dimension is already high enough.
     m_kernel = new LinearKernel();
     m_categoryCollection = collect;
 }