/// <summary>
/// Builds a Naive Bayes view over the given categories: one fresh, empty
/// NaiveBayesCategory entry per Category in the source collection.
/// </summary>
/// <param name="categoryCollection">Categories to mirror into the map.</param>
public NaiveBayesCategoryCollection(CategoryCollection categoryCollection)
{
    m_CategoryMap = new SortedDictionary<Category, NaiveBayesCategory>();

    foreach (Category category in categoryCollection.Collection)
    {
        m_CategoryMap.Add(category, new NaiveBayesCategory());
    }
}
/// <summary>
/// Initializes the cost-sensitive classifier and caches each category's
/// name into the className array, in collection order.
/// </summary>
/// <param name="collect">Category collection forwarded to the base classifier.</param>
public CostSensitiveNaiveBayes(CategoryCollection collect)
    : base(collect)
{
    int index = 0;
    foreach (Category category in collect.Collection)
    {
        className[index] = category.Name;
        index++;
    }
}
/// <summary>
/// Builds a two-class, two-dimensional "chess board" toy problem, drawing
/// a random training set and a random validation set from GetExamples.
/// </summary>
/// <returns>The assembled classification problem.</returns>
private static ClassificationProblem CreateChessBoard()
{
    CategoryCollection collect = new CategoryCollection();
    collect.Add(new Category(+1, "+1"));
    collect.Add(new Category(-1, "-1"));

    ClassificationProblem problem = new ClassificationProblem();
    problem.Dimension = 2;
    problem.CategoryCollection = collect;
    problem.TrainingSet = GetExamples(collect);
    problem.ValidationSet = GetExamples(collect);

    return problem;
}
/// <summary>
/// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
/// Loads the two-newsgroup corpus, one subfolder per category, and splits
/// each category's files into training and validation sets according to
/// Constants.TrainingSetRatio.
/// </summary>
/// <returns>The assembled classification problem.</returns>
private static ClassificationProblem CreateText()
{
    const string DataFolder = @"..\data\2_newsgroups";

    ClassificationProblem problem = new ClassificationProblem();
    ExampleSet t_set = new ExampleSet();
    ExampleSet v_set = new ExampleSet();

    CategoryCollection collect = new CategoryCollection();
    collect.Add(new Category(+1, "+1"));
    collect.Add(new Category(-1, "-1"));
    problem.Dimension = 2;
    problem.CategoryCollection = collect;

    DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);
    // Use Array.Length instead of LINQ Count(): the original re-enumerated
    // the arrays on every loop test for no benefit.
    DirectoryInfo[] subfolders = dataFolder.GetDirectories();

    for (int i = 0; i < subfolders.Length; i++)
    {
        DirectoryInfo categoryFolder = subfolders[i];

        // Maps folder index 0 -> category id -1 and index 1 -> id +1,
        // matching the two categories registered above.
        int cat = i * 2 - 1;

        // for all the text files in each category
        FileInfo[] files = categoryFolder.GetFiles();
        int count = 0;
        int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);

        for (int j = 0; j < files.Length; j++)
        {
            FileInfo textFile = files[j];

            Example e = new Example();
            // Fix: 'cat' was computed but never used, so every example was
            // created unlabeled. Resolve the label the same way GetExamples does.
            e.Label = collect.GetCategoryById(cat);
            // NOTE(review): no features are ever extracted from textFile into
            // e.X here — presumably text vectorization was intended; confirm.

            // NOTE(review): pre-increment means the training set receives
            // trainSetCount - 1 files, not trainSetCount — possible off-by-one;
            // kept as-is to preserve the existing split.
            if (++count < trainSetCount)
            {
                t_set.AddExample(e);
            }
            else
            {
                v_set.AddExample(e);
            }
        }
    }

    problem.TrainingSet = t_set;
    problem.ValidationSet = v_set;
    return problem;
}
// Shared RNG. Fix: the original created a new Random() inside GetExamples;
// with the time-based default seed, the two back-to-back calls made by
// CreateChessBoard can reuse the same seed and produce identical training
// and validation sets. A single static instance guarantees distinct draws.
private static readonly Random s_random = new Random();

/// <summary>
/// foamliu, 2009/04/15, generates samples.
/// Draws ExampleNumber random 2-D points on a 4x4 grid of 100x100 cells and
/// labels each point via GetCat, resolving the category by id.
/// </summary>
/// <param name="collect">Category collection used to resolve labels by id.</param>
/// <returns>The generated example set.</returns>
private static ExampleSet GetExamples(CategoryCollection collect)
{
    const int Rows = 4;
    const int Columns = 4;
    const int CellWidth = 100;
    const int CellHeight = 100;
    const int ExampleNumber = 640;

    // A fresh set is already empty; the original's Examples.Clear() was redundant.
    ExampleSet set = new ExampleSet();

    for (int i = 0; i < ExampleNumber; i++)
    {
        // Uniform point inside the Columns*CellWidth x Rows*CellHeight board.
        int x = (int)(s_random.NextDouble() * Columns * CellWidth);
        int y = (int)(s_random.NextDouble() * Rows * CellHeight);

        Example e = new Example();
        e.X = new SparseVector(2);
        e.X[0] = x;
        e.X[1] = y;
        e.Label = collect.GetCategoryById(
            GetCat(x, y, CellWidth, CellHeight));
        set.AddExample(e);
    }

    return set;
}
/// <summary>
/// Creates a Naive Bayes classifier over the given category collection.
/// </summary>
/// <param name="collect">Categories the classifier can predict.</param>
public NaiveBayesClassifier(CategoryCollection collect)
{
    this.m_CategoryCollection = collect;
}
/// <summary>
/// Creates a gradient-descent binary SVM over the given categories.
/// </summary>
/// <param name="collect">Category collection for this classifier.</param>
public Binary_SVM_GradientDescent(CategoryCollection collect)
{
    this.m_categoryCollection = collect;

    // It was said that for text classification, linear kernel is the best choice,
    // because of the already-high-enough feature dimension
    this.m_kernel = new LinearKernel();
}