/// <summary>
/// Console demo of Naive Bayes classification: builds a small in-memory data
/// set (job, sex, income -> politics), splits it into train and test sets,
/// trains a <c>BayesClassifier</c>, prints its accuracy on both sets and then
/// prints the class probabilities for one hand-written sample.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    Console.WriteLine("\nBegin Naive Bayes classification demo");
    Console.WriteLine("Goal is to predict (liberal/conservative) from job, " + "sex and income\n");

    // Each row is { job, sex, income, politics }; the last column is the label.
    string[][] rawData = new string[][]
    {
        new string[] { "analyst", "male", "high", "conservative" },
        new string[] { "barista", "female", "low", "liberal" },
        new string[] { "cook", "male", "medium", "conservative" },
        new string[] { "doctor", "female", "medium", "conservative" },
        new string[] { "analyst", "female", "low", "liberal" },
        new string[] { "doctor", "male", "medium", "conservative" },
        new string[] { "analyst", "male", "medium", "conservative" },
        new string[] { "cook", "female", "low", "liberal" },
        new string[] { "doctor", "female", "medium", "liberal" },
        new string[] { "cook", "female", "low", "liberal" },
        new string[] { "doctor", "male", "medium", "conservative" },
        new string[] { "cook", "female", "high", "liberal" },
        new string[] { "barista", "female", "medium", "liberal" },
        new string[] { "analyst", "male", "low", "liberal" },
        new string[] { "doctor", "female", "high", "conservative" },
        new string[] { "barista", "female", "medium", "conservative" },
        new string[] { "doctor", "male", "medium", "conservative" },
        new string[] { "barista", "male", "high", "conservative" },
        new string[] { "doctor", "female", "medium", "liberal" },
        new string[] { "analyst", "male", "low", "liberal" },
        new string[] { "doctor", "male", "medium", "conservative" },
        new string[] { "cook", "male", "medium", "conservative" },
        new string[] { "doctor", "female", "high", "conservative" },
        new string[] { "analyst", "male", "high", "conservative" },
        new string[] { "barista", "female", "medium", "liberal" },
        new string[] { "doctor", "male", "medium", "conservative" },
        new string[] { "analyst", "female", "medium", "conservative" },
        new string[] { "analyst", "male", "medium", "conservative" },
        new string[] { "doctor", "female", "medium", "liberal" },
        new string[] { "barista", "male", "medium", "conservative" }
    };

    Console.WriteLine("The raw data is: \n");
    ShowData(rawData, 5, true);

    Console.WriteLine("Splitting data into 80%-20% train and test sets");
    string[][] trainData;
    string[][] testData;
    // NOTE(review): the meaning of the literal 15 is defined by MakeTrainTest,
    // which is not visible here (presumably a random seed) - confirm.
    MakeTrainTest(rawData, 15, out trainData, out testData);
    Console.WriteLine("Done \n");

    Console.WriteLine("Training data: \n");
    ShowData(trainData, 5, true);
    Console.WriteLine("Test data: \n");
    ShowData(testData, 5, true);

    Console.WriteLine("Creating Naive Bayes classifier object");
    Console.WriteLine("Training classifier using training data");
    BayesClassifier classifier = new BayesClassifier();
    classifier.Train(trainData);
    Console.WriteLine("Done \n");

    // Accuracy is evaluated on the train set first, then on the test set.
    Console.WriteLine("Accuracy of model on train data = " + classifier.Accuracy(trainData).ToString("F4"));
    Console.WriteLine("Accuracy of model on test data = " + classifier.Accuracy(testData).ToString("F4"));

    Console.WriteLine("\nPredicting politics for job = barista, sex = female, " + "income = medium \n");
    string[] sample = new string[] { "barista", "female", "medium" };

    double probLiberal = classifier.Probability("liberal", sample);
    Console.WriteLine("Probability of liberal = " + probLiberal.ToString("F4"));

    double probConservative = classifier.Probability("conservative", sample);
    Console.WriteLine("Probability of conservative = " + probConservative.ToString("F4"));

    Console.WriteLine("\nEnd Naive Bayes classification demo\n");
    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
/// <summary>
/// Trains a <c>BayesClassifier</c> on CSV text data, classifies a test set and
/// writes the predictions plus per-class precision/recall, macro-F and micro-F
/// to <c>dir_result\result.txt</c>.
/// </summary>
/// <remarks>
/// The first line of <c>config.txt</c> selects the mode: when its first
/// space-separated token is <c>1</c>, a random hold-out of <c>size / k</c> rows
/// from <c>dir_pre\dataCSV.csv</c> is used as the test set (k is the second
/// token); otherwise separate training and test CSV files are read.
/// A ratio whose denominator is zero (a class that was never predicted or never
/// present, or an empty test set) is reported as 0 instead of NaN, so a single
/// empty class no longer turns the macro averages into NaN.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="InvalidOperationException">
/// <c>config.txt</c> is empty, or cross-validation is requested with k &lt;= 0.
/// </exception>
static void Main(string[] args)
{
    Program program = new Program();

    // 'using' guarantees the result file is flushed and closed even when
    // parsing, training or classification throws part-way through.
    using (FileStream fs = new FileStream("dir_result\\result.txt", FileMode.Create))
    using (StreamWriter streamWriter = new StreamWriter(fs))
    {
        int k = 10;
        int type = 2;
        List<string> listTest = new List<string>();
        List<int> correct_result = new List<int>();

        program.GetDictionary("dir_pre\\dictionary_dataCSV.csv");

        string line;
        using (StreamReader streamReader = new StreamReader("config.txt"))
        {
            line = streamReader.ReadLine();
        }
        if (line == null)
        {
            throw new InvalidOperationException("config.txt is empty; expected \"<type> [k]\" on the first line.");
        }

        string[] configParts = line.Split(' ');
        if (Int32.Parse(configParts[0]) == 1)
        {
            type = 1;
            k = Int32.Parse(configParts[1]);
            if (k <= 0)
            {
                throw new InvalidOperationException("config.txt: k must be a positive integer for cross-validation.");
            }
        }

        // The classifier is constructed with a string array of class names (the class keys).
        // NOTE(review): indexing arrClass by key assumes the dictionary keys are
        // exactly 0..Count-1 - confirm against GetDictionary.
        string[] arrClass = new string[program._predictionDictionary.Count];
        foreach (var key in program._predictionDictionary.Keys)
        {
            arrClass[key] = key.ToString();
        }
        var classifier = new BayesClassifier(arrClass);

        if (type == 1) // cross-validation
        {
            var inputRows = DataAccess.DataTable.New.ReadCsv("dir_pre\\dataCSV.csv").Rows.ToList();
            int size = inputRows.Count;
            int n = size / k;
            Random ran = new Random();

            // Randomly pick n rows for testing; whatever remains is used for training.
            List<Row> test = new List<Row>();
            for (int i = 0; i < n; i++)
            {
                int j = ran.Next(0, inputRows.Count);
                // Move the randomly chosen row from the training pool to the test set.
                test.Add(inputRows.ElementAt(j));
                inputRows.RemoveAt(j);
            }

            // Training
            foreach (var item in inputRows)
            {
                classifier.Train(item["class"], item["text"]);
            }

            // Test inputs and their expected classes
            listTest = test.Select(row => row["text"]).ToList();
            correct_result = test.Select(row => Int32.Parse(row["class"])).ToList();
        }
        else if (type == 2)
        {
            // Training data
            var dataTable = DataAccess.DataTable.New.ReadCsv("dir_pre\\trainning_dataCSV.csv");
            foreach (var item in dataTable.Rows)
            {
                classifier.Train(item["class"], item["text"]);
            }

            dataTable = DataAccess.DataTable.New.ReadCsv("dir_pre\\test_dataCSV.csv");
            // Test inputs and their expected classes
            listTest = dataTable.Rows.Select(row => row["text"]).ToList();
            correct_result = dataTable.Rows.Select(row => Int32.Parse(row["class"])).ToList();
        }

        // Classify every test document and log "<predicted class>,<text>".
        List<int> result = new List<int>();
        foreach (string text in listTest)
        {
            string rs = classifier.Classify(text);
            result.Add(Int32.Parse(rs));
            streamWriter.WriteLine(rs + "," + text);
        }

        int n_class = program._predictionDictionary.Count;
        int total_count_correct = 0; // total number of correctly classified documents
        double[] PiArr = new double[n_class]; // per-class precision
        double[] RiArr = new double[n_class]; // per-class recall

        for (int i = 0; i < n_class; i++)
        {
            int count_correct = 0; // documents correctly assigned to class i
            int count_machine = 0; // documents the classifier assigned to class i
            int count_doc = 0;     // documents that really belong to class i

            // result and correct_result are built from the same rows, so they
            // have the same length and can be walked in a single pass.
            for (int j = 0; j < correct_result.Count; j++)
            {
                bool predicted = result[j] == i;
                bool actual = correct_result[j] == i;
                if (predicted)
                {
                    count_machine++;
                }
                if (actual)
                {
                    count_doc++;
                }
                if (predicted && actual)
                {
                    count_correct++;
                }
            }

            total_count_correct += count_correct;

            // 0/0 in double arithmetic is NaN; report an undefined ratio as 0.
            PiArr[i] = count_machine == 0 ? 0.0 : 1.0 * count_correct / count_machine;
            RiArr[i] = count_doc == 0 ? 0.0 : 1.0 * count_correct / count_doc;
        }

        // Enumerable.Average throws on an empty sequence, so guard n_class == 0.
        double Pmacro = n_class == 0 ? 0.0 : PiArr.Average();
        double Rmacro = n_class == 0 ? 0.0 : RiArr.Average();
        double Fmacro = (Pmacro + Rmacro) > 0 ? (2 * Pmacro * Rmacro) / (Pmacro + Rmacro) : 0.0;
        double Fmicro = correct_result.Count == 0 ? 0.0 : 1.0 * total_count_correct / correct_result.Count;

        for (int i = 0; i < n_class; i++)
        {
            streamWriter.WriteLine("P" + i + "=" + "P" + program._predictionDictionary[i] + "=" + PiArr[i]);
            streamWriter.WriteLine("R" + i + "=" + "R" + program._predictionDictionary[i] + "=" + RiArr[i]);
        }
        streamWriter.WriteLine("Fmacro=" + Fmacro);
        streamWriter.WriteLine("Fmicro=" + Fmicro);
    }
}