예제 #1
0
        /// <summary>
        /// Entry point of the Naive Bayes demo. Builds a 30-row in-memory data set,
        /// splits it into train and test sets, trains a <c>BayesClassifier</c>,
        /// prints its accuracy on both sets, prints the probability of each politics
        /// label for one sample, and finally waits for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            Console.WriteLine("\nBegin Naive Bayes classification demo");
            Console.WriteLine("Goal is to predict (liberal/conservative) from job, " + "sex and income\n");

            // Each row is { job, sex, income, politics }; politics is the value being predicted.
            string[][] rawData = new string[][]
            {
                new string[] { "analyst", "male", "high", "conservative" },
                new string[] { "barista", "female", "low", "liberal" },
                new string[] { "cook", "male", "medium", "conservative" },
                new string[] { "doctor", "female", "medium", "conservative" },
                new string[] { "analyst", "female", "low", "liberal" },
                new string[] { "doctor", "male", "medium", "conservative" },
                new string[] { "analyst", "male", "medium", "conservative" },
                new string[] { "cook", "female", "low", "liberal" },
                new string[] { "doctor", "female", "medium", "liberal" },
                new string[] { "cook", "female", "low", "liberal" },
                new string[] { "doctor", "male", "medium", "conservative" },
                new string[] { "cook", "female", "high", "liberal" },
                new string[] { "barista", "female", "medium", "liberal" },
                new string[] { "analyst", "male", "low", "liberal" },
                new string[] { "doctor", "female", "high", "conservative" },
                new string[] { "barista", "female", "medium", "conservative" },
                new string[] { "doctor", "male", "medium", "conservative" },
                new string[] { "barista", "male", "high", "conservative" },
                new string[] { "doctor", "female", "medium", "liberal" },
                new string[] { "analyst", "male", "low", "liberal" },
                new string[] { "doctor", "male", "medium", "conservative" },
                new string[] { "cook", "male", "medium", "conservative" },
                new string[] { "doctor", "female", "high", "conservative" },
                new string[] { "analyst", "male", "high", "conservative" },
                new string[] { "barista", "female", "medium", "liberal" },
                new string[] { "doctor", "male", "medium", "conservative" },
                new string[] { "analyst", "female", "medium", "conservative" },
                new string[] { "analyst", "male", "medium", "conservative" },
                new string[] { "doctor", "female", "medium", "liberal" },
                new string[] { "barista", "male", "medium", "conservative" }
            };

            Console.WriteLine("The raw data is: \n");
            ShowData(rawData, 5, true);

            Console.WriteLine("Splitting data into 80%-20% train and test sets");
            string[][] trainingRows;
            string[][] testRows;
            // NOTE(review): the literal 15 is presumably a random seed rather than a
            // split size; confirm against MakeTrainTest, which is not visible here.
            MakeTrainTest(rawData, 15, out trainingRows, out testRows);
            Console.WriteLine("Done \n");

            Console.WriteLine("Training data: \n");
            ShowData(trainingRows, 5, true);
            Console.WriteLine("Test data: \n");
            ShowData(testRows, 5, true);

            // Both progress messages are printed before the classifier is constructed.
            Console.WriteLine("Creating Naive Bayes classifier object");
            Console.WriteLine("Training classifier using training data");
            BayesClassifier classifier = new BayesClassifier();
            classifier.Train(trainingRows);
            Console.WriteLine("Done \n");

            double accuracyOnTrain = classifier.Accuracy(trainingRows);
            Console.WriteLine("Accuracy of model on train data = " + accuracyOnTrain.ToString("F4"));

            double accuracyOnTest = classifier.Accuracy(testRows);
            Console.WriteLine("Accuracy of model on test data = " + accuracyOnTest.ToString("F4"));

            Console.WriteLine("\nPredicting politics for job = barista, sex = female, " + "income = medium \n");
            string[] sample = new string[] { "barista", "female", "medium" };

            double probabilityLiberal = classifier.Probability("liberal", sample);
            Console.WriteLine("Probability of liberal = " + probabilityLiberal.ToString("F4"));

            double probabilityConservative = classifier.Probability("conservative", sample);
            Console.WriteLine("Probability of conservative = " + probabilityConservative.ToString("F4"));

            Console.WriteLine("\nEnd Naive Bayes classification demo\n");

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
예제 #2
0
        /// <summary>
        /// Trains a <c>BayesClassifier</c> on CSV data, classifies the test documents and
        /// writes every prediction, the per-class precision/recall and the macro/micro
        /// F-scores to <c>dir_result\result.txt</c>.
        /// </summary>
        /// <remarks>
        /// The first line of <c>config.txt</c> selects the mode. A line of the form
        /// "1 k" holds out a random 1/k of <c>dir_pre\dataCSV.csv</c> for testing and
        /// trains on the rest. Any other leading integer, or an empty file, trains on
        /// <c>dir_pre\trainning_dataCSV.csv</c> and tests on <c>dir_pre\test_dataCSV.csv</c>.
        /// Precision or recall of a class with no predicted / no actual documents is
        /// reported as 0 instead of NaN so that the macro averages stay meaningful.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when cross-validation is selected with a non-positive k.
        /// </exception>
        static void Main(string[] args)
        {
            Program       program        = new Program();
            int           k              = 10;
            int           type           = 2;
            List <string> listTest       = new List <string>();
            List <int>    correct_result = new List <int>();

            // The output file is created up front (as before); the using statements make
            // sure it is flushed and closed even when a later step throws.
            using (FileStream fs = new FileStream("dir_result\\result.txt", FileMode.Create))
            using (StreamWriter streamWriter = new StreamWriter(fs))
            {
                program.GetDictionary("dir_pre\\dictionary_dataCSV.csv");

                string line;
                using (StreamReader streamReader = new StreamReader("config.txt"))
                {
                    line = streamReader.ReadLine();
                }

                // An empty config file yields null; keep the defaults (type 2) in that case.
                if (line != null)
                {
                    string[] configParts = line.Split(' ');
                    if (Int32.Parse(configParts[0]) == 1)
                    {
                        type = 1;
                        k    = Int32.Parse(configParts[1]);
                        if (k <= 0)
                        {
                            throw new InvalidOperationException("config.txt: k must be a positive integer");
                        }
                    }
                }

                // The classifier is constructed from a string array of class names (key_class).
                // Assumes the dictionary keys are the integers 0..Count-1, since each key is
                // used directly as an array index -- verify against GetDictionary.
                string[] arrClass = new string[program._predictionDictionary.Count];
                foreach (var key in program._predictionDictionary.Keys)
                {
                    arrClass[key] = key.ToString();
                }
                var classifier = new BayesClassifier(arrClass);

                if (type == 1) // cross-validation
                {
                    var    inputRows = DataAccess.DataTable.New.ReadCsv("dir_pre\\dataCSV.csv").Rows.ToList();
                    int    size      = inputRows.Count;
                    int    n         = size / k;
                    Random ran       = new Random();
                    // Randomly pick n rows for testing; whatever remains is used for training.
                    List <Row> test = new List <Row>();
                    for (int i = 0; i < n; i++)
                    {
                        int j = ran.Next(0, inputRows.Count);
                        // Move the randomly chosen row from the training pool to the test set.
                        test.Add(inputRows[j]);
                        inputRows.RemoveAt(j);
                    }

                    // Training
                    foreach (var item in inputRows)
                    {
                        classifier.Train(item["class"], item["text"]);
                    }

                    // Test documents
                    listTest = test.Select(row => row["text"]).ToList();
                    // Expected classes of the test documents
                    correct_result = test.Select(row => Int32.Parse(row["class"])).ToList();
                }
                else if (type == 2)
                {
                    // Training data
                    var dataTable = DataAccess.DataTable.New.ReadCsv("dir_pre\\trainning_dataCSV.csv");
                    foreach (var item in dataTable.Rows)
                    {
                        classifier.Train(item["class"], item["text"]);
                    }
                    dataTable = DataAccess.DataTable.New.ReadCsv("dir_pre\\test_dataCSV.csv");
                    // Test documents
                    listTest = dataTable.Rows.Select(row => row["text"]).ToList();
                    // Expected classes of the test documents
                    correct_result = dataTable.Rows.Select(row => Int32.Parse(row["class"])).ToList();
                }

                // Classify every test document and log "predictedClass,text".
                List <int> result = new List <int>();

                foreach (string text in listTest)
                {
                    string rs = classifier.Classify(text);
                    result.Add(Int32.Parse(rs));
                    streamWriter.WriteLine(rs + "," + text);
                }

                int n_class             = program._predictionDictionary.Count;
                int total_count_correct = 0; // total number of correctly classified documents

                double[] PiArr = new double[n_class]; // precision per class
                double[] RiArr = new double[n_class]; // recall per class

                for (int i = 0; i < n_class; i++)
                {
                    int count_correct = 0; // documents correctly assigned to class i
                    int count_machine = 0; // documents the classifier assigned to class i
                    int count_doc     = 0; // documents that really belong to class i
                    for (int j = 0; j < correct_result.Count; j++)
                    {
                        bool predicted = result[j] == i;
                        bool actual    = correct_result[j] == i;
                        if (predicted)
                        {
                            count_machine++;
                        }
                        if (actual)
                        {
                            count_doc++;
                        }
                        if (predicted && actual)
                        {
                            count_correct++;
                        }
                    }
                    total_count_correct += count_correct;

                    // 0/0 would be NaN and poison the macro averages; report 0 instead.
                    PiArr[i] = count_machine == 0 ? 0.0 : 1.0 * count_correct / count_machine;
                    RiArr[i] = count_doc == 0 ? 0.0 : 1.0 * count_correct / count_doc;
                }

                double Pmacro = n_class == 0 ? 0.0 : PiArr.Average();
                double Rmacro = n_class == 0 ? 0.0 : RiArr.Average();
                double Fmacro = (Pmacro + Rmacro) == 0.0 ? 0.0 : (2 * Pmacro * Rmacro) / (Pmacro + Rmacro);
                // Computed as correct / total, i.e. overall accuracy.
                double Fmicro = correct_result.Count == 0 ? 0.0 : 1.0 * total_count_correct / correct_result.Count;

                for (int i = 0; i < n_class; i++)
                {
                    streamWriter.WriteLine("P" + i + "=" + "P" + program._predictionDictionary[i] + "=" + PiArr[i]);
                    streamWriter.WriteLine("R" + i + "=" + "R" + program._predictionDictionary[i] + "=" + RiArr[i]);
                }
                streamWriter.WriteLine("Fmacro=" + Fmacro);
                streamWriter.WriteLine("Fmicro=" + Fmicro);
            }
        }