コード例 #1
0
        /// <summary>
        /// Handles the Predict button. Runs the text from <c>txt_text</c> through the external
        /// tokenizer, strips stop words, encodes the remaining words as a bag-of-words count
        /// vector and classifies it with KNN (when <c>rb_knn</c> is checked) or with the external
        /// SVM predictor (otherwise). The predicted class name is written to <c>txt_out</c> and a
        /// step-by-step trace of the pipeline replaces the content of <c>txt_text</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btn_predict_Click(object sender, EventArgs e)
        {
            if (txt_text.Text == "")
            {
                MessageBox.Show("Text box dose not emtry !");
                return;
            }

            // Trace of every pipeline step; it is shown in txt_text once the prediction is done.
            List<string> CMD = new List<string>();
            CMD.Add("=== ===Step 0: Văn bản gốc === ===");
            CMD.Add(" ");

            txt_out.Text = "Predicting... ";
            int k = 0;
            Int32.TryParse(txt_k.Text, out k);
            string txt = txt_text.Text;
            txt_text.Text = "Predicting... ";
            CMD.Add(txt);
            CMD.Add(" "); CMD.Add(" ");
            CMD.Add("=== ===Step 1: Xử lý VnTokenizer === ===");
            CMD.Add(" ");

            // Hand the raw text to the external tokenizer through temp.txt and read back VnT.txt.
            File.WriteAllText(".\\temp.txt", txt, Encoding.UTF8);
            createBAT("..\\temp.txt", "..\\VnT.txt");
            vntokenizer();
            string[] StopWord = File.ReadAllLines(@".\\data\vnstopword.txt");
            string   output   = "";
            foreach (string textline in File.ReadAllLines(".\\VnT.txt"))
            {
                CMD.Add(textline);
                // Lower-case each tokenized line before stop-word removal; removeStopWord
                // accumulates its result into `output`.
                output = removeStopWord(textline.ToLower(), output, StopWord);
            }

            CMD.Add(" "); CMD.Add(" ");
            CMD.Add("=== ===Step 2: Xóa Stop Word === ===");
            CMD.Add(" ");

            // Split on single spaces. Unlike a manual character scan this keeps the final word
            // even when `output` has no trailing space, and it never yields empty tokens.
            List<string> words = new List<string>(
                output.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));

            bool     useKnn     = rb_knn.Checked;
            string[] dictionary = File.ReadAllLines(useKnn
                                                    ? ".\\Dictionary\\Bows.knn.dic"
                                                    : ".\\Dictionary\\Bows.svm.dic");
            int[] counts = CountBowOccurrences(dictionary, words);

            // Human-readable forms of the word list and of the encoded vector for the trace.
            StringBuilder vb_word = new StringBuilder();
            foreach (string w in words)
            {
                vb_word.Append(w).Append(' ');
            }
            StringBuilder vb_code = new StringBuilder();
            foreach (int c in counts)
            {
                vb_code.Append(c.ToString()).Append(' ');
            }

            if (useKnn)
            {
                List<double> features = new List<double>(counts.Length);
                foreach (int c in counts)
                {
                    features.Add(c);
                }

                // Use an odd, positive neighbour count. Even values are rounded up, matching
                // btn_knn_Click, so the tested and the applied k agree; an empty or invalid
                // txt_k (parsed as 0) becomes 1 instead of -1.
                if (k < 1)
                {
                    k = 1;
                }
                else if (k % 2 == 0)
                {
                    k++;
                }

                Knn kNN_predict = Knn.initialiseKNN(k, ".\\Dictionary\\Dataset.knn.train", true);
                int predict     = Int32.Parse(kNN_predict.Classify(features));
                txt_out.Text = "Predict class is: " + get_Name_Class(predict);
            }
            else
            {
                // One line for svm_predict: a placeholder label "0" followed by index:count pairs.
                // NOTE(review): indices start at 0 here; presumably this matches how the model
                // was trained - confirm against the training-set writer.
                StringBuilder svmLine = new StringBuilder("0");
                for (int i = 0; i < counts.Length; i++)
                {
                    svmLine.Append(' ').Append(i.ToString()).Append(':').Append(counts[i].ToString());
                }

                int predict;
                if (TryRunSvmPredict(svmLine.ToString(), out predict))
                {
                    txt_out.Text = "Predict class is: " + get_Name_Class(predict);
                }
                else
                {
                    txt_out.Text = "Predict failed: SVM predictor produced no result.";
                }
            }

            CMD.Add(vb_word.ToString());
            CMD.Add(" "); CMD.Add(" ");
            CMD.Add("=== ===Step 4: Mã hóa bằng từ điển BOWS === ===");
            CMD.Add(" ");
            CMD.Add(vb_code.ToString());

            txt_text.Text = String.Join(Environment.NewLine, CMD);
        }

        /// <summary>
        /// Counts, for every dictionary entry, how many of <paramref name="words"/> compare equal
        /// to it (culture-sensitive <c>CompareTo</c>). The result has one slot per dictionary line.
        /// </summary>
        private static int[] CountBowOccurrences(string[] dictionary, List<string> words)
        {
            int[] counts = new int[dictionary.Length];
            foreach (string word in words)
            {
                for (int i = 0; i < dictionary.Length; i++)
                {
                    if (dictionary[i].CompareTo(word) == 0)
                    {
                        counts[i]++;
                    }
                }
            }
            return counts;
        }

        /// <summary>
        /// Writes <paramref name="featureLine"/> to temp.txt, runs svm_predict.exe through
        /// svm_test.bat and parses the first line of svm\Output.txt.
        /// </summary>
        /// <returns>
        /// <c>true</c> and the parsed class in <paramref name="predictedClass"/> on success;
        /// <c>false</c> when the process could not be run or produced no parsable output.
        /// </returns>
        private static bool TryRunSvmPredict(string featureLine, out int predictedClass)
        {
            const string outputPath = ".\\svm\\Output.txt";
            predictedClass = 0;

            File.WriteAllText(".\\temp.txt", featureLine, Encoding.ASCII);
            File.WriteAllText(".\\svm_test.bat",
                              "cd svm \n svm_predict.exe ..\\temp.txt Model Output.txt",
                              Encoding.ASCII);

            try
            {
                // Drop the previous result so a failed run cannot be mistaken for a fresh one.
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }

                using (Process proc = new Process())
                {
                    // The directory is used verbatim; passing it through string.Format would
                    // throw on paths that contain '{' or '}'.
                    proc.StartInfo.WorkingDirectory = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);
                    proc.StartInfo.FileName         = "svm_test.bat";
                    proc.StartInfo.CreateNoWindow   = false;
                    proc.Start();
                    proc.WaitForExit();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                return false;
            }

            if (!File.Exists(outputPath))
            {
                return false;
            }

            string[] result = File.ReadAllLines(outputPath);
            return result.Length > 0 && Int32.TryParse(result[0], out predictedClass);
        }
コード例 #2
0
        /// <summary>
        /// Handles the KNN test button. Builds the test dataset if
        /// <c>.\Dictionary\Dataset.knn.test</c> does not exist yet, classifies every row of that
        /// file with a KNN model loaded from <c>Dataset.knn.train</c>, and writes per-class and
        /// overall accuracy to <c>txt_text</c>.
        /// Each row is comma-separated: feature values first, the expected label last.
        /// </summary>
        /// <param name="sender">Event source; forwarded to <c>btn_vntokenizer_Click</c>.</param>
        /// <param name="e">Event data; forwarded to <c>btn_vntokenizer_Click</c>.</param>
        private void btn_knn_Click(object sender, EventArgs e)
        {
            // Tallies are indexed by numeric label 0..15; the per-class report lists 1..15.
            const int classSlots = 16;

            Train = false;
            try
            {
                txt_text.Text = "Testing is running...";

                // Generate the test dataset on first use.
                if (!File.Exists(".\\Dictionary\\Dataset.knn.test"))
                {
                    SubFolders = Directory.GetDirectories(txt_folderPath.Text);
                    btn_vntokenizer_Click(sender, e);
                    Doc2Words();
                    convertDataset();
                }

                // Use an odd, positive neighbour count; empty/invalid input (parsed as 0) becomes 1.
                int k = 0;
                Int32.TryParse(txt_k.Text, out k);
                if (k < 1)
                {
                    k = 1;
                }
                else if (k % 2 == 0)
                {
                    k++;
                }
                Knn kNN_predict = Knn.initialiseKNN(k, ".\\Dictionary\\Dataset.knn.train", true);

                string[]     tests    = File.ReadAllLines(".\\Dictionary\\Dataset.knn.test");
                List<string> Result   = new List<string>();
                List<string> Label    = new List<string>();
                List<double> Classify = new List<double>();

                foreach (string test in tests)
                {
                    // A blank line carries neither features nor a label; do not count it as a sample.
                    if (test.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] fields = test.Split(',');
                    Classify.Clear();
                    for (int i = 0; i < fields.Length - 1; i++)
                    {
                        // Unparsable values become 0.0.
                        // NOTE(review): parsing uses the current culture - confirm it matches
                        // how convertDataset writes the file.
                        double value = 0.0;
                        Double.TryParse(fields[i], out value);
                        Classify.Add(value);
                    }
                    Label.Add(fields[fields.Length - 1]);
                    Result.Add(kNN_predict.Classify(Classify));
                }

                // T = correctly classified, F = misclassified, both per expected label.
                int[] T = new int[classSlots];
                int[] F = new int[classSlots];
                for (int i = 0; i < Label.Count; i++)
                {
                    // Unparsable labels land in slot 0 (TryParse leaves 0). Labels outside the
                    // tally range are skipped instead of raising ArgumentOutOfRangeException.
                    int labelIndex = 0;
                    Int32.TryParse(Label[i], out labelIndex);
                    if (labelIndex < 0 || labelIndex >= classSlots)
                    {
                        continue;
                    }

                    if (Label[i] == Result[i])
                    {
                        T[labelIndex]++;
                    }
                    else
                    {
                        F[labelIndex]++;
                    }
                }

                List<string> CMD = new List<string>();
                for (int i = 1; i < classSlots; i++)
                {
                    int seen = F[i] + T[i];
                    if (seen != 0)
                    {
                        string cmd = "Class: " + get_Name_Class(i) + " :: T = " + T[i].ToString() + " <> F = " + F[i].ToString() + "      ";
                        cmd += "Acc: " + ((double)T[i] * 100 / seen).ToString() + " %";
                        CMD.Add(cmd);
                    }
                }

                // With no counted samples this is 0.0 / 0 and prints NaN.
                int correct = T.Sum();
                int wrong   = F.Sum();
                CMD.Add("Test Accuracy: " + ((double)correct * 100 / (correct + wrong)).ToString() + " %");
                txt_text.Text = String.Join(Environment.NewLine, CMD);
            }
            finally
            {
                // Restore the flag even when a step above throws, so later actions are not
                // left running with Train == false.
                Train = true;
            }

            MessageBox.Show("Done !");
        }