Пример #1
0
        /// <summary>
        /// Program entry point. Loads the stop-word table, calls <c>trainModel</c> on the
        /// training directory, configures a <c>KNN</c> instance from the collected data,
        /// runs <c>runKnnTest</c> on the testing directory and passes the outcome to
        /// <c>genStatistic</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // All locations are fixed absolute paths; note that the test log goes to a
            // different directory than the training/statistics log.
            const string stopWordFile = @"D:\work\KPMG\learning\project1\stopword.txt";
            const string trainingDir  = @"D:\work\KPMG\learning\classification\project1_0422\test_data\1\Training";
            const string testingDir   = @"D:\work\KPMG\learning\classification\project1_0422\test_data\1\Testing";
            const string logDir       = @"D:\work\KPMG\learning\classification\project1_0422\log";
            const string testLogDir   = @"D:\work\KPMG\learning\classification\project1_0422\test_data\log";
            const int    dictionarySize = 5000;

            // Containers handed to trainModel by reference.
            var docWordMaps    = new List<Dictionary<string, double>>();
            var wordDictionary = new Dictionary<string, int>();
            var trainAnswers   = new List<int>();
            var idfByWord      = new Dictionary<string, double>();
            var testFileNames  = new List<string>();

            Hashtable stopWords = genStopwordTable(stopWordFile);

            trainModel(trainingDir, logDir, ref docWordMaps, ref wordDictionary, dictionarySize, ref trainAnswers, ref idfByWord, stopWords);

            // Configure the classifier with the dictionary size and the number of collected documents.
            KNN classifier = new KNN();
            classifier.set(dictionarySize, docWordMaps.Count);
            classifier.initial(docWordMaps, wordDictionary, trainAnswers);
            classifier.train(3, 20);
            classifier.getAveDistance();

            // Disabled: classifier log output.
            //knn.genLog(@"D:\work\KPMG\learning\classification\project1_0422\log");
            List<KeyValuePair<int, int>> testAnswers =
                runKnnTest(classifier, testingDir, testLogDir, wordDictionary, idfByWord, stopWords, ref testFileNames);

            genStatistic(testAnswers, testFileNames, logDir);
        }
Пример #2
0
        /// <summary>
        /// Program entry point. Loads the stop-word table, creates an <c>NLPAdapter</c>,
        /// calls <c>trainModel</c>, then classifies the test data and passes the outcome to
        /// <c>genStatistic</c>. When the <c>KNN_MODE</c> symbol is defined at compile time the
        /// test run goes through <c>KNN</c> / <c>runKnnTest</c>; otherwise it goes through
        /// <c>SVMAdapter</c> / <c>runSVMTest</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const int dictionarySize = 100;

            // Containers handed to trainModel by reference.
            var docWordMaps    = new List<Dictionary<string, double>>();
            var wordDictionary = new Dictionary<string, int>();
            var trainAnswers   = new List<int>();
            var idfByWord      = new Dictionary<string, double>();
            var testFileNames  = new List<string>();

            // The stop-word table is built before the first progress message is printed.
            Hashtable stopWords = genStopwordTable(STOP_WORD_PATH);

            Console.WriteLine("==> Starting prepare data...");
            NLPAdapter nlp = new NLPAdapter(NLP_MODEL_PATH);

            trainModel(TRAINING_DATA_DIR, LOG_DIR, ref docWordMaps, ref wordDictionary, dictionarySize, ref trainAnswers, ref idfByWord, stopWords, nlp);

            // Assigned by exactly one of the two compile-time branches below.
            List<KeyValuePair<int, int>> testAnswers;
#if KNN_MODE
            KNN classifier = new KNN();
            classifier.set(dictionarySize, docWordMaps.Count);
            classifier.initial(docWordMaps, wordDictionary, trainAnswers);
            classifier.train(3, 20);
            classifier.getAveDistance();

            // Disabled: classifier log output.
            //knn.genLog(@"D:\work\KPMG\learning\classification\project1_0422\log");
            testAnswers = runKnnTest(classifier, TEST_DATA_DIR, TEST_LOG_DIR, wordDictionary, idfByWord, stopWords, ref testFileNames, nlp);
#else
            Console.WriteLine("==> Starting get model...");
            SVMAdapter svm = new SVMAdapter();
            svm_model trainedModel = svm.getSVMModel(
                docWordMaps,
                wordDictionary,
                trainAnswers,
                SVMAdapter.SVM_C_DEFAULT,
                SVMAdapter.SVM_GAMMA_DEFAULT);

            Console.WriteLine("==> Starting SVM test...");
            testAnswers = runSVMTest(svm, TEST_DATA_DIR, TEST_LOG_DIR, wordDictionary, idfByWord, stopWords, ref testFileNames, trainedModel, nlp);
            Console.WriteLine("==> Starting SVM test done!!");
#endif
            Console.WriteLine("==> Starting saving result...");
            genStatistic(testAnswers, testFileNames, LOG_DIR);
        }
Пример #3
0
        /// <summary>
        /// Classifies every file found in each immediate sub-directory of
        /// <paramref name="testPath"/> with <c>knn.test</c> and collects the outcomes.
        /// Each sub-directory path and each "result,index" pair is echoed to the console.
        /// </summary>
        /// <param name="knn">Classifier whose <c>test</c> method is invoked per file.</param>
        /// <param name="testPath">Directory whose sub-directories are enumerated as categories.</param>
        /// <param name="logPath">Not used by this method.</param>
        /// <param name="dictionary">Forwarded unchanged to <c>knn.test</c>.</param>
        /// <param name="wordIDFDictionary">Forwarded unchanged to <c>knn.test</c>.</param>
        /// <param name="stopWordTable">Forwarded to <c>readDoc</c> for every file.</param>
        /// <param name="testFileNameList">Receives the bare file name of every processed file, in processing order.</param>
        /// <returns>
        /// One pair per file: the key is the value returned by <c>knn.test</c>, the value is the
        /// zero-based position of the file's sub-directory in the <c>Directory.GetDirectories</c> result.
        /// </returns>
        private static List <KeyValuePair <int, int> > runKnnTest(KNN knn, string testPath, string logPath, Dictionary <string, int> dictionary, Dictionary <string, double> wordIDFDictionary, Hashtable stopWordTable, ref List <string> testFileNameList)
        {
            string[] categoryDirs = Directory.GetDirectories(testPath);
            var outcomes = new List<KeyValuePair<int, int>>();

            int categoryIndex = 0;
            foreach (string categoryDir in categoryDirs)
            {
                Console.WriteLine(categoryDir);

                foreach (string filePath in Directory.GetFiles(categoryDir))
                {
                    // The file name is recorded before classification, so it is kept even if the test call throws.
                    testFileNameList.Add(Path.GetFileName(filePath));

                    int predicted = knn.test(readDoc(filePath, stopWordTable), dictionary, wordIDFDictionary);
                    outcomes.Add(new KeyValuePair<int, int>(predicted, categoryIndex));
                    Console.WriteLine("{0},{1}", predicted, categoryIndex);
                }

                categoryIndex++;
            }

            return outcomes;
        }