Ejemplo n.º 1
0
        /// <summary>
        /// Trains a linear-kernel C-SVC sentiment classifier on the "Tweet"/"Sentiment" columns of a
        /// CSV file, then classifies every line typed on the console until the user enters "quit"
        /// or the input stream ends.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> overrides the training CSV path. When absent, the original
        /// hard-coded path is used.
        /// </param>
        static void Main(string[] args)
        {
            // reading the data
            const string defaultDataFilePath = @"E:\training data\testdata.manual.2009.06.14.csv";
            string dataFilePath = args.Length > 0 ? args[0] : defaultDataFilePath;

            var dataTable = DataTable.New.ReadCsv(dataFilePath);
            var rows = dataTable.Rows.ToList();

            List<string> x = rows.Select(row => row["Tweet"]).ToList();

            // The CSV is machine-readable data, so parse with the invariant culture instead of
            // whatever culture the current thread happens to run under.
            double[] y = rows
                .Select(row => double.Parse(row["Sentiment"], System.Globalization.CultureInfo.InvariantCulture))
                .ToArray();

            // creating vocabulary: every distinct word of every tweet, sorted
            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            // generating the problem (a copy of the vocabulary is handed over, as before)
            var problemBuilder = new TextClassificationProblemBuilder();
            var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            // creating and training SVM model
            const int C = 1;
            var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            // prediction: numeric labels used in the training data mapped to display text
            var sentimentNames = new Dictionary<int, string> { { 0, "Bad" }, { 4, "Good" }, { 2, "Neutral" } };

            // Console.ReadLine returns null at end of input; treat that like "quit".
            // Checking before predicting also stops the sentinel "quit" from being classified.
            string userInput;
            while ((userInput = Console.ReadLine()) != null && userInput != "quit")
            {
                var newX = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
                var predictedY = model.Predict(newX);

                // Guard against a predicted label that has no display text.
                string sentiment;
                if (!sentimentNames.TryGetValue((int)predictedY, out sentiment))
                {
                    sentiment = "Unknown (" + predictedY + ")";
                }

                Console.WriteLine(sentiment);
                Console.WriteLine(new string('=', 50));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains a linear-kernel C-SVC text classifier on <paramref name="trainFile"/>, predicts a
        /// class for every entry read from <paramref name="testFile"/>, writes the labelled entries to
        /// "../../svm/testResult.txt", and prints per-class counts and scores obtained by comparing
        /// that result file with <paramref name="testTarget"/>.
        /// </summary>
        /// <param name="trainFile">Path of the training file read through FileIO.ReadFileIntoVector.</param>
        /// <param name="testFile">Path of the file whose entries are classified.</param>
        /// <param name="testTarget">Path of the file holding the expected classification of the test entries.</param>
        /// <remarks>
        /// Side effects: replaces the <c>_predictionDictionary</c> member (declared outside this method),
        /// overwrites the result file and writes to the console.
        /// </remarks>
        public static void ClassifyBySVM(string trainFile, string testFile, string testTarget)
        {
            string testResultFile = "../../svm/testResult.txt";

            var watch = System.Diagnostics.Stopwatch.StartNew();

            //STEP 1 : READ DATA
            // The out parameter assigns the list, so no instance is allocated up front.
            List<Vector> vectorsTrain;
            var content = FileIO.ReadFileIntoVector(trainFile, out vectorsTrain, true);

            // Materialise the class names once; they are iterated twice below.
            var classNames = Vector.GetDistinctClassTypes(vectorsTrain).ToList();

            //Get the label of every training document
            double[] label = GetLableOfDocument(vectorsTrain);

            //Get features list: every distinct word of the training content, sorted
            var features = content.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            //STEP 2: Generate a problem
            var problem = TextClassificationProblemBuilder.CreateProblem(content, label, features);

            //STEP 3: Create and train a SVM model
            const int C = 1;
            var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            //STEP 4: Predict
            List<string> test = FileIO.ReadFile(testFile);
            var resultList = new List<string>(test.Count);

            _predictionDictionary = new Dictionary<int, string>();

            List<Vector> targetVector;
            FileIO.ReadFileIntoVector(testTarget, out targetVector, true);

            // The predicted numeric value is used as the position of the class name.
            for (int index = 0; index < classNames.Count; index++)
            {
                _predictionDictionary.Add(index, classNames[index]);
            }

            foreach (string document in test)
            {
                var newX = TextClassificationProblemBuilder.CreateNode(document, features);
                var predictedY = model.Predict(newX);
                var result = _predictionDictionary[(int)predictedY];
                resultList.Add(result + " - " + document);
            }
            FileIO.WriteFile(resultList, testResultFile);

            // Read the predictions back in the same representation as the expected results.
            List<Vector> sourceVector;
            FileIO.ReadFileIntoVector(testResultFile, out sourceVector, true);

            //STEP 5: Report per-class results
            foreach (var className in classNames)
            {
                // Each count is computed once and reused for both the score and the report lines.
                var correct = Vector.CountShareSameTypeRecords(className, sourceVector, targetVector);
                var total = Vector.CountClassElements(className, targetVector);

                // NOTE(review): no guard for total == 0 (same as before) - confirm whether a class
                // can be absent from the target file.
                double score = 1.0 * correct / total;

                Console.WriteLine("correct label: " + correct);
                Console.WriteLine("total label: " + total);
                Console.WriteLine("SVM score: " + score);
            }

            Console.WriteLine("The time for SVM: {0} ", watch.ElapsedMilliseconds);
        }