/// <summary>
/// Trains a linear-kernel C-SVC sentiment model on the "Tweet"/"Sentiment" columns of a
/// CSV file, then classifies each line typed on the console until "quit" is entered or
/// the input stream ends.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Reading the data.
    const string dataFilePath = @"E:\training data\testdata.manual.2009.06.14.csv";
    var dataTable = DataTable.New.ReadCsv(dataFilePath);
    var rows = dataTable.Rows.ToList();

    List<string> x = rows.Select(row => row["Tweet"]).ToList();

    // The labels come from a data file, not from the user, so parse them independently
    // of the machine's regional settings.
    double[] y = rows
        .Select(row => double.Parse(row["Sentiment"], System.Globalization.CultureInfo.InvariantCulture))
        .ToArray();

    // Creating the vocabulary: the distinct tokens GetWords yields over all tweets, ordered.
    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    // Generating the problem.
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    // Creating and training the SVM model.
    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    // Prediction: numeric label -> text printed for the user.
    var sentimentNames = new Dictionary<int, string>
    {
        { 0, "Bad" },
        { 4, "Good" },
        { 2, "Neutral" }
    };

    while (true)
    {
        string userInput = Console.ReadLine();

        // Check the exit conditions BEFORE classifying: "quit" is a command, not a tweet,
        // and ReadLine returns null once the input is exhausted (e.g. redirected stdin),
        // which must end the loop rather than be handed to the classifier.
        if (userInput == null || userInput == "quit")
        {
            break;
        }

        var newX = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
        var predictedY = model.Predict(newX);

        Console.WriteLine("{0}", sentimentNames[(int)predictedY]);
        Console.WriteLine(new string('=', 50));
    }
}
/// <summary>
/// Trains a linear-kernel C-SVC on <paramref name="trainFile"/>, predicts a class for every
/// entry read from <paramref name="testFile"/>, writes the "class - entry" lines to
/// "../../svm/testResult.txt", and prints per-class counts and scores against
/// <paramref name="testTarget"/> followed by the elapsed time in milliseconds.
/// </summary>
/// <param name="trainFile">Path of the training file passed to FileIO.ReadFileIntoVector.</param>
/// <param name="testFile">Path of the file whose entries are classified.</param>
/// <param name="testTarget">Path of the file holding the expected classification of the test entries.</param>
public static void ClassifyBySVM(string trainFile, string testFile, string testTarget)
{
    string testResultFile = "../../svm/testResult.txt";
    var watch = System.Diagnostics.Stopwatch.StartNew();

    // STEP 1: Read data.
    // The callee assigns the out argument, so no list needs to be allocated up front.
    List<Vector> vectorsTrain;
    var content = FileIO.ReadFileIntoVector(trainFile, out vectorsTrain, true);

    // Materialize the class types once: they are indexed and iterated several times below.
    var classTypes = Vector.GetDistinctClassTypes(vectorsTrain).ToList();

    // Labels of the training documents.
    double[] label = GetLableOfDocument(vectorsTrain);

    // Feature list: the distinct tokens GetWords yields over the training content, ordered.
    var features = content.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    // STEP 2: Generate a problem.
    var problem = TextClassificationProblemBuilder.CreateProblem(content, label, features.ToList());

    // STEP 3: Create and train an SVM model.
    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    // STEP 4: Predict.
    List<string> test = FileIO.ReadFile(testFile);
    var resultList = new List<string>(test.Count);

    _predictionDictionary = new Dictionary<int, string>();

    List<Vector> targetVector;
    FileIO.ReadFileIntoVector(testTarget, out targetVector, true);

    // A predicted value is looked up by its integer value, which is taken to be the
    // position of the class in classTypes.
    // NOTE(review): this presumes GetLableOfDocument encodes labels as those positions — confirm.
    for (int l = 0; l < classTypes.Count; l++)
    {
        _predictionDictionary.Add(l, classTypes[l]);
    }

    foreach (string entry in test)
    {
        var newX = TextClassificationProblemBuilder.CreateNode(entry, features);
        var predictedY = model.Predict(newX);
        var result = _predictionDictionary[(int)predictedY];
        resultList.Add(result + " - " + entry);
    }

    FileIO.WriteFile(resultList, testResultFile);

    // The predictions are read back from the result file so they can be compared
    // with the target vectors.
    List<Vector> sourceVector;
    FileIO.ReadFileIntoVector(testResultFile, out sourceVector, true);

    foreach (var classType in classTypes)
    {
        // Compute each count once and reuse it for both the score and the report.
        var correct = Vector.CountShareSameTypeRecords(classType, sourceVector, targetVector);
        var total = Vector.CountClassElements(classType, targetVector);

        // Floating-point division: a class with no target records yields NaN/Infinity
        // here rather than throwing.
        double score = 1.0 * correct / total;

        Console.WriteLine("correct label: " + correct);
        Console.WriteLine("total label: " + total);
        Console.WriteLine("SVM score: " + score);
    }

    Console.WriteLine("The time for SVM: {0} ", watch.ElapsedMilliseconds);
}