// Trains a linear-kernel C_SVC text classifier (C = 1) on the records of the "training_data" collection,
// using "Title Process" as the text and Category as the label, then prints a predicted Karma label for
// every active app ("MainWindowTitle ProcessName") of the "daily_records" document whose Id equals
// AppUsageRecord.GetGeneratedId(DateTime.Now).
// NOTE(review): the first statement is garbled — the connection-string credentials look redacted and the
// declarations of `database` and `filter` (both used further down) appear to have been lost with them.
// Presumably `filter` was a Regex.Replace-based tokenizer (non-letters -> spaces, lower-cased, split on
// spaces, tokens shorter than 20 chars) — TODO confirm and restore from the original source.
// NOTE(review): FirstOrDefault() on the daily record is dereferenced without a null check.
private static void TrainingRecordData() { var mongoClient = new MongoClient("mongodb://*****:*****@"[^a-zA-Z]", " ").Trim().ToLowerInvariant().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Where(d => d.Count() < 20).ToArray(); var trainingDataCollection = database.GetCollection <ProcessInfoLabeledItem>("training_data"); var records = trainingDataCollection.Find(Builders <ProcessInfoLabeledItem> .Filter.Empty).ToList(); var vocabulary = records.Select(c => c.Title + " " + c.Process).SelectMany(filter).Distinct().OrderBy(str => str).ToList(); List <string> x = records.Select(item => item.Title + " " + item.Process).ToList(); double[] y = records.Select(item => (double)item.Category).ToArray(); var problemBuilder = new TextClassificationProblemBuilder(); problemBuilder.RefineText = filter; var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList()); const int C = 1; var model = new C_SVC(problem, KernelHelper.LinearKernel(), C); var _predictionDictionary = new Dictionary <Karma, string> { { Karma.Bad, "Bad" }, { Karma.Good, "Good" }, { Karma.Neutral, "Neutral" } }; var newXs = database.GetCollection <AppUsageRecord>("daily_records").Find(Builders <AppUsageRecord> .Filter.Eq(f => f.Id, AppUsageRecord.GetGeneratedId(DateTime.Now))).FirstOrDefault().ActiveApps.Select(c => c.Value).Select(c => c.MainWindowTitle + " " + c.ProcessName); foreach (var _x in newXs) { var newX = TextClassificationProblemBuilder.CreateNode(_x, vocabulary, problemBuilder.RefineText); var predictedY = model.Predict(newX); Console.WriteLine($"For title {_x}"); Console.WriteLine($"The prediction is {_predictionDictionary[(Karma)predictedY]}"); } }
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) that maps the country name resolved for each plain ticket
/// to a label: 1 when the ticket is sold, -1 otherwise. Also stores the sorted, distinct word list
/// of those country names in <c>this.vocabulary</c>.
/// </summary>
/// <returns>The trained model.</returns>
public C_SVC getmodel()
{
    var countryNames = new List<string>();
    var labels = new List<double>();

    foreach (var ticket in _context.PlainTickets)
    {
        // Ticket -> target -> country; the country's name is the only text feature of the sample.
        var country = _context.Countries.FirstOrDefault(
            ct => ct.Key == _context.Targets.FirstOrDefault(t => t.Key == ticket.Target).CountryName);
        countryNames.Add(country.CountryName);

        labels.Add(ticket.IsSold ? 1.0 : -1.0);
    }

    double[] labelArray = labels.ToArray();
    this.vocabulary = countryNames.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();

    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(countryNames, labelArray, vocabulary.ToList());

    const int C = 1;
    return new C_SVC(problem, KernelHelper.LinearKernel(), C);
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Text"/"IsSunny" columns of D:\texto.csv, runs a
/// 10-fold cross-validation, and classifies the fixed sample sentence "caries".
/// The file <paramref name="path"/>/datoscsv.csv is also read and handed to arreglar_dato, but its
/// output is not used by the rest of this method.
/// </summary>
/// <param name="path">Directory that contains datoscsv.csv.</param>
static void Main2(string path)
{
    // Training set from the fixed CSV location.
    var table = DataTable.New.ReadCsv(@"D:\texto.csv");
    List<string> texts = table.Rows.Select(r => r["Text"]).ToList();
    double[] labels = table.Rows.Select(r => double.Parse(r["IsSunny"])).ToArray();

    // Secondary data set; parsed by arreglar_dato into secondaryTexts / secondaryLabels.
    string rawCsv = File.ReadAllText(path + @"/datoscsv.csv", Encoding.Default);
    List<string> secondaryTexts = new List<string>();
    double[] secondaryLabels = null;
    arreglar_dato(rawCsv, ref secondaryTexts, ref secondaryLabels);

    var vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();
    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(texts, labels, vocabulary.ToList());

    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    // The accuracy value is computed but not reported anywhere.
    model.GetCrossValidationAccuracy(10);

    _predictionDictionary = new Dictionary<int, string>
    {
        { -1, "Rainy" },
        { 1, "Sunny" }
    };

    // Classify a hard-coded sample; the prediction itself is discarded.
    string sample = "caries";
    var sampleNode = TextClassificationProblemBuilder.CreateNode(sample, vocabulary);
    model.Predict(sampleNode);

    Console.WriteLine("");
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Text"/"IsSunny" columns of sunnyData.csv in the
/// current directory, then classifies every line typed on the console until the user enters
/// "quit" or the input stream ends.
/// </summary>
private void TrainingData()
{
    string dataFilePath = Path.Combine(Directory.GetCurrentDirectory(), "sunnyData.csv");
    var dataTable = DataTable.New.ReadCsv(dataFilePath);
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["IsSunny"])).ToArray();

    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    var predictionDictionary = new Dictionary<int, string>
    {
        { -1, "Rainy" },
        { 1, "Sunny" }
    };

    // Check the sentinel BEFORE predicting: the previous do/while classified the word "quit" itself,
    // and passed null to CreateNode when Console.ReadLine() hit end of input.
    string userInput;
    while ((userInput = Console.ReadLine()) != null && userInput != "quit")
    {
        var newX = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
        var predictedY = model.Predict(newX);
        Console.WriteLine("The prediction is {0}", predictionDictionary[(int)predictedY]);
        Console.WriteLine(new string('=', 50));
    }

    Console.WriteLine("");
}
/// <summary>
/// Builds the sentiment classifier: reads up to 500 lines of Files/SentimentAnalysisDataset.csv
/// (when the file exists), trains a linear-kernel C_SVC (C = 1) on them, and initialises the
/// vocabulary, model and prediction-label fields.
/// </summary>
private SvmMethod()
{
    // Upper bound on the number of lines used for training (the original note mentioned 5146 —
    // presumably the full data set size; TODO confirm).
    const int MaxTrainingLines = 500;

    var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Files/SentimentAnalysisDataset.csv");
    List<string> x = new List<string>();
    List<double> y = new List<double>();

    if (File.Exists(path))
    {
        var lines = File.ReadAllLines(path);

        // Never read past the end of the file: a data set shorter than MaxTrainingLines used to
        // throw IndexOutOfRangeException.
        int lineCount = Math.Min(MaxTrainingLines, lines.Length);
        for (int i = 0; i < lineCount; i++)
        {
            // Split around the source marker: the second comma-separated field of the left part is
            // parsed as the label, the right part is the text.
            var lineArr = lines[i].Split(new string[] { ",Sentiment140,", ",Kaggle," }, StringSplitOptions.None);
            y.Add(double.Parse(lineArr[0].Split(',')[1]));
            x.Add(lineArr[1].Trim());
        }
    }

    vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y.ToArray(), vocabulary.ToList());

    const int C = 1;
    model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    _predictionDictionary = new Dictionary<int, string>
    {
        { 0, "negative" },
        { 1, "positive" }
    };
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Text"/"Category" columns of ~/DAL/svm/Data.csv,
/// prints its 10-fold cross-validation accuracy and exports the model next to the data file.
/// The vocabulary and model are stored in the corresponding static fields.
/// </summary>
public static void Main()
{
    // Read the training data.
    string dataFilePath = HttpContext.Current.Server.MapPath("~/DAL/svm/");
    var dataTable = DataTable.New.ReadCsv(dataFilePath + "Data.csv");
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["Category"])).ToArray();

    vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    const int C = 1;
    model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    var accuracy = model.GetCrossValidationAccuracy(10);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);

    // Format only the file name. The directory used to be concatenated into the format string,
    // so a '{' or '}' anywhere in the mapped path made string.Format throw FormatException.
    model.Export(dataFilePath + string.Format("model_{0}_accuracy.model", accuracy));

    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: sunny rainy sunny)");
    Console.WriteLine(new string('=', 50));
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on ./wwwroot/svm/words.csv ("Text" / "IsRecommended"
/// columns), flags every stored review as recommended or not based on its content, and renders
/// the recommended reviews (with their place and comments loaded) ordered by newest publish date.
/// </summary>
/// <returns>The view showing the recommended reviews.</returns>
public IActionResult RecommendedPlaces()
{
    // Predefined training data for the SVM.
    var table = DataTable.New.ReadCsv("./wwwroot/svm/words.csv");
    var texts = table.Rows.Select(r => r["Text"]).ToList();
    var labels = table.Rows.Select(r => double.Parse(r["IsRecommended"])).ToArray(); // -1 or +1

    var vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();
    var problem = CreateProblem(texts, labels, vocabulary.ToList());

    const int C = 1;
    var model = new libsvm.C_SVC(problem, KernelHelper.LinearKernel(), C);

    var labelNames = new Dictionary<int, string>
    {
        { -1, "NotRecommended" },
        { 1, "Recommended" }
    };

    var reviews = _context.Review.ToList();
    foreach (var review in reviews)
    {
        // Reviews without content can never be recommended.
        if (review.Content == null)
        {
            review.IsRecommended = false;
            continue;
        }

        var node = CreateNode(review.Content, vocabulary);
        review.IsRecommended = labelNames[(int)model.Predict(node)] == "Recommended";
    }

    var recommended = reviews.Where(r => r.IsRecommended == true);
    foreach (var review in recommended)
    {
        review.Place = _context.Place.First(place => place.ID == review.PlaceID);
        review.Comments = _context.Comment.Where(comment => comment.ReviewID == review.ID).ToList();
    }

    return View(recommended.OrderByDescending(r => r.PublishDate));
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Text"/"IsSpam" columns of spamdata.csv, prints its
/// 10-fold cross-validation accuracy, exports the model, and then classifies every line typed on
/// the console until the user enters "quit" or the input stream ends.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Read the training data.
    const string dataFilePath = @"spamdata.csv";
    var dataTable = DataTable.New.ReadCsv(dataFilePath);
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["IsSpam"])).ToArray();

    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    var accuracy = model.GetCrossValidationAccuracy(10);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);
    model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: love hate dong)");
    Console.WriteLine(new string('=', 50));

    // Translates the predicted value (-2 to 2) into its category name.
    _predictionDictionary = new Dictionary<int, string>
    {
        { -2, "Angry" },
        { -1, "Sad" },
        { 0, "Normal" },
        { 1, "Happy" },
        { 2, "Love" }
    };

    // Check the sentinel BEFORE predicting: the previous do/while classified the word "quit" itself,
    // and passed null to CreateNode when Console.ReadLine() hit end of input.
    string userInput;
    while ((userInput = Console.ReadLine()) != null && userInput != "quit")
    {
        var newX = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
        var predictedY = model.Predict(newX);
        Console.WriteLine("The prediction is {0} value is {1} ", _predictionDictionary[(int)predictedY], predictedY);
        Console.WriteLine(new string('=', 50));
    }

    Console.WriteLine("");
}
/// <summary>
/// Builds a problem from the X, Y and Vocabulary members and trains a linear-kernel C_SVC (C = 1)
/// on it.
/// </summary>
/// <returns>The trained model, or <c>null</c> when building the problem or training fails.</returns>
public static C_SVC CreateModel()
{
    try
    {
        var prob = CreateProblem(X, Y, Vocabulary);
        const int C = 1;
        return new C_SVC(prob, KernelHelper.LinearKernel(), C);
    }
    catch (Exception ex)
    {
        // Keep the null-on-failure contract for callers, but stop discarding the cause silently.
        System.Diagnostics.Trace.TraceError("CreateModel failed: {0}", ex);
        return null;
    }
}
/// <summary>
/// Reads the training CSV ("text" and "class" columns), rebuilds the vocabulary field from its
/// texts and trains the model field as a linear-kernel C_SVC.
/// </summary>
/// <param name="path_dataCSV_trainning">Path of the training CSV file.</param>
/// <param name="C">The C value passed to the C_SVC constructor.</param>
public void Create_Train_SVMmodel(string path_dataCSV_trainning, double C)
{
    var table = DataAccess.DataTable.New.ReadCsv(path_dataCSV_trainning);

    List<string> texts = table.Rows.Select(r => r["text"]).ToList();
    double[] classes = table.Rows.Select(r => double.Parse(r["class"])).ToArray();

    vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();

    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(texts, classes.ToArray(), vocabulary.ToList());

    model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
}
/// <summary>
/// When "&lt;filename&gt;TrainSVM.txt" exists: reads the problem from it, scales it, trains a
/// linear-kernel C_SVC on the scaled problem, writes the scaled problem to
/// "&lt;filename&gt;output.txt" and sets fileExistance to true.
/// </summary>
/// <param name="filename">Path prefix that the fixed file names are appended to.</param>
/// <returns>The current value of fileExistance (left untouched when the file is missing).</returns>
public bool buildSVMCorpus(string filename)
{
    string trainDataPath = filename + "TrainSVM.txt";
    if (!File.Exists(trainDataPath))
    {
        return fileExistance;
    }

    _prob = ProblemHelper.ReadProblem(trainDataPath);
    _test = ProblemHelper.ScaleProblem(_prob);
    svm = new C_SVC(_test, KernelHelper.LinearKernel(), C);
    ProblemHelper.WriteProblem(filename + "output.txt", _test);

    fileExistance = true;
    return fileExistance;
}
/// <summary>
/// Calls DataHandler.ImportReviewData(3) and trains the static model as a linear-kernel
/// C_SVC (C = 1) that maps each review's text to its overall value, using DataHandler.Vocabulary.
/// </summary>
public static void Train()
{
    DataHandler.ImportReviewData(3);

    var reviewTexts = DataHandler.Reviews.Select(review => review.reviewText);
    double[] ratings = DataHandler.Reviews.Select(review => review.overall).ToArray();

    var problem = new TextClassificationProblemBuilder()
        .CreateProblem(reviewTexts, ratings, DataHandler.Vocabulary);

    model = new C_SVC(problem, KernelHelper.LinearKernel(), 1);
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 0.5) on the data exposed by a new SVMDataManager, stores the
/// result of GetCrossValidationAccuracy(100) in the accuracy field, and writes the model and the
/// vocabulary to files under the "bin" folder of the application's base directory.
/// </summary>
public void Train()
{
    SVMDataManager data = new SVMDataManager();
    var problemBuilder = new SVMProblemBuilder();
    var problem = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    const double C = 0.5;
    // Per the original author's note, training happens inside the constructor.
    C_SVC model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    accuracy = model.GetCrossValidationAccuracy(100);

    // Only the file name is a format string. The combined path used to be passed through a second
    // string.Format, which throws FormatException when the base directory contains '{' or '}'.
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string modelPath = Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_accuracy.model", accuracy));
    string vocabularyPath = Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_vocabulary.txt", accuracy));

    model.Export(modelPath);
    System.IO.File.WriteAllLines(vocabularyPath, data.Vocabulary);
}
/// <summary>
/// Loads ~/App_Data/TrainingForIsPositiveAlgo.csv ("Text" / "IsPositive" columns), builds the
/// vocabulary, trains the linear-kernel C_SVC (C = 1) and sets up the Bad/Good label names.
/// </summary>
private RestuarantRecomandationByNLP()
{
    string csvPath = HttpContext.Current.Server.MapPath("~/App_Data/TrainingForIsPositiveAlgo.csv");
    var table = DataAccess.DataTable.New.ReadCsv(csvPath);

    List<string> texts = table.Rows.Select(r => r["Text"]).ToList();
    double[] labels = table.Rows.Select(r => double.Parse(r["IsPositive"])).ToArray();

    vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();

    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(texts, labels, vocabulary.ToList());

    model = new C_SVC(problem, KernelHelper.LinearKernel(), 1);

    _predictionDictionary = new Dictionary<int, string>
    {
        { -1, "Bad" },
        { 1, "Good" }
    };
}
/// <summary>
/// When "&lt;filename&gt;SimpleScaledTrainSVM.txt" exists: reads and scales the problem from it,
/// trains a linear-kernel C_SVC, sets fileExistance to true, and loads scale.min / scale.max from
/// the first line ("min,max") of "&lt;filename&gt;MinMax.txt".
/// </summary>
/// <param name="filename">Path prefix that the fixed file names are appended to.</param>
/// <returns>The current value of fileExistance (left untouched when the training file is missing).</returns>
public bool buildSVMCorpus(string filename)
{
    string trainDataPath = filename + "SimpleScaledTrainSVM.txt";
    if (File.Exists(trainDataPath))
    {
        _prob = ProblemHelper.ReadAndScaleProblem(trainDataPath);
        svm = new C_SVC(_prob, KernelHelper.LinearKernel(), C);
        fileExistance = true;

        // Dispose the reader (and its file handle) deterministically; it used to be left open
        // until finalization, keeping MinMax.txt locked.
        using (var reader = new StreamReader(File.OpenRead(filename + "MinMax.txt")))
        {
            string[] minMax = reader.ReadLine().Split(',');
            scale.min = Convert.ToDouble(minMax[0]);
            scale.max = Convert.ToDouble(minMax[1]);
        }
    }

    return fileExistance;
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Genre"/"Mood" columns of ~/MoodCsv/GenreList.txt
/// and reports whether the mood predicted for the given genre text is one of -2, -1, 1 or 2.
/// </summary>
/// <param name="g">Genre text to classify.</param>
/// <returns><c>true</c> when the predicted value is -2, -1, 1 or 2; otherwise <c>false</c>.</returns>
public bool FindMoodMethod(string g)
{
    string dataFilePath = Server.MapPath("~/MoodCsv/GenreList.txt");
    var dataTable = DataTable.New.ReadCsv(dataFilePath);
    List<string> x = dataTable.Rows.Select(row => row["Genre"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["Mood"])).ToArray();

    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    var newX = TextClassificationProblemBuilder.CreateNode(g, vocabulary);
    var predictedY = model.Predict(newX);

    // The label-name dictionary that used to be built here was never read, so it was dropped;
    // the boolean is returned directly instead of through an if/else.
    return predictedY == -2 || predictedY == -1 || predictedY == 1 || predictedY == 2;
}
/// <summary>
/// Runs 10-fold cross-validation ten times on the same linear-kernel model and checks that every
/// run yields the same expected accuracy.
/// </summary>
public void C_SVC_should_always_return_the_same_cross_validation_accuracy_when_probability_is_false()
{
    // Arrange
    var problem = CreateSimpleProblem();
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), 1);

    // Act
    var results = new double[10];
    for (int i = 0; i < results.Length; i++)
    {
        results[i] = model.GetCrossValidationAccuracy(10);
    }

    // Assert — start at index 0: the loop used to start at 1, so the first run was never verified.
    for (int i = 0; i < results.Length; i++)
    {
        Assert.AreEqual(0.90909090909090906, results[i]);
    }
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) on the "Genre"/"Mood" columns of ~/MoodCsv/GenreList.txt,
/// predicts the mood for the movie's genres and exposes the mood name, movie title and poster
/// path to the view through the ViewBag.
/// </summary>
/// <param name="movie">Movie whose with_genres text is classified.</param>
/// <returns>The default view for this action.</returns>
public ActionResult FindMood(MovieList movie)
{
    var table = DataTable.New.ReadCsv(Server.MapPath("~/MoodCsv/GenreList.txt"));
    List<string> genres = table.Rows.Select(r => r["Genre"]).ToList();
    double[] moods = table.Rows.Select(r => double.Parse(r["Mood"])).ToArray();

    var vocabulary = genres.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();
    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(genres, moods, vocabulary.ToList());

    var model = new C_SVC(problem, KernelHelper.LinearKernel(), 1);

    string genreText = movie.with_genres;
    var moodNames = new Dictionary<int, string>
    {
        { -2, "Scared" },
        { -1, "Sad" },
        { 1, "Laugh" },
        { 2, "Romance" }
    };

    var node = TextClassificationProblemBuilder.CreateNode(genreText, vocabulary);
    var predictedMood = model.Predict(node);

    ViewBag.Mood = moodNames[(int)predictedMood];
    ViewBag.MovieTitle = movie.title;
    ViewBag.MoviePoster = movie.poster_path;
    return View();
}
// Trains the algorithm.
public void Train()
{
    // The training data is loaded in the constructor of SVMDataManager.
    SVMDataManager data = new SVMDataManager();

    // Build the matrix (together with its vectors).
    var problemBuilder = new SVMProblemBuilder();
    var problem = problemBuilder.CreateMatrix(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    // C tunes the margin: it is the penalty applied to a misclassification.
    const double C = 0.5;
    C_SVC model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    // Accuracy as returned by GetCrossValidationAccuracy(100).
    // NOTE(review): the original comment described this as the error rate measured by re-running
    // the training data through the finished model; the call used is a cross-validation — confirm
    // which of the two is intended.
    accuracy = model.GetCrossValidationAccuracy(100);

    // Export the model and the vocabulary. Only the file name is a format string: the combined
    // path used to be passed through a second string.Format, which throws FormatException when the
    // base directory contains '{' or '}'.
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string modelPath = Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_accuracy.model", accuracy));
    string vocabularyPath = Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_vocabulary.txt", accuracy));

    model.Export(modelPath);
    System.IO.File.WriteAllLines(vocabularyPath, data.Vocabulary);
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 1) that maps the buyer's user name of each purchase to the
/// purchased product id (all user names form the vocabulary), predicts a product id for
/// <paramref name="userName"/> and puts the matching product into ViewBag.Suggestion.
/// </summary>
/// <param name="userName">User name to produce a suggestion for.</param>
/// <returns>The Suggestion partial view.</returns>
public PartialViewResult GetSuggestion(string userName)
{
    // Both projections order by purchase id so that buyers[i] and productIds[i] describe the same purchase.
    List<string> buyers = db.Purchases.OrderBy(p => p.Id).Select(p => p.User.Username).ToList();
    double[] productIds = db.Purchases.OrderBy(p => p.Id).Select(p => (double)p.Product.Id).ToArray();
    var userNames = db.Users.Select(u => u.Username).ToList();

    var builder = new TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(buyers, productIds, userNames.ToList());

    C_SVC model = new C_SVC(problem, KernelHelper.LinearKernel(), 1);

    var node = TextClassificationProblemBuilder.CreateNode(userName, userNames);
    var predictedProductId = model.Predict(node);

    ViewBag.Suggestion = db.Products.Find((int)predictedProductId);
    return PartialView("~/Views/Suggestion/Suggestion.cshtml");
}
/// <summary>
/// Creates the classifier from a training set and a testing set: every training article, plus every
/// testing article that already has a special coverage, contributes one sample (features text and
/// first special-coverage value); a linear-kernel C_SVC (C = 1) is then trained on those samples.
/// </summary>
/// <param name="trainingSet">Training set loaded from a file</param>
/// <param name="testingSet">Testing set loaded from a file</param>
public SVMClassifier(TrainingSet trainingSet, TestingSet testingSet)
{
    this.trainingSet = trainingSet;
    vocabulary = new HashSet<string>();
    x = new List<string>();
    y = new List<double>();

    // Samples from the training set.
    foreach (Article trainingArticle in trainingSet.articles.Values)
    {
        string trainingFeatures = ArticleFeatures(trainingArticle);
        AddFeaturesToVocabulary(trainingFeatures);
        x.Add(trainingFeatures);
        y.Add(trainingArticle.specialCoverage[0]);
    }

    // Samples from the testing set: only the articles that carry a special coverage.
    foreach (Article labelledArticle in testingSet.articles.Values.Where(a => a.specialCoverage != null))
    {
        string labelledFeatures = ArticleFeatures(labelledArticle);
        AddFeaturesToVocabulary(labelledFeatures);
        x.Add(labelledFeatures);
        y.Add(labelledArticle.specialCoverage[0]);
    }

    // Build the problem and train the model with a linear kernel.
    var builder = new ProblemBuilder();
    var problem = builder.CreateProblem(x, y.ToArray(), vocabulary.ToList());

    const int C = 1; // C parameter for C_SVC
    model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
}
/// <summary>
/// Trains a linear-kernel C_SVC (C = 0.5) on the data exposed by a new DataPreparer, prints the
/// result of GetCrossValidationAccuracy(100), exports the model, and then classifies every line
/// typed on the console until the user enters "exit" or the input stream ends.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    DataPreparer data = new DataPreparer();
    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    const double C = 0.5;
    // Per the original author's note, training happens inside the constructor.
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    var accuracy = model.GetCrossValidationAccuracy(100);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);
    model.Export(string.Format(@"C:\Users\kramek\Desktop\AIC#\model_{0}_accuracy.model", accuracy));
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The Model is ready. \r\nEnter a request to check:");
    Console.WriteLine(new string('=', 50));

    // Check the sentinel BEFORE predicting: the previous do/while classified the word "exit" itself,
    // and passed null to CreateNode when Console.ReadLine() hit end of input.
    string userInput;
    while ((userInput = Console.ReadLine()) != null && userInput != "exit")
    {
        var newX = TextClassificationProblemBuilder.CreateNode(userInput, data.Vocabulary);
        var predictedY = model.Predict(newX);
        Console.WriteLine("The prediction is {0}", _predictionDictionary[(int)predictedY]);
        Console.WriteLine(new string('=', 50));
    }

    Console.WriteLine("");
}
/// <summary>
/// Sentiment-analysis demo: loads and preprocesses the comment data file, builds a vocabulary,
/// extracts labels and features, uses the first 20% of the rows as the test set and the rest for
/// training, trains a linear-kernel C_SVC (C = 0.4), and prints training accuracy, test accuracy
/// and the F1 score on the test set.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    if (!System.Console.IsOutputRedirected)
    {
        System.Console.Clear();
    }

    CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

    System.Console.WriteLine("Sentiment Analysis");
    System.Console.WriteLine("======================\n");

    // Load the raw file.
    System.Console.WriteLine("Loading data....");
    string rawData = ReadDataFile(".\\data\\wikipedia-detox-250-line-data.tsv");

    // Preprocess it into comments.
    System.Console.WriteLine("Processing data....");
    string[,] comments = ProcessComments(rawData);
    System.Console.WriteLine($"Data file contains {comments.GetLength(0)} comments\n");

    // Vocabulary list.
    System.Console.WriteLine("Generating Vocabulary List....");
    string[] vocabulary = GenerateVocabulary(comments);
    System.Console.WriteLine($"Vocabulary generated with {vocabulary.Length} words\n");

    // Labels of the preprocessed comments.
    System.Console.WriteLine("Retrieving labels...");
    Vector<double> labels = GetLables(comments);

    // Features from the comments and the vocabulary.
    System.Console.WriteLine("Extracting features...");
    Matrix<double> features = GetFeatures(comments, vocabulary);

    // Train/test split in ratio 80:20 — the first 20% of the rows become the test set.
    System.Console.WriteLine("Splitting data...");
    int sampleCount = features.RowCount;
    int featureCount = features.ColumnCount;
    int testCount = sampleCount * 20 / 100;
    int trainCount = sampleCount - testCount;

    Vector<double> testLabel = labels.SubVector(0, testCount);
    Matrix<double> testFeatures = features.SubMatrix(0, testCount, 0, featureCount);
    Vector<double> trainingLabel = labels.SubVector(testCount, trainCount);
    Matrix<double> trainingFeatures = features.SubMatrix(testCount, trainCount, 0, featureCount);

    System.Console.WriteLine();
    System.Console.WriteLine($"Test set: {testLabel.Count}");
    System.Console.WriteLine($"Training set: {trainingLabel.Count}");

    // Train the SVM.
    System.Console.WriteLine("\nTraining linear SVM ...\n");
    double C = .4;
    var kernel = KernelHelper.LinearKernel();
    List<List<double>> libSvmRows = ConvertToLibSvmFormat(trainingFeatures, trainingLabel);
    svm_problem trainingProblem = ProblemHelper.ReadProblem(libSvmRows);
    var svc = new C_SVC(trainingProblem, kernel, C);
    System.Console.WriteLine();

    // Accuracy on the training set.
    Vector<double> trainingPrediction = SvmPredic(trainingFeatures, svc);
    double trainingAccuracy = CalculateAccuracy(trainingPrediction, trainingLabel);
    System.Console.WriteLine("Training set Accuracy: {0:f2}%\n", trainingAccuracy);

    // Accuracy on the test set.
    Vector<double> testPrediction = SvmPredic(testFeatures, svc);
    double testAccuracy = CalculateAccuracy(testPrediction, testLabel);
    System.Console.WriteLine("Test set Accuracy: {0:f2}%\n", testAccuracy);

    // F1 score on the test set.
    double f1Score = CalculateF1Score(testPrediction, testLabel);
    System.Console.WriteLine("F1 Score on test set: {0:f2}%\n", f1Score * 100);
}
/// <summary>
/// Builds a problem from the data set's training data and vocabulary and trains the model field as a
/// linear-kernel C_SVC with probability estimates enabled.
/// </summary>
public void Train()
{
    model = new C_SVC(
        problemBuilder.CreateProblem(dataSet.TrainData, dataSet.Vocabulary),
        KernelHelper.LinearKernel(),
        c,
        probability: true);
}
/// <summary>
/// Kernel comparison experiment: loads the continuous train/test data, dumps the continuous columns
/// to "&lt;index&gt;.txt" files, discretises both sets, prints the accuracy of four SVM kernels and of
/// a Naive Bayes classifier, then trains one SVM per kernel and per training sample and prints
/// bias, variance and accuracy for each kernel.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    List<double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
    List<double[]> continuousTestData = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

    // Write each continuous column to its own file so elbows can be computed in an external tool
    // (https://bl.ocks.org/rpgove/0060ff3b656618e9136b).
    foreach (int columnIndex in _continuousIndexes)
    {
        using (StreamWriter writer = new StreamWriter($"{columnIndex}.txt"))
        {
            writer.WriteLine(string.Join(",", continuousTrainData.Select(row => row[columnIndex])));
        }
    }

    // Continuous -> discrete, using clusters fitted on the training data.
    Dictionary<int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
    List<int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
    List<int[]> discreteTestData = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

    // libsvm expects the class in front: swap it there (this mutates the row in place) and
    // widen the row to doubles.
    Func<int[], List<double>> classFirstRow = row =>
    {
        int first = row[0];
        row[SVMSupportedClassIndex] = row[OriginalClassIndex];
        row[OriginalClassIndex] = first;
        return row.Select(value => (double)value).ToList();
    };

    var trainProblem = ProblemHelper.ReadProblem(discreteTrainData.Select(classFirstRow).ToList());
    var testProblem = ProblemHelper.ReadProblem(discreteTestData.Select(classFirstRow).ToList());

    // Defaults taken from http://weka.sourceforge.net/doc.stable/weka/classifiers/functions/LibSVM.html
    double c = 1;                          // default C is 1
    // NOTE(review): the original comment says "default gamma is 1/k", while the divisor used here is
    // problem.l — confirm that this is the intended k.
    double gamma = 1.0 / trainProblem.l;
    double r = 0;                          // default coef0 is 0
    int degree = 3;                        // default degree is 3

    var nameKernelMap = new Dictionary<string, Kernel>(StringComparer.OrdinalIgnoreCase)
    {
        { "Linear", KernelHelper.LinearKernel() },
        { "Polynomial", KernelHelper.PolynomialKernel(degree, gamma, r) },
        { "Radial", KernelHelper.RadialBasisFunctionKernel(gamma) },
        { "Sigmoid", KernelHelper.SigmoidKernel(gamma, r) },
    };

    // Baseline accuracies. Deliberately sequential: the underlying implementation is not known to be
    // thread-safe.
    foreach (KeyValuePair<string, Kernel> entry in nameKernelMap)
    {
        Console.WriteLine($"{entry.Key}: {GetSVMAccuracy(trainProblem, testProblem, entry.Value, c)}");
    }

    // Naive Bayes accuracy for comparison.
    double[] classWeightPrior = new[] { 1.0, 1.0 };
    double[] classPriorProbability = new[] { 0.5, 0.5 };
    NaiveBayesClassifier naiveBayes = NaiveBayesClassifier.Load(discreteTrainData, SVMSupportedClassIndex, classWeightPrior, classPriorProbability);
    Console.WriteLine($"Naive Bayes: {naiveBayes.GetPredictionAccuracy(discreteTestData, SVMSupportedClassIndex)}");

    // Bias / variance: kernel -> test instance -> classifier -> prediction.
    List<List<int[]>> samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);
    var predictionsByKernel =
        new ConcurrentDictionary<string, ConcurrentDictionary<int, ConcurrentDictionary<int, int>>>(StringComparer.OrdinalIgnoreCase);

    foreach (string kernelName in nameKernelMap.Keys)
    {
        var predictionsByInstance = predictionsByKernel.GetOrAdd(
            kernelName, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());

        for (int classifierIndex = 0; classifierIndex < BiasVarianceNumOfSamples; classifierIndex++)
        {
            // One classifier per training sample.
            var sampleProblem = ProblemHelper.ReadProblem(
                samples[classifierIndex].Select(row => row.Select(value => (double)value).ToList()).ToList());
            var svm = new C_SVC(sampleProblem, nameKernelMap[kernelName], c);

            for (int instanceIndex = 0; instanceIndex < discreteTestData.Count; instanceIndex++)
            {
                var predictionsByClassifier = predictionsByInstance.GetOrAdd(
                    instanceIndex, new ConcurrentDictionary<int, int>());

                // Wrap the single test instance in a problem to obtain its node vector.
                var instanceProblem = ProblemHelper.ReadProblem(new List<List<double>>
                {
                    discreteTestData[instanceIndex].Select(value => (double)value).ToList()
                });

                for (int nodeIndex = 0; nodeIndex < instanceProblem.l; nodeIndex++)
                {
                    predictionsByClassifier.GetOrAdd(classifierIndex, (int)svm.Predict(instanceProblem.x[nodeIndex]));
                }
            }
        }
    }

    Console.WriteLine("Kernel, Bias, Variance, Accuracy");
    foreach (string kernelName in nameKernelMap.Keys)
    {
        var predictionsByInstance = predictionsByKernel.GetOrAdd(
            kernelName, new ConcurrentDictionary<int, ConcurrentDictionary<int, int>>());
        Tuple<double, double, double> biasVarianceAccuracy =
            BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, SVMSupportedClassIndex, predictionsByInstance);
        Console.WriteLine($"{kernelName}, {biasVarianceAccuracy.Item1}, {biasVarianceAccuracy.Item2}, {biasVarianceAccuracy.Item3}");
    }

    Console.WriteLine("Press ENTER to continue...");
    Console.ReadLine();
}
/// <summary>
/// Trains a linear-kernel C_SVC on ~/Data/data_train.csv ("Text" / "Type" columns), exports the
/// model, and returns the per-class probabilities predicted for the preprocessed
/// <paramref name="input"/>.
/// </summary>
/// <param name="input">Text handed to TextPreprocessorService.parseJSONText before classification.</param>
/// <returns>
/// The probabilities returned by the model, or a dictionary with keys 1, 2 and 3 all mapped to 0
/// when the preprocessed text is empty.
/// </returns>
public Dictionary<int, double> PredictByText(string input)
{
    // Read the training data.
    string csvPath = System.Web.HttpContext.Current.Server.MapPath("~/Data/data_train.csv");
    var table = DataAccess.DataTable.New.ReadCsv(csvPath);
    List<string> texts = table.Rows.Select(r => r["Text"]).ToList();
    double[] types = table.Rows.Select(r => double.Parse(r["Type"])).ToArray();
    var vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();

    Console.WriteLine("Creating problem");
    var builder = new DataPreprocess.TextClassificationProblemBuilder();
    var problem = builder.CreateProblem(texts, types, vocabulary.ToList());

    System.Diagnostics.Debug.WriteLine("Creating model");
    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C, 100, true);
    var accuracy = model.GetCrossValidationAccuracy(10);

    System.Diagnostics.Debug.WriteLine(new string('=', 50));
    System.Diagnostics.Debug.WriteLine("Accuracy of the model is {0:P}", accuracy);
    model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));
    System.Diagnostics.Debug.WriteLine(new string('=', 50));
    System.Diagnostics.Debug.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction.");
    System.Diagnostics.Debug.WriteLine(new string('=', 50));

    _predictionDictionary = new Dictionary<int, string>
    {
        { 1, "ID" },
        { 2, "Documents" },
        { 3, "Forme" }
    };

    string processedText = TextPreprocessorService.parseJSONText(input);
    processedText = TextPreprocessorService.ProcessText(ref processedText);

    // Nothing left to classify: report zero probability for every class.
    if (processedText.Equals(""))
    {
        return new Dictionary<int, double>() { { 1, 0 }, { 2, 0 }, { 3, 0 } };
    }

    var node = TextClassificationProblemBuilder.CreateNode(processedText, vocabulary);
    var predictedType = model.Predict(node);
    System.Diagnostics.Debug.WriteLine(predictedType);

    Dictionary<int, double> probabilities = model.PredictProbabilities(node);
    System.Diagnostics.Debug.WriteLine("Prob(1): " + probabilities[1]);
    System.Diagnostics.Debug.WriteLine("Prob(2): " + probabilities[2]);
    System.Diagnostics.Debug.WriteLine("Prob(3): " + probabilities[3]);
    System.Diagnostics.Debug.WriteLine("The prediction is {0} value is {1} ", _predictionDictionary[(int)predictedType], predictedType);

    return probabilities;
}
/// <summary>
/// Command-line entry point: trains a C-SVC from a training file and exports the model.
/// </summary>
/// <remarks>
/// Supported invocations:
///   file                        - train with a linear kernel (further arguments are ignored);
///   kernel matrix               - train with the selected kernel;
///   kernel matrix labels        - as above, reformatting the matrix first when required;
///   kernel matrix labels test   - as above; the test file is accepted but not used yet.
/// Kernel numbers: 0 = linear, 1 = polynomial, 2 = RBF. Any other number (including
/// 3 = sigmoid and 4 = precomputed, which are not implemented here) falls back to linear.
/// </remarks>
/// <param name="args">Command-line arguments as described above.</param>
static void Main(string[] args)
{
    /* SVM specific initializations */
    int degree = 3;   // default for none specified
    int r = 1;
    // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
    double C = 2.0;
    double gamma = 0.001953125;   // used for Radial Basis Function Kernel (RBF)

    if (args.Length < 1)
    {
        // Exit if no params passed on the command line
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return;
    }

    int kernelchoice;
    bool kernelparam = Int32.TryParse(args[0], out kernelchoice);

    string inputmatrix;
    bool needsFormatting = false;

    if (!kernelparam)
    {
        // No kernel number given: the first argument is the training file.
        inputmatrix = args[0];
    }
    else
    {
        if (args.Length == 1)
        {
            // A single parameter can't be an int: there would be no file to train on.
            Console.WriteLine(MyStrings.usage);
            System.Environment.Exit(1);
            return;
        }

        if (args.Length > 4)
        {
            Console.WriteLine("too many parameters");
            Console.WriteLine(MyStrings.usage);
            System.Environment.Exit(1);
            return;
        }

        inputmatrix = args[1];
        // NOTE(review): CheckFormat's result is treated here as "needs formatting", as the
        // original code did - confirm the helper's polarity.
        needsFormatting = HelperFunctions.CheckFormat(inputmatrix);

        if (needsFormatting && args.Length < 3)
        {
            Console.WriteLine("Missing label file");
            System.Environment.Exit(1);
            return;
        }
    }

    string savefilename = inputmatrix.Replace(".mat", ".svm");   // update the suffix

    if (needsFormatting)
    {
        /* The input matrix is not in the expected format yet: write a reformatted copy to
         * savefilename. The labels are expected to be in a separate file.
         */
        string labelfile = args[2];
        int vectorlength = HelperFunctions.VectorLength(inputmatrix);           // number of features
        string[] labels = new string[HelperFunctions.SampleSize(labelfile)];    // one slot per label
        HelperFunctions.Reformatdata(inputmatrix, labels, savefilename, vectorlength);
    }

    // Train the SVM
    /* LIBSVM training output, for reference:
     * "." means every 1,000 iterations (or every #data iterations if #data is less than 1,000).
     * "*" means that after iterations of using a smaller shrunk problem, we reset to use the whole set.
     *
     * optimization finished, #iter = 219
     * nu = 0.431030
     * obj = -100.877286, rho = 0.424632
     * nSV = 132, nBSV = 107
     * Total nSV = 132
     *
     * obj is the optimal objective value of the dual SVM problem. rho is the bias term in the
     * decision function sgn(w^Tx - rho). nSV and nBSV are number of support vectors and bounded
     * support vectors (i.e., alpha_i = C). nu-svm is a somewhat equivalent form of C-SVM where C
     * is replaced by nu. nu simply shows the corresponding parameter.
     */
    C_SVC svm;
    string kerneltype;
    // Without a kernel argument the linear kernel (0) is used.
    switch (kernelparam ? kernelchoice : 0)
    {
        case 1:
            svm = new C_SVC(savefilename, KernelHelper.PolynomialKernel(degree, gamma, r), C);
            kerneltype = "Polynomial";
            break;
        case 2:
            svm = new C_SVC(savefilename, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            kerneltype = "RBF";
            break;
        default:
            svm = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
            kerneltype = "Linear";
            break;
    }

    string save_model_name = savefilename.Replace(".svm", ".model");
    svm.Export(save_model_name);

    // Prediction against the optional test file (args[3]) is not wired up yet.
    Console.WriteLine("SVM kernel type {0}", kerneltype);
}
/// <summary>
/// Walks through the SVM exercise: plots three datasets, trains a linear and two RBF-kernel
/// C-SVC models, evaluates the Gaussian kernel on a fixed pair of vectors and prints the
/// cross-validation accuracy for dataset 2.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    if (!System.Console.IsOutputRedirected)
    {
        System.Console.Clear();
    }

    CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");
    var V = Vector<double>.Build;

    // Path.Combine keeps the data paths valid on hosts whose separator is not '\'.
    Dictionary<string, Matrix<double>> LoadDataset(string fileName) =>
        MatlabReader.ReadAll<double>(System.IO.Path.Combine("data", fileName));

    // Converts the samples to the LIBSVM layout and wraps them in a problem.
    svm_problem BuildProblem(Matrix<double> features, Vector<double> labels)
    {
        List<List<double>> libSvmData = ConvertToLibSvmFormat(features, labels);
        return ProblemHelper.ReadProblem(libSvmData);
    }

    //// =============== Part 1: Loading and Visualizing Data ================
    //  We start the exercise by first loading and visualizing the dataset.
    //  The following code will load the dataset into your environment and plot
    //  the data.
    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load from ex6data1: provides X and y
    Dictionary<string, Matrix<double>> ms = LoadDataset("ex6data1.mat");
    Matrix<double> X = ms["X"];             // 51 X 2
    Vector<double> y = ms["y"].Column(0);   // 51 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);
    Pause();

    //// ==================== Part 2: Training Linear SVM ====================
    //  The following code will train a linear SVM on the dataset and plot the
    //  decision boundary learned.
    System.Console.WriteLine("\nTraining Linear SVM ...\n");

    // You should try to change the C value below and see how the decision
    // boundary varies (e.g., try C = 1000)
    double C = 1.0;
    var linearKernel = KernelHelper.LinearKernel();
    svm_problem prob = BuildProblem(X, y);
    var svc = new C_SVC(prob, linearKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();
    System.Console.WriteLine();
    Pause();

    //// =============== Part 3: Implementing Gaussian Kernel ===============
    //  Evaluates GaussianKernel on two fixed vectors and prints the similarity.
    System.Console.WriteLine("\nEvaluating the Gaussian Kernel ...\n");

    double sigma = 2.0;
    double sim = GaussianKernel(
        V.DenseOfArray(new[] { 1.0, 2, 1 }),
        V.DenseOfArray(new[] { 0.0, 4, -1 }),
        sigma);

    System.Console.WriteLine("Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0:f6} :\n\t{1:f6}\n(for sigma = 2, this value should be about 0.324652)\n", sigma, sim);
    Pause();

    //// =============== Part 4: Visualizing Dataset 2 ================
    //  The following code will load the next dataset into your environment and
    //  plot the data.
    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load from ex6data2: provides X and y
    ms = LoadDataset("ex6data2.mat");
    X = ms["X"];             // 863 X 2
    y = ms["y"].Column(0);   // 863 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);
    Pause();

    //// ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
    //  With the kernel implemented, use it to train the SVM classifier.
    System.Console.WriteLine("\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n");

    // SVM Parameters
    C = 1;
    sigma = 0.1;
    double gamma = 1 / (2 * sigma * sigma);
    var rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);
    prob = BuildProblem(X, y);
    svc = new C_SVC(prob, rbfKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();
    Pause();

    double acc = svc.GetCrossValidationAccuracy(10);
    System.Console.WriteLine("\nCross Validation Accuracy: {0:f6}\n", acc);
    Pause();

    //// =============== Part 6: Visualizing Dataset 3 ================
    //  The following code will load the next dataset into your environment and
    //  plot the data.
    System.Console.WriteLine("Loading and Visualizing Data ...\n");

    // Load from ex6data3: provides X, y, Xval and yval
    ms = LoadDataset("ex6data3.mat");
    X = ms["X"];                                   // 211 X 2
    y = ms["y"].Column(0);                         // 211 X 1
    Matrix<double> Xval = ms["Xval"];              // 200 X 2
    Vector<double> yval = ms["yval"].Column(0);    // 200 X 1

    // Plot training data
    GnuPlot.HoldOn();
    PlotData(X, y);

    //// ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
    //  This is a different dataset that you can use to experiment with. Try
    //  different values of C and sigma here.
    (C, sigma) = Dataset3Params(X, y, Xval, yval);
    gamma = 1 / (2 * sigma * sigma);
    rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);
    prob = BuildProblem(X, y);
    svc = new C_SVC(prob, rbfKernel, C);

    PlotBoundary(X, svc);
    GnuPlot.HoldOff();
    Pause();
}
/// <summary>
/// Command-line entry point: trains a C-SVC with the selected kernel, exports the model and
/// reports the accuracy on a test set.
/// </summary>
/// <param name="args">
/// Exactly three values: the kernel number (0 = linear, 1 = polynomial, 2 = RBF, 3 = sigmoid),
/// the training file and the test file.
/// </param>
static void Main(string[] args)
{
    /* SVM specific initializations */
    int degree = 3;   // default for none specified
    int r = 1;
    // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
    double C = 2.0;
    double gamma = 0.001953125;   // used for Radial Basis Function Kernel (RBF)

    /*
     * Three parameters are required, kernel selection, training file and test file
     */
    if (args.Length != 3)
    {
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return;
    }

    // Legal values for kernelchoice are 0-3; negative numbers are rejected as well.
    bool kernelparam = Int32.TryParse(args[0], out int kernelchoice)
                       && kernelchoice >= 0
                       && kernelchoice <= 3;
    if (!kernelparam)
    {
        // Not a legal kernel selection
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return;
    }

    string inputmatrix = args[1];
    string testfile = args[2];

    if (!HelperFunctions.CheckFormat(inputmatrix))
    {
        Console.WriteLine(MyStrings.TrainingFileFormat, inputmatrix);
        System.Environment.Exit(1);
        return;
    }

    if (!File.Exists(testfile))
    {
        // Report the file that is actually missing (the test file, not the training file).
        Console.WriteLine(MyStrings.File_error, testfile);
        System.Environment.Exit(1);
        return;
    }

    // Train the SVM
    C_SVC svm;
    string kerneltype;
    switch (kernelchoice)
    {
        case 1:
            svm = new C_SVC(inputmatrix, KernelHelper.PolynomialKernel(degree, gamma, r), C);
            kerneltype = MyStrings.Polynomial;
            break;
        case 2:
            svm = new C_SVC(inputmatrix, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            kerneltype = MyStrings.RBF;
            break;
        case 3:
            svm = new C_SVC(inputmatrix, KernelHelper.SigmoidKernel(gamma, r), C);
            kerneltype = MyStrings.Sigmoid;
            break;
        default:
            // Only 0 reaches this branch after the range check above.
            svm = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
            kerneltype = MyStrings.Linear;
            break;
    }

    string save_model_name = String.Concat(inputmatrix, ".model");
    svm.Export(save_model_name);

    double result = HelperFunctions.PredictTestSet(testfile, svm);
    Console.WriteLine(MyStrings.Accuracy, Math.Round(result * 100, 2));
    Console.Write("SVM kernel type {0} ", kerneltype);
    Console.WriteLine(MyStrings.Parameters, C, gamma, degree, r);
}
/// <summary>
/// Trains a linear C-SVC text classifier on <paramref name="trainFile"/>, labels every line of
/// <paramref name="testFile"/>, writes the labelled lines to <c>../../svm/testResult.txt</c>
/// and prints a per-class score against <paramref name="testTarget"/>.
/// </summary>
/// <param name="trainFile">File with the labelled training documents.</param>
/// <param name="testFile">File with the documents to classify, one per entry returned by <c>FileIO.ReadFile</c>.</param>
/// <param name="testTarget">File with the expected labels used for scoring.</param>
public static void ClassifyBySVM(string trainFile, string testFile, string testTarget)
{
    string testResultFile = "../../svm/testResult.txt";
    var watch = System.Diagnostics.Stopwatch.StartNew();

    //STEP 1 : READ DATA
    List<Vector> vectorsTrain;
    var content = FileIO.ReadFileIntoVector(trainFile, out vectorsTrain, true);
    // Materialise once: the class list is indexed and iterated several times below.
    var typeClass = Vector.GetDistinctClassTypes(vectorsTrain).ToList();

    //Get content of document and label of document
    double[] label = GetLableOfDocument(vectorsTrain);

    //Get features list
    var features = content.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    //STEP 2: Generate a problem
    var problem = TextClassificationProblemBuilder.CreateProblem(content, label, features.ToList());

    //STEP 3: Create and train a SVM model
    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    //STEP 4: Predict
    List<string> test = FileIO.ReadFile(testFile);
    List<string> resultList = new List<string>();

    List<Vector> targetVector;
    FileIO.ReadFileIntoVector(testTarget, out targetVector, true);

    // Maps a class index to its name.
    // NOTE(review): this assumes the labels from GetLableOfDocument are indices into typeClass - confirm.
    _predictionDictionary = new Dictionary<int, string>();
    for (int l = 0; l < typeClass.Count; l++)
    {
        _predictionDictionary.Add(l, typeClass[l]);
    }

    foreach (string document in test)
    {
        var newX = TextClassificationProblemBuilder.CreateNode(document, features);
        var predictedY = model.Predict(newX);
        var result = _predictionDictionary[(int)predictedY];
        resultList.Add(result + " - " + document);
    }

    FileIO.WriteFile(resultList, testResultFile);

    // Read the predictions back in the same shape as the target so they can be compared.
    List<Vector> sourceVector;
    FileIO.ReadFileIntoVector(testResultFile, out sourceVector, true);

    foreach (var className in typeClass)
    {
        // Count once per class and reuse the figures for both the score and the report.
        var correct = Vector.CountShareSameTypeRecords(className, sourceVector, targetVector);
        var total = Vector.CountClassElements(className, targetVector);

        // A class missing from the target would otherwise print NaN or Infinity.
        double score = total == 0 ? 0.0 : 1.0 * correct / total;

        Console.WriteLine("correct label: " + correct);
        Console.WriteLine("total label: " + total);
        Console.WriteLine("SVM score: " + score);
    }

    Console.WriteLine("The time for SVM: {0} ", watch.ElapsedMilliseconds);
}