/// <summary>
/// Trains one RBF-kernel C-SVC for each of the three pairwise problem files
/// (DvC, DvH, HvC), cross-validates every model, exports the models into the
/// current working directory and prints the accuracies as percentages.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string workingDir = Environment.CurrentDirectory;

    // Resolve all input locations up front.
    string dvcInput = System.IO.Path.Combine(workingDir, DvC_TEST_FILE);
    string dvhInput = System.IO.Path.Combine(workingDir, DvH_TEST_FILE);
    string hvcInput = System.IO.Path.Combine(workingDir, HvC_TEST_FILE);

    // The problems are kept in members declared outside this method.
    DvC_prob = ProblemHelper.ReadAndScaleProblem(dvcInput);
    DvH_prob = ProblemHelper.ReadAndScaleProblem(dvhInput);
    HvC_prob = ProblemHelper.ReadAndScaleProblem(hvcInput);

    // One classifier per problem, all sharing the same gamma and C.
    var dvcClassifier = new C_SVC(DvC_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);
    var dvhClassifier = new C_SVC(DvH_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);
    var hvcClassifier = new C_SVC(HvC_prob, KernelHelper.RadialBasisFunctionKernel(gamma), C);

    // Note the DvH problem is validated with 2 as the fold argument, the others with 5.
    var dvcAccuracy = dvcClassifier.GetCrossValidationAccuracy(5);
    var dvhAccuracy = dvhClassifier.GetCrossValidationAccuracy(2);
    var hvcAccuracy = hvcClassifier.GetCrossValidationAccuracy(5);

    dvcClassifier.Export(System.IO.Path.Combine(workingDir, DvC_MODEL_FILE));
    dvhClassifier.Export(System.IO.Path.Combine(workingDir, DvH_MODEL_FILE));
    hvcClassifier.Export(System.IO.Path.Combine(workingDir, HvC_MODEL_FILE));

    const string separator = "--------------------------";

    Console.WriteLine(separator);
    Console.WriteLine(String.Format("DvC Result: {0}%", Math.Round(dvcAccuracy * 100, 2)));
    Console.WriteLine(String.Format("DvH Result: {0}%", Math.Round(dvhAccuracy * 100, 2)));
    Console.WriteLine(String.Format("HvC Result: {0}%", Math.Round(hvcAccuracy * 100, 2)));
    Console.WriteLine(separator);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Trains a linear-kernel C-SVC text classifier from <c>Data.csv</c> located in the
/// site's <c>~/DAL/svm/</c> folder, prints its cross-validation accuracy and exports
/// the trained model into the same folder.
/// </summary>
/// <remarks>
/// The vocabulary and the trained model are stored in the <c>vocabulary</c> and
/// <c>model</c> members declared outside this method.
/// </remarks>
public static void Main()
{
    // STEP 4: Read the data
    string dataFilePath = HttpContext.Current.Server.MapPath("~/DAL/svm/");
    var dataTable = DataTable.New.ReadCsv(dataFilePath + "Data.csv");
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["Category"])).ToArray();

    // Sorted list of the distinct words found in all texts.
    vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    // If you want you can save this problem with :
    // ProblemHelper.WriteProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem", problem);
    // And then load it again using:
    // var problem = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

    const int C = 1;
    model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    var accuracy = model.GetCrossValidationAccuracy(10);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);

    // Only the file name goes through string.Format. The mapped directory is
    // appended verbatim so that '{' or '}' in the physical path cannot be
    // misread as format items (which would throw FormatException).
    model.Export(dataFilePath + string.Format("model_{0}_accuracy.model", accuracy));

    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: sunny rainy sunny)");
    Console.WriteLine(new string('=', 50));
}
/// <summary>
/// Trains a linear-kernel C-SVC text classifier from <c>spamdata.csv</c>, prints its
/// cross-validation accuracy, exports the model and then classifies sentences typed
/// on the console until the user enters <c>quit</c> or the input stream ends.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // STEP 4: Read the data
    const string dataFilePath = @"spamdata.csv";
    var dataTable = DataTable.New.ReadCsv(dataFilePath);
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["IsSpam"])).ToArray();

    // Sorted list of the distinct words found in all texts.
    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    // If you want you can save this problem with :
    // ProblemHelper.WriteProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem", problem);
    // And then load it again using:
    // var problem = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
    var accuracy = model.GetCrossValidationAccuracy(10);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);
    model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: love hate dong)");
    Console.WriteLine(new string('=', 50));

    // Translates the predicted value (-2 to 2) into its category name.
    _predictionDictionary = new Dictionary<int, string>
    {
        { -2, "Angry" },
        { -1, "Sad" },
        { 0, "Normal" },
        { 1, "Happy" },
        { 2, "Love" }
    };

    // Console.ReadLine returns null at end of input (closed or redirected stdin).
    // Both that and the "quit" sentinel end the session *before* anything is
    // classified, so the sentinel itself is never scored as a sentence.
    string userInput;
    while ((userInput = Console.ReadLine()) != null && userInput != "quit")
    {
        var newX = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
        var predictedY = model.Predict(newX);

        // A label without a mapping must not abort the session.
        string category;
        if (!_predictionDictionary.TryGetValue((int)predictedY, out category))
        {
            category = "Unknown";
        }

        Console.WriteLine("The prediction is {0} value is {1} ", category, predictedY);
        Console.WriteLine(new string('=', 50));
    }

    Console.WriteLine("");
}
/// <summary>
/// Trains a linear-kernel C-SVC on the data supplied by <c>SVMDataManager</c>, stores
/// the cross-validation accuracy in the <c>accuracy</c> member and writes the model
/// and its vocabulary into the <c>bin</c> folder under the application base directory.
/// </summary>
public void Train()
{
    SVMDataManager data = new SVMDataManager();

    var problemBuilder = new SVMProblemBuilder();
    var problem = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    const double C = 0.5;
    C_SVC model = new C_SVC(problem, KernelHelper.LinearKernel(), C); // Train is called automatically here
    accuracy = model.GetCrossValidationAccuracy(100);

    // Only the relative file names are format strings. The combined path is used
    // as-is: running it through string.Format again would throw FormatException
    // whenever the base directory contains '{' or '}'.
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string modelPath = Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_accuracy.model", accuracy));
    string vocabularyPath = Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_vocabulary.txt", accuracy));

    model.Export(modelPath);
    System.IO.File.WriteAllLines(vocabularyPath, data.Vocabulary);
}
// Training the algorithm
/// <summary>
/// Trains a linear-kernel C-SVC on the data supplied by <c>SVMDataManager</c>, stores
/// the resulting accuracy in the <c>accuracy</c> member and writes the model and its
/// vocabulary into the <c>WAF</c> folder under the application base directory.
/// </summary>
public void Train()
{
    // Loading the data from the training sets happens in the SVMDataManager constructor.
    SVMDataManager data = new SVMDataManager();

    // Build the matrix (together with its vectors).
    var problemBuilder = new SVMProblemBuilder();
    var problem = problemBuilder.CreateMatrix(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    // The C parameter drives the margin optimisation: it is the loss/penalty
    // value applied to a misclassification.
    const double C = 0.5;
    C_SVC model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

    // Accuracy is computed as a percentage based on the training data.
    // The original note says that, once the best available separating hyperplane
    // has been found, the training data is run through the model again and the
    // share of misclassifications is computed from that.
    // NOTE(review): the call below is GetCrossValidationAccuracy(100), which by
    // its name performs cross-validation rather than a plain re-scoring of the
    // training set - confirm which description is intended.
    accuracy = model.GetCrossValidationAccuracy(100);

    // Export the model and the vocabulary.
    // Only the relative file names are format strings. The combined path is used
    // as-is: running it through string.Format again would throw FormatException
    // whenever the base directory contains '{' or '}'.
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
    string modelPath = Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_accuracy.model", accuracy));
    string vocabularyPath = Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_vocabulary.txt", accuracy));

    model.Export(modelPath);
    System.IO.File.WriteAllLines(vocabularyPath, data.Vocabulary);
}
/// <summary>
/// Verifies that a trained <c>C_SVC</c> can be exported to a file and that a model
/// constructed from that file still passes the XOR check.
/// </summary>
public void C_SVC_should_enable_to_export_and_import_svm_models()
{
    // note : K(u; v) = (u v + 1)^2 kernel is able to fit exactly the xor function
    // see http://www.doc.ic.ac.uk/~dfg/ProbabilisticInference/IDAPILecture18.pdf for more infos
    var svm = new C_SVC(xor_problem, KernelHelper.PolynomialKernel(2, 1, 1), 1);
    var file_name = System.IO.Path.Combine(base_path, "test_export_temp.xml");

    // make sure directory is clean
    if (File.Exists(file_name))
    {
        File.Delete(file_name);
    }

    try
    {
        svm.Export(file_name);
        Assert.IsTrue(File.Exists(file_name));

        var new_svm = new C_SVC(file_name);
        checkXOR(new_svm);
    }
    finally
    {
        // cleanup - also runs when an assertion or the import fails, so a failed
        // run does not leave the temporary file behind for the next one.
        if (File.Exists(file_name))
        {
            File.Delete(file_name);
        }
    }
}
/// <summary>
/// Trains a linear-kernel C-SVC on the data supplied by <c>DataPreparer</c>, prints
/// its cross-validation accuracy, exports the model and then classifies requests
/// typed on the console until the user enters <c>exit</c> or the input stream ends.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    DataPreparer data = new DataPreparer();

    var problemBuilder = new TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

    const double C = 0.5;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C); // Train is called automatically here
    var accuracy = model.GetCrossValidationAccuracy(100);

    Console.Clear();
    Console.WriteLine(new string('=', 50));
    Console.WriteLine("Accuracy of the model is {0:P}", accuracy);

    // NOTE(review): the export location is a hard-coded, user-specific absolute
    // path; it is kept unchanged here because callers may depend on it.
    model.Export(string.Format(@"C:\Users\kramek\Desktop\AIC#\model_{0}_accuracy.model", accuracy));

    Console.WriteLine(new string('=', 50));
    Console.WriteLine("The Model is ready. \r\nEnter a request to check:");
    Console.WriteLine(new string('=', 50));

    // Console.ReadLine returns null at end of input (closed or redirected stdin).
    // Both that and the "exit" sentinel end the session *before* anything is
    // classified, so the sentinel itself is never scored as a request.
    string userInput;
    while ((userInput = Console.ReadLine()) != null && userInput != "exit")
    {
        var newX = TextClassificationProblemBuilder.CreateNode(userInput, data.Vocabulary);
        var predictedY = model.Predict(newX);

        // A label without a mapping must not abort the session.
        string category;
        if (!_predictionDictionary.TryGetValue((int)predictedY, out category))
        {
            category = "Unknown";
        }

        Console.WriteLine("The prediction is {0}", category);
        Console.WriteLine(new string('=', 50));
    }

    Console.WriteLine("");
}
/// <summary>
/// Command-line entry point: optionally converts a raw training matrix to the
/// LIBSVM file format, trains a C-SVC with the requested kernel and exports the
/// trained model.
/// </summary>
/// <remarks>
/// Accepted argument forms:
/// <list type="bullet">
/// <item><c>file</c> - train with the linear kernel.</item>
/// <item><c>kernel file [labels [test]]</c> - <c>kernel</c> is an integer
/// (0 = linear, 1 = polynomial, 2 = RBF, 3 = sigmoid, 4 = precomputed). Only 0-2 are
/// implemented here; every other value falls back to the linear kernel.</item>
/// </list>
/// Training always reads the file whose name is the input name with <c>.mat</c>
/// replaced by <c>.svm</c>. The model is written to that name with <c>.svm</c>
/// replaced by <c>.model</c>.
/// </remarks>
/// <param name="args">Command-line arguments as described above.</param>
static void Main(string[] args)
{
    /* SVM specific initializations */
    const int degree = 3;              // default for none specified
    const int r = 1;
    // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
    const double C = 2.0;
    const double gamma = 0.001953125;  // used for Radial Basis Function Kernel (RBF)

    // Exit if no params passed on the command line.
    if (args.Length < 1)
    {
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return; // Exit does not return; this only satisfies definite assignment.
    }

    // An integer first argument is the kernel selection, otherwise it is the file name.
    int kernelchoice;
    bool kernelparam = Int32.TryParse(args[0], out kernelchoice);

    string inputmatrix;
    string labelfile = null;

    if (!kernelparam)
    {
        // File name only: train with the default (linear) kernel.
        inputmatrix = args[0];
    }
    else
    {
        switch (args.Length)
        {
            case 1:
                // A single parameter can't be the kernel number.
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
                return;
            case 2:
                inputmatrix = args[1];
                break;
            case 3:
            case 4:
                // A 4th argument (test file) is accepted, but prediction on a
                // test set is not implemented in this method, so it is not read.
                inputmatrix = args[1];
                labelfile = args[2];
                break;
            default:
                Console.WriteLine("too many parameters");
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
                return;
        }
    }

    string savefilename = inputmatrix.Replace(".mat", ".svm"); // update the suffix

    // NOTE(review): CheckFormat is treated as "true when the file is already in
    // the proper format" (the original stored its result in `properformat`), so
    // a file needs reformatting when it returns false - confirm against HelperFunctions.
    if (kernelparam && !HelperFunctions.CheckFormat(inputmatrix))
    {
        if (labelfile == null)
        {
            Console.WriteLine("Missing label file");
            System.Environment.Exit(1);
            return;
        }

        /* If the input matrix is not already in the correct format, call the reformat
         * function. The result is a file written in the LIBSVM format; the labels are
         * expected to be in a separate file. */
        int vectorlength = HelperFunctions.VectorLength(inputmatrix);         // number of features
        string[] labels = new string[HelperFunctions.SampleSize(labelfile)];  // storage sized by the number of labels
        HelperFunctions.Reformatdata(inputmatrix, labels, savefilename, vectorlength);
    }

    // Train the SVM
    /* "." means every 1,000 iterations (or every #data iterations if your #data is less than 1,000).
     * "*" means that after iterations of using a smaller shrunk problem, we reset to use the whole set.
     *
     * optimization finished, #iter = 219
     * nu = 0.431030
     * obj = -100.877286, rho = 0.424632
     * nSV = 132, nBSV = 107
     * Total nSV = 132
     *
     * obj is the optimal objective value of the dual SVM problem. rho is the bias term in the decision
     * function sgn(w^Tx - rho). nSV and nBSV are number of support vectors and bounded support vectors
     * (i.e., alpha_i = C). nu-svm is a somewhat equivalent form of C-SVM where C is replaced by nu.
     * nu simply shows the corresponding parameter.
     */
    C_SVC svm;
    string kerneltype;
    switch (kernelparam ? kernelchoice : 0)
    {
        case 1:
            svm = new C_SVC(savefilename, KernelHelper.PolynomialKernel(degree, gamma, r), C);
            kerneltype = "Polynomial";
            break;
        case 2:
            svm = new C_SVC(savefilename, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            kerneltype = "RBF";
            break;
        default:
            // 0 and every kernel number without an implementation use the linear kernel.
            svm = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
            kerneltype = "Linear";
            break;
    }

    string save_model_name = savefilename.Replace(".svm", ".model");
    if (save_model_name == savefilename)
    {
        // The name contains no ".svm", so the replacement did nothing. Append the
        // suffix instead of exporting the model over the training file.
        save_model_name = savefilename + ".model";
    }

    svm.Export(save_model_name);

    Console.WriteLine("SVM kernel type {0}", kerneltype);
}
/// <summary>
/// Command-line entry point: trains a C-SVC with the selected kernel on a training
/// file, exports the model next to the training file and prints the accuracy
/// obtained on a test file.
/// </summary>
/// <param name="args">
/// Exactly three values: kernel selection (0 = linear, 1 = polynomial, 2 = RBF,
/// 3 = sigmoid), training file and test file.
/// </param>
static void Main(string[] args)
{
    /* SVM specific initializations */
    const int degree = 3;              // default for none specified
    const int r = 1;
    // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
    const double C = 2.0;
    const double gamma = 0.001953125;  // used for Radial Basis Function Kernel (RBF)

    /*
     * Three parameters are required, kernel selection, training file and test file
     */
    if (args.Length != 3)
    {
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return; // Exit does not return; this keeps the flow explicit.
    }

    // Legal values for kernelchoice are 0-3; negative numbers are rejected too.
    if (!Int32.TryParse(args[0], out int kernelchoice) || kernelchoice < 0 || kernelchoice > 3)
    {
        // Not a legal kernel selection
        Console.WriteLine(MyStrings.usage);
        System.Environment.Exit(1);
        return;
    }

    string inputmatrix = args[1];
    string testfile = args[2];

    if (!HelperFunctions.CheckFormat(inputmatrix))
    {
        Console.WriteLine(MyStrings.TrainingFileFormat, inputmatrix);
        System.Environment.Exit(1);
        return;
    }

    if (!File.Exists(testfile))
    {
        // Report the file that is actually missing (the test file).
        Console.WriteLine(MyStrings.File_error, testfile);
        System.Environment.Exit(1);
        return;
    }

    // Train the SVM
    C_SVC svm;
    string kerneltype;
    switch (kernelchoice)
    {
        case 1:
            svm = new C_SVC(inputmatrix, KernelHelper.PolynomialKernel(degree, gamma, r), C);
            kerneltype = MyStrings.Polynomial;
            break;
        case 2:
            svm = new C_SVC(inputmatrix, KernelHelper.RadialBasisFunctionKernel(gamma), C);
            kerneltype = MyStrings.RBF;
            break;
        case 3:
            svm = new C_SVC(inputmatrix, KernelHelper.SigmoidKernel(gamma, r), C);
            kerneltype = MyStrings.Sigmoid;
            break;
        default:
            // kernelchoice == 0 after the range check above.
            svm = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
            kerneltype = MyStrings.Linear;
            break;
    }

    string save_model_name = String.Concat(inputmatrix, ".model");
    svm.Export(save_model_name);

    // PredictTestSet receives the test file path itself, so the file is not
    // loaded a second time here.
    double result = HelperFunctions.PredictTestSet(testfile, svm);

    Console.WriteLine(MyStrings.Accuracy, Math.Round(result * 100, 2));
    Console.Write("SVM kernel type {0} ", kerneltype);
    Console.WriteLine(MyStrings.Parameters, C, gamma, degree, r);
}
/// <summary>
/// Classifies <paramref name="input"/> with a linear-kernel C-SVC that is trained
/// on <c>~/Data/data_train.csv</c> during the call, and returns the class
/// probabilities reported by the model.
/// </summary>
/// <param name="input">Raw text; it is passed through
/// <c>TextPreprocessorService.parseJSONText</c> and <c>ProcessText</c> first.</param>
/// <returns>
/// The result of <c>PredictProbabilities</c> for the processed text. When the
/// processed text is empty, a dictionary with keys 1, 2 and 3 all mapped to 0.
/// </returns>
public Dictionary<int, double> PredictByText(string input)
{
    // Maps the predicted class value to its display name.
    _predictionDictionary = new Dictionary<int, string>
    {
        { 1, "ID" },
        { 2, "Documents" },
        { 3, "Forme" }
    };

    // Pre-process the input first: an empty text needs no model, so the costly
    // training and model export below are skipped for it.
    string processedText = TextPreprocessorService.parseJSONText(input);
    processedText = TextPreprocessorService.ProcessText(ref processedText);

    Dictionary<int, double> dict = new Dictionary<int, double>() { { 1, 0 }, { 2, 0 }, { 3, 0 } };
    if (string.IsNullOrEmpty(processedText))
    {
        return dict;
    }

    // STEP 4: Read the data
    string dataFilePath = System.Web.HttpContext.Current.Server.MapPath("~/Data/data_train.csv");
    var dataTable = DataAccess.DataTable.New.ReadCsv(dataFilePath);
    List<string> x = dataTable.Rows.Select(row => row["Text"]).ToList();
    double[] y = dataTable.Rows.Select(row => double.Parse(row["Type"])).ToArray();

    // Sorted list of the distinct words found in all texts.
    var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

    System.Diagnostics.Debug.WriteLine("Creating problem");
    var problemBuilder = new DataPreprocess.TextClassificationProblemBuilder();
    var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

    // If you want you can save this problem with :
    // ProblemHelper.WriteProblem(@"C:\Users\", problem);
    // And then load it again using:
    // var problem2 = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

    System.Diagnostics.Debug.WriteLine("Creating model");
    const int C = 1;
    var model = new C_SVC(problem, KernelHelper.LinearKernel(), C, 100, true);
    var accuracy = model.GetCrossValidationAccuracy(10);

    System.Diagnostics.Debug.WriteLine(new string('=', 50));
    System.Diagnostics.Debug.WriteLine("Accuracy of the model is {0:P}", accuracy);
    model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));
    System.Diagnostics.Debug.WriteLine(new string('=', 50));
    System.Diagnostics.Debug.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction.");
    System.Diagnostics.Debug.WriteLine(new string('=', 50));

    var newX = TextClassificationProblemBuilder.CreateNode(processedText, vocabulary);
    var predictedY = model.Predict(newX);
    System.Diagnostics.Debug.WriteLine(predictedY);

    dict = model.PredictProbabilities(newX);

    // Diagnostics only: list whatever classes the model reported instead of
    // indexing fixed keys, so a missing class cannot turn logging into a
    // KeyNotFoundException.
    foreach (var probability in dict)
    {
        System.Diagnostics.Debug.WriteLine("Prob(" + probability.Key + "): " + probability.Value);
    }

    string predictedLabel;
    if (!_predictionDictionary.TryGetValue((int)predictedY, out predictedLabel))
    {
        predictedLabel = "Unknown";
    }

    System.Diagnostics.Debug.WriteLine("The prediction is {0} value is {1} ", predictedLabel, predictedY);

    return dict;
}