Esempio n. 1
0
        // Trains a linear C_SVC text classifier on the labelled records of the "training_data"
        // collection, then prints a predicted Karma label for every active app found in the
        // "daily_records" entry whose Id is generated from DateTime.Now.
        private static void TrainingRecordData()
        {
            // NOTE(review): the next line looks damaged - the connection string appears to be redacted and
            // runs straight into what seems to be the tail of a regex-based tokenizer. `database` and
            // `filter`, used further down, are not declared anywhere in this block; presumably their
            // declarations were lost here - confirm against the original file.
            var mongoClient = new MongoClient("mongodb://*****:*****@"[^a-zA-Z]", " ").Trim().ToLowerInvariant().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Where(d => d.Count() < 20).ToArray();
            var trainingDataCollection     = database.GetCollection <ProcessInfoLabeledItem>("training_data");
            // Load every labelled record (empty filter).
            var records    = trainingDataCollection.Find(Builders <ProcessInfoLabeledItem> .Filter.Empty).ToList();
            // Vocabulary: the distinct, sorted tokens that `filter` produces from "Title Process" of each record.
            var vocabulary = records.Select(c => c.Title + " " + c.Process).SelectMany(filter).Distinct().OrderBy(str => str).ToList();

            // Training texts: the same "Title Process" concatenation, one entry per record.
            List <string> x = records.Select(item => item.Title + " " + item.Process).ToList();

            // Training labels: each record's Category cast to double.
            double[] y = records.Select(item => (double)item.Category).ToArray();

            var problemBuilder = new TextClassificationProblemBuilder();

            // Use the same tokenizer for problem creation as for the vocabulary.
            problemBuilder.RefineText = filter;
            var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
            // Display text for each Karma value a prediction is cast to.
            var       _predictionDictionary = new Dictionary <Karma, string> {
                { Karma.Bad, "Bad" }, { Karma.Good, "Good" }, { Karma.Neutral, "Neutral" }
            };

            // Texts to classify: "MainWindowTitle ProcessName" of each active app in the matching daily record.
            // The FirstOrDefault() result is dereferenced without a null check.
            var newXs = database.GetCollection <AppUsageRecord>("daily_records").Find(Builders <AppUsageRecord> .Filter.Eq(f => f.Id, AppUsageRecord.GetGeneratedId(DateTime.Now))).FirstOrDefault().ActiveApps.Select(c => c.Value).Select(c => c.MainWindowTitle + " " + c.ProcessName);

            foreach (var _x in newXs)
            {
                // Build a node for the text with the training vocabulary and tokenizer, then predict.
                var newX       = TextClassificationProblemBuilder.CreateNode(_x, vocabulary, problemBuilder.RefineText);
                var predictedY = model.Predict(newX);
                Console.WriteLine($"For title {_x}");
                Console.WriteLine($"The prediction is {_predictionDictionary[(Karma)predictedY]}");
            }
        }
        /// <summary>
        /// Builds a linear C_SVC model from the plain tickets. Each ticket contributes the country
        /// name resolved through its target as the text sample, labelled 1 when the ticket is sold
        /// and -1 otherwise. The vocabulary built from those texts is stored in <c>vocabulary</c>.
        /// </summary>
        /// <returns>The model created from the ticket data.</returns>
        public C_SVC getmodel()
        {
            var countryTexts = new List<string>();
            var soldLabels   = new List<double>();

            foreach (var ticket in _context.PlainTickets)
            {
                // Ticket -> target -> country; neither lookup result is null-checked.
                var country = _context.Countries
                              .Where(ct => ct.Key == _context.Targets
                                                     .Where(t => t.Key == ticket.Target)
                                                     .FirstOrDefault()
                                                     .CountryName)
                              .FirstOrDefault();

                countryTexts.Add(country.CountryName);
                soldLabels.Add(ticket.IsSold ? 1.0 : -1.0);
            }

            double[] labelArray = soldLabels.ToArray();

            this.vocabulary = countryTexts.SelectMany(GetWords)
                              .Distinct()
                              .OrderBy(term => term)
                              .ToList();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(countryTexts, labelArray, vocabulary.ToList());

            const int cost = 1;

            return new C_SVC(problem, KernelHelper.LinearKernel(), cost);
        }
    /// <summary>
    /// Trains a linear C_SVC text classifier on the "Text" / "IsSunny" columns of D:\texto.csv,
    /// requests its cross-validation accuracy and runs a single prediction for the fixed input
    /// "caries". Neither result is printed; only an empty line is written at the end.
    /// </summary>
    /// <param name="path">Directory containing datoscsv.csv, which is read and handed to arreglar_dato.</param>
    static void Main2(string path)
    {
        // Training set: one text and one numeric label per CSV row.
        const string csvLocation = @"D:\texto.csv";
        var          table       = DataTable.New.ReadCsv(csvLocation);
        List<string> texts       = table.Rows.Select(r => r["Text"]).ToList();
        double[]     labels      = table.Rows.Select(r => double.Parse(r["IsSunny"])).ToArray();

        // Second data file: parsed through arreglar_dato, but its output is not used below.
        string       rawCsv       = File.ReadAllText(path + @"/datoscsv.csv", Encoding.Default);
        List<string> parsedTexts  = new List<string>();
        double[]     parsedLabels = null;
        arreglar_dato(rawCsv, ref parsedTexts, ref parsedLabels);

        var vocabulary = texts.SelectMany(GetWords).Distinct().OrderBy(term => term).ToList();

        var problem = new TextClassificationProblemBuilder()
                      .CreateProblem(texts, labels, vocabulary.ToList());

        const int cost       = 1;
        var       classifier = new C_SVC(problem, KernelHelper.LinearKernel(), cost);

        // The accuracy is computed but not reported anywhere.
        classifier.GetCrossValidationAccuracy(10);

        _predictionDictionary = new Dictionary<int, string>
        {
            { -1, "Rainy" },
            { 1, "Sunny" }
        };

        const string sample     = "caries";
        var          sampleNode = TextClassificationProblemBuilder.CreateNode(sample, vocabulary);

        // The predicted label is likewise discarded.
        classifier.Predict(sampleNode);

        Console.WriteLine("");
    }
Esempio n. 4
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the "Text" / "IsSunny" columns of sunnyData.csv in
        /// the current directory, then prints "Rainy" or "Sunny" for every line typed on the console.
        /// The loop ends when the user types "quit" or the input stream ends.
        /// </summary>
        private void TrainingData()
        {
            string       dateFilePath = Path.Combine(Directory.GetCurrentDirectory(), "sunnyData.csv");
            var          dataTable    = DataTable.New.ReadCsv(dateFilePath);
            List<string> x            = dataTable.Rows.Select(row => row["Text"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["IsSunny"])).ToArray();

            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(w => w).ToList();

            var problemBuilder = new TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            // Display text for each numeric label.
            var predictionLabels = new Dictionary<int, string> {
                { -1, "Rainy" }, { 1, "Sunny" }
            };

            while (true)
            {
                string userInput = Console.ReadLine();

                // Stop BEFORE classifying: the sentinel is not a sample, and ReadLine() returns
                // null at end of input, which would otherwise be handed to CreateNode and could
                // never satisfy the exit condition.
                if (userInput == null || userInput == "quit")
                {
                    break;
                }

                var newX       = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
                var predictedY = model.Predict(newX);

                Console.WriteLine("The prediction is {0}", predictionLabels[(int)predictedY]);
                Console.WriteLine(new string('=', 50));
            }

            Console.WriteLine("");
        }
Esempio n. 5
0
        /// <summary>
        /// Loads up to 500 labelled lines from Files/SentimentAnalysisDataset.csv (relative to the
        /// application base directory), builds the vocabulary and trains a linear C_SVC model.
        /// When the file does not exist the model is built from empty data.
        /// </summary>
        private SvmMethod()
        {
            // Upper bound on the number of lines used for training (the original carried a "5146" note here).
            const int MaxSamples = 500;

            var          path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Files/SentimentAnalysisDataset.csv");
            List<string> x    = new List<string>();
            List<double> y    = new List<double>();

            if (File.Exists(path))
            {
                // ReadLines + Take reads lazily and stops at the end of the file, so a file with
                // fewer than MaxSamples lines no longer causes an IndexOutOfRangeException.
                foreach (var line in File.ReadLines(path).Take(MaxSamples))
                {
                    // Split on the source marker: the part before it holds the label in its second
                    // comma-separated field, the part after it is the text.
                    var lineArr = line.Split(new string[] { ",Sentiment140,", ",Kaggle," }, StringSplitOptions.None);
                    y.Add(double.Parse(lineArr[0].Split(',')[1]));
                    x.Add(lineArr[1].Trim());
                }
            }

            vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            var problemBuilder = new TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y.ToArray(), vocabulary.ToList());

            const int C = 1;

            model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            // Display text for each numeric label.
            _predictionDictionary = new Dictionary<int, string> {
                { 0, "negative" }, { 1, "positive" }
            };
        }
Esempio n. 6
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the "Text" / "Category" columns of Data.csv in the
        /// mapped ~/DAL/svm/ directory, prints its cross-validation accuracy and exports the model
        /// next to the data file.
        /// </summary>
        public static void Main()
        {
            // Read the data
            string       dataFilePath = HttpContext.Current.Server.MapPath("~/DAL/svm/");
            var          dataTable    = DataTable.New.ReadCsv(dataFilePath + "Data.csv");
            List<string> x            = dataTable.Rows.Select(row => row["Text"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["Category"]))
                         .ToArray();

            vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            var problemBuilder = new TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            // If you want you can save this problem with :
            // ProblemHelper.WriteProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem", problem);
            // And then load it again using:
            // var problem = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

            const int C = 1;

            model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            var accuracy = model.GetCrossValidationAccuracy(10);

            Console.Clear();
            Console.WriteLine(new string('=', 50));
            Console.WriteLine("Accuracy of the model is {0:P}", accuracy);

            // Format only the file name: the directory must stay out of the format string,
            // otherwise a '{' or '}' in the mapped path makes string.Format throw.
            model.Export(dataFilePath + string.Format("model_{0}_accuracy.model", accuracy));

            Console.WriteLine(new string('=', 50));
            Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: sunny rainy sunny)");
            Console.WriteLine(new string('=', 50));
        }
Esempio n. 7
0
        /// <summary>
        /// Trains a linear SVM on ./wwwroot/svm/words.csv ("Text" / "IsRecommended" columns), flags
        /// every stored review as recommended or not from its content, and returns a view of the
        /// recommended reviews with their place and comments loaded, newest first.
        /// </summary>
        /// <returns>The view over the recommended reviews ordered by descending publish date.</returns>
        public IActionResult RecommendedPlaces()
        {
            // Training samples and their classes (-1 or +1) from the predefined CSV.
            var table   = DataTable.New.ReadCsv("./wwwroot/svm/words.csv");
            var samples = table.Rows.Select(row => row["Text"]).ToList();
            var labels  = table.Rows.Select(row => double.Parse(row["IsRecommended"])).ToArray();

            // Sorted set of distinct words seen in the samples.
            var vocabulary = samples.SelectMany(GetWords).Distinct().OrderBy(term => term).ToList();

            // Build the SVM problem and create the model from it.
            var problem = CreateProblem(samples, labels, vocabulary.ToList());

            const int cost       = 1;
            var       classifier = new libsvm.C_SVC(problem, KernelHelper.LinearKernel(), cost);

            var labelNames = new Dictionary<int, string>
            {
                { -1, "NotRecommended" },
                { 1, "Recommended" }
            };

            var reviews = _context.Review.ToList();

            // Classify each review; reviews without content are never recommended.
            foreach (var review in reviews)
            {
                if (review.Content == null)
                {
                    review.IsRecommended = false;
                    continue;
                }

                var node           = CreateNode(review.Content, vocabulary);
                var predictedLabel = (int)classifier.Predict(node);

                review.IsRecommended = labelNames[predictedLabel] == "Recommended";
            }

            var recommended = reviews.Where(r => r.IsRecommended == true);

            // Attach the related place and comments to every recommended review.
            foreach (var review in recommended)
            {
                review.Place    = _context.Place.First(place => place.ID == review.PlaceID);
                review.Comments = _context.Comment.Where(comment => comment.ReviewID == review.ID).ToList();
            }

            var newestFirst = recommended.OrderByDescending(r => r.PublishDate);

            return View(newestFirst);
        }
Esempio n. 8
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the "Text" / "IsSpam" columns of spamdata.csv,
        /// prints and exports the model with its cross-validation accuracy, then classifies every
        /// line typed on the console until "quit" is entered or the input stream ends.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Read the data
            const string dataFilePath = @"spamdata.csv";
            var          dataTable    = DataTable.New.ReadCsv(dataFilePath);
            List<string> x            = dataTable.Rows.Select(row => row["Text"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["IsSpam"])).ToArray();

            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            var problemBuilder = new TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            // If you want you can save this problem with :
            // ProblemHelper.WriteProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem", problem);
            // And then load it again using:
            // var problem = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            var accuracy = model.GetCrossValidationAccuracy(10);

            Console.Clear();
            Console.WriteLine(new string('=', 50));
            Console.WriteLine("Accuracy of the model is {0:P}", accuracy);
            model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));

            Console.WriteLine(new string('=', 50));
            Console.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction. (ex: love hate dong)");
            Console.WriteLine(new string('=', 50));

            // Translates the predicted value (-2 to 2) into its categorization response.
            _predictionDictionary = new Dictionary<int, string> {
                { -2, "Angry" }, { -1, "Sad" }, { 0, "Normal" }, { 1, "Happy" }, { 2, "Love" }
            };

            while (true)
            {
                string userInput = Console.ReadLine();

                // Stop BEFORE classifying: the sentinel is not a sample, and ReadLine() returns
                // null at end of input, which would otherwise be handed to CreateNode and could
                // never satisfy the exit condition.
                if (userInput == null || userInput == "quit")
                {
                    break;
                }

                var newX       = TextClassificationProblemBuilder.CreateNode(userInput, vocabulary);
                var predictedY = model.Predict(newX);

                Console.WriteLine("The prediction is {0}  value is {1} ", _predictionDictionary[(int)predictedY], predictedY);
                Console.WriteLine(new string('=', 50));
            }

            Console.WriteLine("");
        }
Esempio n. 9
0
 /// <summary>
 /// Builds a problem from X, Y and Vocabulary and creates a linear C_SVC model from it.
 /// </summary>
 /// <returns>The created model, or <c>null</c> when problem or model creation throws.</returns>
 public static C_SVC CreateModel()
 {
     try
     {
         var       prob = CreateProblem(X, Y, Vocabulary);
         const int C    = 1;
         return(new C_SVC(prob, KernelHelper.LinearKernel(), C));
     }
     catch (Exception ex)
     {
         // Keep the best-effort "null on failure" contract for callers, but make the failure
         // observable instead of silently discarding the exception.
         System.Diagnostics.Trace.TraceError("CreateModel failed: {0}", ex);
         return(null);
     }
 }
Esempio n. 10
0
        /// <summary>
        /// Reads the "text" / "class" columns of a CSV file, rebuilds <c>vocabulary</c> from the
        /// texts and stores a linear C_SVC created from that data in <c>model</c>.
        /// </summary>
        /// <param name="path_dataCSV_trainning">Path of the training CSV file.</param>
        /// <param name="C">C parameter handed to the C_SVC constructor.</param>
        public void Create_Train_SVMmodel(string path_dataCSV_trainning, double C)
        {
            var table = DataAccess.DataTable.New.ReadCsv(path_dataCSV_trainning);

            List<string> samples = table.Rows.Select(r => r["text"]).ToList();
            List<double> labels  = table.Rows.Select(r => double.Parse(r["class"])).ToList();

            vocabulary = samples.SelectMany(GetWords)
                         .Distinct()
                         .OrderBy(term => term)
                         .ToList();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(samples, labels.ToArray(), vocabulary.ToList());

            model = new C_SVC(problem, KernelHelper.LinearKernel(), C);
        }
Esempio n. 11
0
        /// <summary>
        /// When "&lt;filename&gt;TrainSVM.txt" exists: reads the problem from it, scales it, creates a
        /// linear C_SVC from the scaled problem and writes the scaled problem to
        /// "&lt;filename&gt;output.txt".
        /// </summary>
        /// <param name="filename">Prefix prepended to the training and output file names.</param>
        /// <returns>The current value of <c>fileExistance</c>, which is set to true when the training file was found.</returns>
        public bool buildSVMCorpus(string filename)
        {
            var trainingFile = filename + "TrainSVM.txt";

            // Nothing to load: report the flag as it currently stands.
            if (!File.Exists(trainingFile))
            {
                return fileExistance;
            }

            _prob = ProblemHelper.ReadProblem(trainingFile);
            _test = ProblemHelper.ScaleProblem(_prob);
            svm   = new C_SVC(_test, KernelHelper.LinearKernel(), C);

            var scaledOutputFile = filename + "output.txt";
            ProblemHelper.WriteProblem(scaledOutputFile, _test);

            fileExistance = true;
            return fileExistance;
        }
Esempio n. 12
0
        /// <summary>
        /// Imports the review data (ImportReviewData is called with 3) and stores in <c>model</c>
        /// a linear C_SVC built from the review texts, labelled with each review's overall value,
        /// over DataHandler.Vocabulary.
        /// </summary>
        public static void Train()
        {
            const int cost = 1;

            DataHandler.ImportReviewData(3);

            // The texts stay a deferred query; the labels are materialised right away.
            var      reviewTexts = DataHandler.Reviews.Select(review => review.reviewText);
            double[] ratings     = DataHandler.Reviews.Select(review => review.overall).ToArray();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(reviewTexts, ratings, DataHandler.Vocabulary);

            model = new C_SVC(problem, KernelHelper.LinearKernel(), cost);
        }
Esempio n. 13
0
        /// <summary>
        /// Creates a linear C_SVC from the data supplied by SVMDataManager, stores its
        /// cross-validation accuracy in <c>accuracy</c>, and writes the model and the vocabulary to
        /// the "bin" folder under the application base directory. Both file names embed the accuracy.
        /// </summary>
        public void Train()
        {
            SVMDataManager data = new SVMDataManager();

            var problemBuilder = new SVMProblemBuilder();
            var problem        = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

            const double C     = 0.5;
            C_SVC        model = new C_SVC(problem, KernelHelper.LinearKernel(), C); // Train is called automatically here

            accuracy = model.GetCrossValidationAccuracy(100);

            // Only the file name is formatted. The combined path is no longer passed through a
            // second string.Format, which threw FormatException whenever the base directory
            // contained '{' or '}'.
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            model.Export(Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_accuracy.model", accuracy)));
            System.IO.File.WriteAllLines(Path.Combine(baseDirectory, string.Format(@"bin\model_{0}_vocabulary.txt", accuracy)), data.Vocabulary);
        }
Esempio n. 14
0
        /// <summary>
        /// Loads ~/App_Data/TrainingForIsPositiveAlgo.csv ("Text" / "IsPositive" columns), builds
        /// <c>vocabulary</c> from the texts, creates the linear C_SVC stored in <c>model</c> and
        /// initialises the label-to-text dictionary (-1 = "Bad", 1 = "Good").
        /// </summary>
        private RestuarantRecomandationByNLP()
        {
            var csvPath = HttpContext.Current.Server.MapPath("~/App_Data/TrainingForIsPositiveAlgo.csv");
            var table   = DataAccess.DataTable.New.ReadCsv(csvPath);

            List<string> samples = table.Rows.Select(r => r["Text"]).ToList();
            double[]     labels  = table.Rows.Select(r => double.Parse(r["IsPositive"])).ToArray();

            vocabulary = samples.SelectMany(GetWords)
                         .Distinct()
                         .OrderBy(term => term)
                         .ToList();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(samples, labels, vocabulary.ToList());

            const int cost = 1;
            model = new C_SVC(problem, KernelHelper.LinearKernel(), cost);

            _predictionDictionary = new Dictionary<int, string>
            {
                { -1, "Bad" },
                { 1, "Good" }
            };
        }
Esempio n. 15
0
        /// <summary>
        /// When "&lt;filename&gt;SimpleScaledTrainSVM.txt" exists: reads and scales the problem from it,
        /// creates a linear C_SVC, and loads the scale bounds from the first line of
        /// "&lt;filename&gt;MinMax.txt" (two comma-separated numbers: min, max).
        /// </summary>
        /// <param name="filename">Prefix prepended to the training and MinMax file names.</param>
        /// <returns>The current value of <c>fileExistance</c>, which is set to true when the training file was found.</returns>
        public bool buildSVMCorpus(string filename)
        {
            string trainDataPath = filename + "SimpleScaledTrainSVM.txt";

            if (File.Exists(trainDataPath))
            {
                _prob         = ProblemHelper.ReadAndScaleProblem(trainDataPath);
                svm           = new C_SVC(_prob, KernelHelper.LinearKernel(), C);
                fileExistance = true;

                string[] minMax;

                // Dispose the reader (and the file stream it owns) as soon as the line is read;
                // previously the handle stayed open until finalization.
                using (var reader = new StreamReader(File.OpenRead(filename + "MinMax.txt")))
                {
                    minMax = reader.ReadLine().Split(',');
                }

                scale.min = Convert.ToDouble(minMax[0]);
                scale.max = Convert.ToDouble(minMax[1]);
            }

            return(fileExistance);
        }
Esempio n. 16
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the "Genre" / "Mood" columns of
        /// ~/MoodCsv/GenreList.txt and classifies the given genre text.
        /// </summary>
        /// <param name="g">Genre text to classify.</param>
        /// <returns>True when the predicted label is -2, -1, 1 or 2; otherwise false.</returns>
        public bool FindMoodMethod(string g)
        {
            string dataFilePath = Server.MapPath("~/MoodCsv/GenreList.txt");

            var dataTable = DataTable.New.ReadCsv(dataFilePath);

            List<string> x = dataTable.Rows.Select(row => row["Genre"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["Mood"])).ToArray();

            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            var problemBuilder = new TextClassificationProblemBuilder();

            var problem = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            const int C = 1;

            var model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            var newX = TextClassificationProblemBuilder.CreateNode(g, vocabulary);

            var predictedY = model.Predict(newX);

            // The mood labels are -2, -1, 1 and 2. The label-to-name dictionary that used to be
            // built here was never read, so it has been removed.
            return predictedY == -2 || predictedY == -1 || predictedY == 1 || predictedY == 2;
        }
Esempio n. 17
0
        /// <summary>
        /// Verifies that ten consecutive cross-validation calls on the same model all return the
        /// same accuracy.
        /// </summary>
        public void C_SVC_should_always_return_the_same_cross_validation_accuracy_when_probability_is_false()
        {
            // Arrange
            const int runs = 10;

            var problem = CreateSimpleProblem();
            var model   = new C_SVC(problem, KernelHelper.LinearKernel(), 1);

            // Act
            var results = new double[runs];

            for (int i = 0; i < runs; i++)
            {
                results[i] = model.GetCrossValidationAccuracy(10);
            }

            // Assert
            // Start at index 0: the original loop began at 1 and never checked the first run.
            for (int i = 0; i < runs; i++)
            {
                Assert.AreEqual(0.90909090909090906, results[i]);
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the "Genre" / "Mood" columns of
        /// ~/MoodCsv/GenreList.txt, classifies the movie's genres and puts the mood name, the movie
        /// title and the poster path into the ViewBag.
        /// </summary>
        /// <param name="movie">Movie whose with_genres, title and poster_path are used.</param>
        /// <returns>The default view for this action.</returns>
        public ActionResult FindMood(MovieList movie)
        {
            var genreListPath = Server.MapPath("~/MoodCsv/GenreList.txt");
            var table         = DataTable.New.ReadCsv(genreListPath);

            List<string> genres = table.Rows.Select(r => r["Genre"]).ToList();
            double[]     moods  = table.Rows.Select(r => double.Parse(r["Mood"])).ToArray();

            var vocabulary = genres.SelectMany(GetWords).Distinct().OrderBy(term => term).ToList();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(genres, moods, vocabulary.ToList());

            const int cost       = 1;
            var       classifier = new C_SVC(problem, KernelHelper.LinearKernel(), cost);

            string movieGenres = movie.with_genres;

            // Display name of each mood label.
            var moodNames = new Dictionary<int, string>
            {
                { -2, "Scared" },
                { -1, "Sad" },
                { 1, "Laugh" },
                { 2, "Romance" }
            };

            var genreNode = TextClassificationProblemBuilder.CreateNode(movieGenres, vocabulary);
            var moodLabel = (int)classifier.Predict(genreNode);

            ViewBag.Mood        = moodNames[moodLabel];
            ViewBag.MovieTitle  = movie.title;
            ViewBag.MoviePoster = movie.poster_path;

            return View();
        }
Esempio n. 19
0
        /// <summary>
        /// Trains the algorithm: creates a linear C_SVC from the data supplied by SVMDataManager,
        /// stores its cross-validation accuracy in <c>accuracy</c>, and writes the model and the
        /// vocabulary to the "WAF" folder under the application base directory. Both file names
        /// embed the accuracy.
        /// </summary>
        public void Train()
        {
            // Per the original note, the training data sets are loaded in the SVMDataManager constructor.
            SVMDataManager data = new SVMDataManager();

            // Build the matrix (together with its vectors).
            var problemBuilder = new SVMProblemBuilder();
            var problem        = problemBuilder.CreateMatrix(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

            // C drives the margin optimisation: it is the loss/penalty assigned to a misclassification.
            const double C     = 0.5;
            C_SVC        model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            // The accuracy comes from the model's cross-validation call.
            // NOTE(review): 100 is presumably the fold count - confirm against the libsvm wrapper.
            accuracy = model.GetCrossValidationAccuracy(100);

            // Export the model and the vocabulary. Only the file name is formatted. The combined
            // path is no longer passed through a second string.Format, which threw FormatException
            // whenever the base directory contained '{' or '}'.
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            model.Export(Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_accuracy.model", accuracy)));
            System.IO.File.WriteAllLines(Path.Combine(baseDirectory, string.Format(@"WAF\model_{0}_vocabulary.txt", accuracy)), data.Vocabulary);
        }
Esempio n. 20
0
        /// <summary>
        /// Trains a linear C_SVC on the purchase history (buyer user name as text, purchased product
        /// id as label, all user names as vocabulary), predicts a product id for the given user and
        /// puts the matching product into ViewBag.Suggestion.
        /// </summary>
        /// <param name="userName">User name to produce a suggestion for.</param>
        /// <returns>The Suggestion partial view.</returns>
        public PartialViewResult GetSuggestion(string userName)
        {
            // Both projections run over the purchases in the same Id order so texts and labels line up.
            var purchasesById = db.Purchases.OrderBy(p => p.Id);

            List<string> buyers     = purchasesById.Select(p => p.User.Username).ToList();
            double[]     productIds = purchasesById.Select(p => (double)p.Product.Id).ToArray();

            var userNames = db.Users.Select(u => u.Username).ToList();

            var problem = new TextClassificationProblemBuilder()
                          .CreateProblem(buyers, productIds, userNames.ToList());

            const int cost       = 1;
            C_SVC     classifier = new C_SVC(problem, KernelHelper.LinearKernel(), cost);

            var userNode    = TextClassificationProblemBuilder.CreateNode(userName, userNames);
            var predictedId = (int)classifier.Predict(userNode);

            ViewBag.Suggestion = db.Products.Find(predictedId);

            return PartialView("~/Views/Suggestion/Suggestion.cshtml");
        }
Esempio n. 21
0
        /// <summary>
        /// Builds the classifier from a training set plus those testing-set articles that carry a
        /// special coverage, and creates a linear C_SVC from the collected samples.
        /// </summary>
        /// <param name="trainingSet">Training set loaded from a file</param>
        /// <param name="testingSet">Testing set loaded from a file</param>
        public SVMClassifier(TrainingSet trainingSet, TestingSet testingSet)
        {
            this.trainingSet = trainingSet;
            vocabulary       = new HashSet<string>();
            x                = new List<string>();
            y                = new List<double>();

            // Every training article becomes a sample; its first special coverage is the label.
            foreach (Article trainingArticle in trainingSet.articles.Values)
            {
                string featureText = ArticleFeatures(trainingArticle);

                AddFeaturesToVocabulary(featureText);
                x.Add(featureText);
                y.Add(trainingArticle.specialCoverage[0]);
            }

            // Testing articles are used only when a special coverage is present.
            foreach (Article testingArticle in testingSet.articles.Values)
            {
                if (testingArticle.specialCoverage == null)
                {
                    continue;
                }

                string featureText = ArticleFeatures(testingArticle);

                AddFeaturesToVocabulary(featureText);
                x.Add(featureText);
                y.Add(testingArticle.specialCoverage[0]);
            }

            // Problem over all collected samples, then a linear-kernel model with C = 1.
            var problem = new ProblemBuilder().CreateProblem(x, y.ToArray(), vocabulary.ToList());

            const int cost = 1;
            model = new C_SVC(problem, KernelHelper.LinearKernel(), cost);
        }
Esempio n. 22
0
        /// <summary>
        /// Trains a linear C_SVC classifier from the data supplied by DataPreparer, prints and
        /// exports the model with its cross-validation accuracy, then classifies every request typed
        /// on the console until "exit" is entered or the input stream ends.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            DataPreparer data = new DataPreparer();

            var problemBuilder = new TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(data.RequestText, data.ClassValue, data.Vocabulary.ToList());

            const double C        = 0.5;
            var          model    = new C_SVC(problem, KernelHelper.LinearKernel(), C); // Train is called automatically here
            var          accuracy = model.GetCrossValidationAccuracy(100);

            Console.Clear();
            Console.WriteLine(new string('=', 50));
            Console.WriteLine("Accuracy of the model is {0:P}", accuracy);

            model.Export(string.Format(@"C:\Users\kramek\Desktop\AIC#\model_{0}_accuracy.model", accuracy));

            Console.WriteLine(new string('=', 50));
            Console.WriteLine("The Model is ready. \r\nEnter a request to check:");
            Console.WriteLine(new string('=', 50));

            while (true)
            {
                string userInput = Console.ReadLine();

                // Stop BEFORE classifying: the sentinel is not a request, and ReadLine() returns
                // null at end of input, which would otherwise be handed to CreateNode and could
                // never satisfy the exit condition.
                if (userInput == null || userInput == "exit")
                {
                    break;
                }

                var newX       = TextClassificationProblemBuilder.CreateNode(userInput, data.Vocabulary);
                var predictedY = model.Predict(newX);

                Console.WriteLine("The prediction is {0}", _predictionDictionary[(int)predictedY]);
                Console.WriteLine(new string('=', 50));
            }

            Console.WriteLine("");
        }
Esempio n. 23
0
        /// <summary>
        /// Sentiment-analysis console pipeline: loads and preprocesses the comment data file, builds
        /// the vocabulary, labels and feature matrix, splits the rows 20/80 into test and training
        /// sets, trains a linear C_SVC and prints training accuracy, test accuracy and the F1 score.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            if (System.Console.IsOutputRedirected == false)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            System.Console.WriteLine("Sentiment Analysis");
            System.Console.WriteLine("======================\n");

            // Load the raw data file.
            System.Console.WriteLine("Loading data....");
            string rawData = ReadDataFile(".\\data\\wikipedia-detox-250-line-data.tsv");

            // Preprocess the comments.
            System.Console.WriteLine("Processing data....");
            string[,] comments = ProcessComments(rawData);
            System.Console.WriteLine($"Data file contains {comments.GetLength(0)} comments\n");

            // Build the vocabulary list.
            System.Console.WriteLine("Generating Vocabulary List....");
            string[] vocabularyWords = GenerateVocabulary(comments);
            System.Console.WriteLine($"Vocabulary generated with {vocabularyWords.Length} words\n");

            // Labels come from the preprocessed comments.
            System.Console.WriteLine("Retrieving labels...");
            Vector<double> labels = GetLables(comments);

            // Features come from the preprocessed comments and the vocabulary.
            System.Console.WriteLine("Extracting features...");
            Matrix<double> features = GetFeatures(comments, vocabularyWords);

            // The first 20% of the rows form the test set, the remainder the training set.
            System.Console.WriteLine("Splitting data...");
            int rowCount    = features.RowCount;
            int columnCount = features.ColumnCount;
            int testCount   = rowCount * 20 / 100;
            int trainCount  = rowCount - testCount;

            Vector<double> testLabel    = labels.SubVector(0, testCount);
            Matrix<double> testFeatures = features.SubMatrix(0, testCount, 0, columnCount);

            Vector<double> trainingLabel    = labels.SubVector(testCount, trainCount);
            Matrix<double> trainingFeatures = features.SubMatrix(testCount, trainCount, 0, columnCount);

            System.Console.WriteLine();
            System.Console.WriteLine($"Test set: {testLabel.Count}");
            System.Console.WriteLine($"Training set: {trainingLabel.Count}");

            // Train the SVM with a linear kernel.
            System.Console.WriteLine("\nTraining linear SVM ...\n");

            const double cost   = .4;
            var          kernel = KernelHelper.LinearKernel();

            List<List<double>> trainingRows = ConvertToLibSvmFormat(trainingFeatures, trainingLabel);
            svm_problem        svmProblem   = ProblemHelper.ReadProblem(trainingRows);
            var                classifier   = new C_SVC(svmProblem, kernel, cost);

            System.Console.WriteLine();

            // Accuracy on the training set.
            Vector<double> predicted = SvmPredic(trainingFeatures, classifier);
            double         accuracy  = CalculateAccuracy(predicted, trainingLabel);

            System.Console.WriteLine("Training set Accuracy: {0:f2}%\n", accuracy);

            // Accuracy on the test set.
            predicted = SvmPredic(testFeatures, classifier);
            accuracy  = CalculateAccuracy(predicted, testLabel);

            System.Console.WriteLine("Test set Accuracy: {0:f2}%\n", accuracy);

            // F1 score on the test-set predictions, printed as a percentage.
            double f1 = CalculateF1Score(predicted, testLabel);

            System.Console.WriteLine("F1 Score on test set: {0:f2}%\n", f1 * 100);
        }
Esempio n. 24
0
        /// <summary>
        /// Builds the SVM problem from the data set's training data and vocabulary, then fits a
        /// linear-kernel C-SVC with probability estimates enabled and stores it in <c>model</c>.
        /// </summary>
        public void Train()
        {
            var trainingProblem = problemBuilder.CreateProblem(dataSet.TrainData, dataSet.Vocabulary);
            var linearKernel    = KernelHelper.LinearKernel();

            model = new C_SVC(trainingProblem, linearKernel, c, probability: true);
        }
Esempio n. 25
0
        /// <summary>
        /// Compares several libsvm kernels with a Naive Bayes baseline on the configured training and
        /// test CSV files, then prints a bias / variance / accuracy line for every kernel.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            List <double[]> continuousTrainData = DataWrangler.LoadContinuousDataAsync(TrainingCsv, _indexToIgnore).Result;
            List <double[]> continuousTestData  = DataWrangler.LoadContinuousDataAsync(TestingCsv, _indexToIgnore).Result;

            // Print continuous columns for calculating elbows in external tool(https://bl.ocks.org/rpgove/0060ff3b656618e9136b)
            foreach (int i in _continuousIndexes)
            {
                using (StreamWriter sw = new StreamWriter($"{i}.txt"))
                {
                    sw.WriteLine(string.Join(",", continuousTrainData.Select(array => array[i])));
                }
            }

            // Convert continuous to discrete
            Dictionary <int, GaussianClusterCollection> indexClusterMapping = DataWrangler.GetIndexClustersMap(continuousTrainData, _indexElbowMap);
            List <int[]> discreteTrainData = DataWrangler.ConvertContinuesToDiscrete(continuousTrainData, indexClusterMapping);
            List <int[]> discreteTestData  = DataWrangler.ConvertContinuesToDiscrete(continuousTestData, indexClusterMapping);

            // libsvm expects the class label in the first column. The swap is deliberately done in place:
            // the Naive Bayes, sampling and bias/variance code below is handed the same arrays together
            // with SVMSupportedClassIndex as the class column.
            Func <int[], List <double> > moveClassToFront = row =>
            {
                int displaced = row[SVMSupportedClassIndex];
                row[SVMSupportedClassIndex] = row[OriginalClassIndex];
                row[OriginalClassIndex]     = displaced;
                return row.Select(value => (double)value).ToList();
            };

            var problem = ProblemHelper.ReadProblem(discreteTrainData.Select(moveClassToFront).ToList());
            var test    = ProblemHelper.ReadProblem(discreteTestData.Select(moveClassToFront).ToList());

            // defaults taken from documentation http://weka.sourceforge.net/doc.stable/weka/classifiers/functions/LibSVM.html
            // Default gamma is 1/k where k is the number of features, not the number of training rows
            // (problem.l). Every row holds one class label followed by the features.
            int    featureCount = discreteTrainData.Count > 0 ? discreteTrainData[0].Length - 1 : 0;
            double c            = 1;                                // default C is 1
            double gamma        = 1.0 / Math.Max(1, featureCount);  // default gamma is 1/k
            double r            = 0;                                // default coef0 is 0
            int    degree       = 3;                                // default degree is 3

            Dictionary <string, Kernel> nameKernelMap = new Dictionary <string, Kernel>(StringComparer.OrdinalIgnoreCase)
            {
                { "Linear", KernelHelper.LinearKernel() },
                { "Polynomial", KernelHelper.PolynomialKernel(degree, gamma, r) },
                { "Radial", KernelHelper.RadialBasisFunctionKernel(gamma) },
                { "Sigmoid", KernelHelper.SigmoidKernel(gamma, r) },
            };

            // Get accuracies for base comparison
            // DON'T DO PARALLEL. We don't know if the underlying implementation is MT safe or not.
            foreach (KeyValuePair <string, Kernel> kernelEntry in nameKernelMap)
            {
                Console.WriteLine($"{kernelEntry.Key}: {GetSVMAccuracy(problem, test, kernelEntry.Value, c)}");
            }

            // Get accuracy of with Naive Bayes
            double[]             classWeightPrior      = new[] { 1.0, 1.0 };
            double[]             classPriorProbability = new[] { 0.5, 0.5 };
            NaiveBayesClassifier naiveBayes            = NaiveBayesClassifier.Load(discreteTrainData, SVMSupportedClassIndex, classWeightPrior, classPriorProbability);

            Console.WriteLine($"Naive Bayes: {naiveBayes.GetPredictionAccuracy(discreteTestData, SVMSupportedClassIndex)}");

            // Calculate SVMs Bias and Variance
            List <List <int[]> > samples = Sampler.SampleData(discreteTrainData, BiasVarianceNumOfSamples);

            // kernel name -> test instance index -> classifier (sample) index -> predicted class
            ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > > kernelInstanceClassifierPredictionsMappings = new ConcurrentDictionary <string, ConcurrentDictionary <int, ConcurrentDictionary <int, int> > >(StringComparer.OrdinalIgnoreCase);

            foreach (KeyValuePair <string, Kernel> kernelEntry in nameKernelMap)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelEntry.Key, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                for (int classifierIndex = 0; classifierIndex < BiasVarianceNumOfSamples; classifierIndex++)
                {
                    // One classifier per resampled training set.
                    var sampleProblem = ProblemHelper.ReadProblem(samples[classifierIndex].Select(arr => arr.Select(i => (double)i).ToList()).ToList());
                    var svm           = new C_SVC(sampleProblem, kernelEntry.Value, c);

                    for (int instanceIndex = 0; instanceIndex < discreteTestData.Count; instanceIndex++)
                    {
                        ConcurrentDictionary <int, int> classifierPredictionMappings = instanceClassifierPredictionMappings.GetOrAdd(instanceIndex, new ConcurrentDictionary <int, int>());

                        // Wrap the single test row in a problem so it is converted to libsvm nodes.
                        var instanceProblem = ProblemHelper.ReadProblem(new List <List <double> > {
                            discreteTestData[instanceIndex].Select(i => (double)i).ToList()
                        });

                        for (int i = 0; i < instanceProblem.l; i++)
                        {
                            classifierPredictionMappings.GetOrAdd(classifierIndex, (int)svm.Predict(instanceProblem.x[i]));
                        }
                    }
                }
            }

            Console.WriteLine("Kernel, Bias, Variance, Accuracy");
            foreach (string kernelName in nameKernelMap.Keys)
            {
                ConcurrentDictionary <int, ConcurrentDictionary <int, int> > instanceClassifierPredictionMappings = kernelInstanceClassifierPredictionsMappings.GetOrAdd(kernelName, new ConcurrentDictionary <int, ConcurrentDictionary <int, int> >());
                Tuple <double, double, double> biasVarianceAccuracy = BiasVarianceHelper.GetBiasVarianceAccuracy(discreteTestData, SVMSupportedClassIndex, instanceClassifierPredictionMappings);
                Console.WriteLine($"{kernelName}, {biasVarianceAccuracy.Item1}, {biasVarianceAccuracy.Item2}, {biasVarianceAccuracy.Item3}");
            }

            Console.WriteLine("Press ENTER to continue...");
            Console.ReadLine();
        }
Esempio n. 26
0
        /// <summary>
        /// Trains a linear C-SVC on ~/Data/data_train.csv (columns "Text" and "Type") and returns the
        /// class probabilities the model assigns to <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Raw text; it is passed through TextPreprocessorService.parseJSONText and ProcessText before classification.</param>
        /// <returns>
        /// The dictionary produced by <c>model.PredictProbabilities</c>; keys 1, 2 and 3 are read from it
        /// below. When preprocessing yields an empty string, a dictionary with 0 for keys 1, 2 and 3 is returned.
        /// </returns>
        public Dictionary <int, double> PredictByText(string input)
        {
            _predictionDictionary = new Dictionary <int, string> {
                { 1, "ID" }, { 2, "Documents" }, { 3, "Forme" }
            };

            string processedText = TextPreprocessorService.parseJSONText(input);

            processedText = TextPreprocessorService.ProcessText(ref processedText);

            // Nothing to classify: answer right away instead of first paying for training,
            // 10-fold cross validation and a model export whose result would never be used.
            if (processedText.Equals(""))
            {
                return(new Dictionary <int, double>()
                {
                    { 1, 0 }, { 2, 0 }, { 3, 0 }
                });
            }

            // STEP 4: Read the data

            string        dataFilePath = System.Web.HttpContext.Current.Server.MapPath("~/Data/data_train.csv");
            var           dataTable    = DataAccess.DataTable.New.ReadCsv(dataFilePath);
            List <string> x            = dataTable.Rows.Select(row => row["Text"]).ToList();

            double[] y = dataTable.Rows.Select(row => double.Parse(row["Type"])).ToArray();

            // Sorted, de-duplicated word list; the same list is used for training and for the new node.
            var vocabulary = x.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            Console.WriteLine("Creating problem");
            var problemBuilder = new DataPreprocess.TextClassificationProblemBuilder();
            var problem        = problemBuilder.CreateProblem(x, y, vocabulary.ToList());

            //        // If you want you can save this problem with :
            //        //ProblemHelper.WriteProblem(@"C:\Users\", problem);
            //        // And then load it again using:
            //        //var problem2 = ProblemHelper.ReadProblem(@"D:\MACHINE_LEARNING\SVM\Tutorial\sunnyData.problem");

            System.Diagnostics.Debug.WriteLine("Creating model");
            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C, 100, true);

            var accuracy = model.GetCrossValidationAccuracy(10);

            System.Diagnostics.Debug.WriteLine(new string('=', 50));
            System.Diagnostics.Debug.WriteLine("Accuracy of the model is {0:P}", accuracy);
            model.Export(string.Format(@"model_{0}_accuracy.model", accuracy));

            System.Diagnostics.Debug.WriteLine(new string('=', 50));
            System.Diagnostics.Debug.WriteLine("The model is trained. \r\nEnter a sentence to make a prediction.");
            System.Diagnostics.Debug.WriteLine(new string('=', 50));

            var newX       = TextClassificationProblemBuilder.CreateNode(processedText, vocabulary);
            var predictedY = model.Predict(newX);

            System.Diagnostics.Debug.WriteLine(predictedY);

            Dictionary <int, double> dict = model.PredictProbabilities(newX);
            System.Diagnostics.Debug.WriteLine("Prob(1): " + dict[1]);
            System.Diagnostics.Debug.WriteLine("Prob(2): " + dict[2]);
            System.Diagnostics.Debug.WriteLine("Prob(3): " + dict[3]);

            System.Diagnostics.Debug.WriteLine("The prediction is {0}  value is {1} ", _predictionDictionary[(int)predictedY], predictedY);

            return(dict);
        }
Esempio n. 27
0
        /// <summary>
        /// Trains a C-SVC on the ".svm" file derived from the input matrix name and exports the
        /// trained model to the matching ".model" file.
        /// </summary>
        /// <remarks>
        /// Accepted command lines:
        ///   trainfile                                 - linear kernel
        ///   kernel trainfile [labelfile [testfile]]   - kernel is an integer, see the switch below
        /// A single integer argument, or more than four arguments after a kernel number, prints the
        /// usage text and exits with code 1. The test file is accepted but not used yet.
        /// </remarks>
        /// <param name="args">Command-line arguments as described above.</param>
        static void Main(string[] args)
        {
            bool   needsFormatting = false;
            int    kernelchoice; // integer representation of selected kernel
            int    numberofArgs = args.Length;
            string inputmatrix, savefilename;
            string save_model_name;
            string kerneltype;

            /* SVM specific initializations
             */
            int degree = 3; // default for none specified
            int r      = 1;
            // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
            double C     = 2.0;
            double gamma = 0.001953125; // used for Radial Basis Function Kernel (RBF)
            C_SVC  svm;                 // setup the default variable for the SVM

            if (numberofArgs < 1)
            {
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
                return; // Exit if no params passed on the command line
            }

            // The first argument is either a kernel number or the training file itself.
            bool kernelparam = Int32.TryParse(args[0], out kernelchoice);

            if (!kernelparam)
            {
                // Plain file name: train a linear SVM on it, any further arguments are ignored.
                inputmatrix = args[0];
            }
            else
            {
                // NOTE(review): the result of CheckFormat is interpreted as "needs formatting" here;
                // confirm against HelperFunctions.CheckFormat.
                switch (numberofArgs)
                {
                case 1:
                    Console.WriteLine(MyStrings.usage); // single parameter can't be int
                    System.Environment.Exit(1);
                    return;

                case 2:
                    needsFormatting = HelperFunctions.CheckFormat(args[1]);
                    inputmatrix     = args[1];
                    if (needsFormatting)
                    {
                        Console.WriteLine("Missing label file");
                        System.Environment.Exit(1);
                        return;
                    }
                    break;

                case 3:
                case 4: // args[3] (test file) is accepted but prediction is not wired up yet
                    needsFormatting = HelperFunctions.CheckFormat(args[1]);
                    inputmatrix     = args[1];
                    break;

                default:
                    Console.WriteLine("too many parameters");
                    Console.WriteLine(MyStrings.usage);
                    System.Environment.Exit(1);
                    return;
                }
            }

            savefilename = inputmatrix.Replace(".mat", ".svm"); // update the suffix

            // needsFormatting can only be true here when a label file (args[2]) was supplied.
            if (needsFormatting)
            {
                string   labelfile    = args[2];
                int      vectorlength = HelperFunctions.VectorLength(inputmatrix);            // Get the number of features
                string[] labels       = new string[HelperFunctions.SampleSize(labelfile)];    // Calculate the number of labels and use to create storage

                /* if the input matrix is not already in the correct format Call reformat function
                 * result is that a file is written that is the LIBSVM format, expects the
                 * labels to be in a separate file
                 */
                HelperFunctions.Reformatdata(inputmatrix, labels, savefilename, vectorlength);
            }

            // Train the SVM

            /* "." means every 1,000 iterations (or every #data iterations is your #data is less than 1,000).
             *  "*" means that after iterations of using a smaller shrunk problem, we reset to use the whole set. */
            /*  optimization finished, #iter = 219
             *  nu = 0.431030
             *  obj = -100.877286, rho = 0.424632
             *  nSV = 132, nBSV = 107
             *  Total nSV = 132
             *  obj is the optimal objective value of the dual SVM problem. rho is the bias term in the decision
             *  function sgn(w^Tx - rho). nSV and nBSV are number of support vectors and bounded support vectors
             *  (i.e., alpha_i = C). nu-svm is a somewhat equivalent form of C-SVM where C is replaced by nu.
             *  nu simply shows the corresponding parameter.
             */

            /* if a kernel is specified on the command line, then select the corresponding kernel for training the SVM as follows
             * 0 = linear
             * 1 = polynomial
             * 2 = RBF
             * any other value (or no kernel argument) falls back to linear
             */
            switch (kernelparam ? kernelchoice : 0)
            {
            case 1:
                svm        = new C_SVC(savefilename, KernelHelper.PolynomialKernel(degree, gamma, r), C);
                kerneltype = "Polynomial";
                break;

            case 2:
                svm        = new C_SVC(savefilename, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                kerneltype = "RBF";
                break;

            default:
                svm        = new C_SVC(savefilename, KernelHelper.LinearKernel(), C);
                kerneltype = "Linear";
                break;
            }

            // var accuracy = svm.GetCrossValidationAccuracy(5);
            save_model_name = savefilename.Replace(".svm", ".model");
            svm.Export(save_model_name);

            //double accuracy = svm.Predict(testfile);
            //Console.WriteLine(MyStrings.Accuracy, accuracy * 100);
            Console.WriteLine("SVM kernel type {0}", kerneltype);
        }
Esempio n. 28
0
        /// <summary>
        /// Walks through the SVM exercise: loads the three ex6 datasets from the "data" folder, trains a
        /// linear and two RBF C-SVCs, plots data and decision boundaries, and prints a Gaussian-kernel
        /// sanity check and a 10-fold cross validation accuracy. Pauses between the parts.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            if (!System.Console.IsOutputRedirected)
            {
                System.Console.Clear();
            }

            CultureInfo.CurrentCulture = CultureInfo.CreateSpecificCulture("en-US");

            var V = Vector <double> .Build;

            // Dataset paths are built with Path.Combine so that they also resolve on hosts whose
            // directory separator is not a backslash.
            Dictionary <string, Matrix <double> > LoadDataset(string fileName)
            {
                return(MatlabReader.ReadAll <double>(System.IO.Path.Combine("data", fileName)));
            }

            //// =============== Part 1: Loading and Visualizing Data ================
            //  We start the exercise by first loading and visualizing the dataset.
            //  The following code will load the dataset into your environment and plot
            //  the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data1:
            // You will have X, y in your environment
            Dictionary <string, Matrix <double> > ms = LoadDataset("ex6data1.mat");

            Matrix <double> X = ms["X"];                 // 51 X 2
            Vector <double> y = ms["y"].Column(0);       // 51 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ==================== Part 2: Training Linear SVM ====================
            //  The following code will train a linear SVM on the dataset and plot the
            //  decision boundary learned.
            //

            System.Console.WriteLine("\nTraining Linear SVM ...\n");

            // You should try to change the C value below and see how the decision
            // boundary varies (e.g., try C = 1000)
            double C            = 1.0;
            var    linearKernel = KernelHelper.LinearKernel();

            List <List <double> > libSvmData = ConvertToLibSvmFormat(X, y);
            svm_problem           prob       = ProblemHelper.ReadProblem(libSvmData);
            var svc = new C_SVC(prob, linearKernel, C);

            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            System.Console.WriteLine();

            Pause();

            //// =============== Part 3: Implementing Gaussian Kernel ===============
            //  You will now implement the Gaussian kernel to use
            //  with the SVM. You should complete the code in gaussianKernel.m
            //

            System.Console.WriteLine("\nEvaluating the Gaussian Kernel ...\n");

            double sigma = 2.0;
            double sim   = GaussianKernel(
                V.DenseOfArray(new [] { 1.0, 2, 1 }),
                V.DenseOfArray(new [] { 0.0, 4, -1 }),
                sigma
                );

            System.Console.WriteLine("Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {0:f6} :\n\t{1:f6}\n(for sigma = 2, this value should be about 0.324652)\n", sigma, sim);

            Pause();

            //// =============== Part 4: Visualizing Dataset 2 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data2:
            // You will have X, y in your environment
            ms = LoadDataset("ex6data2.mat");

            X = ms["X"];                 // 863 X 2
            y = ms["y"].Column(0);       // 863 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            Pause();

            //// ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
            //  After you have implemented the kernel, we can now use it to train the
            //  SVM classifier.
            //

            System.Console.WriteLine("\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n");

            // SVM Parameters
            C     = 1;
            sigma = 0.1;
            // The RBF kernel is parameterised by gamma; gamma = 1 / (2 * sigma^2).
            double gamma = 1 / (2 * sigma * sigma);

            var rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);

            PlotBoundary(X, svc);
            GnuPlot.HoldOff();

            Pause();

            double acc = svc.GetCrossValidationAccuracy(10);

            System.Console.WriteLine("\nCross Validation Accuracy: {0:f6}\n", acc);

            Pause();

            //// =============== Part 6: Visualizing Dataset 3 ================
            //  The following code will load the next dataset into your environment and
            //  plot the data.
            //

            System.Console.WriteLine("Loading and Visualizing Data ...\n");

            // Load from ex6data3:
            // You will have X, y, Xval, yval in your environment
            ms = LoadDataset("ex6data3.mat");

            Matrix <double> Xval;
            Vector <double> yval;

            X    = ms["X"];              // 211 X 2
            y    = ms["y"].Column(0);    // 211 X 1
            Xval = ms["Xval"];           // 200 X 2
            yval = ms["yval"].Column(0); // 200 X 1

            // Plot training data
            GnuPlot.HoldOn();
            PlotData(X, y);

            //// ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========

            //  This is a different dataset that you can use to experiment with. Try
            //  different values of C and sigma here.
            //

            (C, sigma) = Dataset3Params(X, y, Xval, yval);

            gamma     = 1 / (2 * sigma * sigma);
            rbfKernel = KernelHelper.RadialBasisFunctionKernel(gamma);

            libSvmData = ConvertToLibSvmFormat(X, y);
            prob       = ProblemHelper.ReadProblem(libSvmData);
            svc        = new C_SVC(prob, rbfKernel, C);

            PlotBoundary(X, svc);

            GnuPlot.HoldOff();
            Pause();
        }
Esempio n. 29
0
        /// <summary>
        /// Trains a C-SVC with the selected kernel on a LIBSVM-format training file, exports the model
        /// to "&lt;trainfile&gt;.model", and prints the accuracy obtained on the test file.
        /// </summary>
        /// <param name="args">
        /// Exactly three values: kernel selection (0 = linear, 1 = polynomial, 2 = RBF, 3 = sigmoid),
        /// training file, test file. Anything else prints a message and exits with code 1.
        /// </param>
        static void Main(string[] args)
        {
            string inputmatrix;
            string save_model_name;
            string kerneltype;
            string testfile;

            /* SVM specific initializations
             */
            int degree = 3; // default for none specified
            int r      = 1;
            // C and gamma come from using grid.py on the training set resume.mat 982 x 7768
            double C     = 2.0;
            double gamma = 0.001953125; // used for Radial Basis Function Kernel (RBF)
            C_SVC  svm;                 // setup the default variable for the SVM

            /*
             * Three parameters are required, kernel selection, training file and test file
             */
            if (args.Length != 3)
            {
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
                return;
            }

            // Legal values for kernelchoice are 0-3; negative numbers are rejected as well.
            bool kernelparam = Int32.TryParse(args[0], out int kernelchoice) && kernelchoice >= 0 && kernelchoice <= 3;
            if (!kernelparam)
            {
                // Not a legal kernel selection
                Console.WriteLine(MyStrings.usage);
                System.Environment.Exit(1);
                return;
            }

            inputmatrix = args[1];
            testfile    = args[2];
            if (!HelperFunctions.CheckFormat(inputmatrix))
            {
                Console.WriteLine(MyStrings.TrainingFileFormat, inputmatrix);
                System.Environment.Exit(1);
                return;
            }
            if (!File.Exists(testfile))
            {
                // Report the file that is actually missing.
                Console.WriteLine(MyStrings.File_error, testfile);
                System.Environment.Exit(1);
                return;
            }

            // Train the SVM

            switch (kernelchoice)
            {
            case 1:
                svm        = new C_SVC(inputmatrix, KernelHelper.PolynomialKernel(degree, gamma, r), C);
                kerneltype = MyStrings.Polynomial;
                break;

            case 2:
                svm        = new C_SVC(inputmatrix, KernelHelper.RadialBasisFunctionKernel(gamma), C);
                kerneltype = MyStrings.RBF;
                break;

            case 3:
                svm        = new C_SVC(inputmatrix, KernelHelper.SigmoidKernel(gamma, r), C);
                kerneltype = MyStrings.Sigmoid;
                break;

            default: // 0, the only value left after validation
                svm        = new C_SVC(inputmatrix, KernelHelper.LinearKernel(), C);
                kerneltype = MyStrings.Linear;
                break;
            }

            // var accuracy = svm.GetCrossValidationAccuracy(5);
            save_model_name = String.Concat(inputmatrix, ".model");
            svm.Export(save_model_name);

            double result = HelperFunctions.PredictTestSet(testfile, svm);

            Console.WriteLine(MyStrings.Accuracy, Math.Round(result * 100, 2));
            Console.Write("SVM kernel type {0}      ", kerneltype);
            Console.WriteLine(MyStrings.Parameters, C, gamma, degree, r);
        }
Esempio n. 30
0
        /// <summary>
        /// Trains a linear C-SVC text classifier on <paramref name="trainFile"/>, labels every line of
        /// <paramref name="testFile"/>, writes the labelled lines to "../../svm/testResult.txt" and prints,
        /// per class, how many predictions agree with <paramref name="testTarget"/>.
        /// </summary>
        /// <param name="trainFile">Training file read through FileIO.ReadFileIntoVector.</param>
        /// <param name="testFile">File whose lines are classified one by one.</param>
        /// <param name="testTarget">File holding the expected labels for the test lines.</param>
        public static void ClassifyBySVM(string trainFile, string testFile, string testTarget)
        {
            string testResultFile = "../../svm/testResult.txt";

            var watch = System.Diagnostics.Stopwatch.StartNew();

            //STEP 1 : READ DATA
            List <Vector> vectorsTrain;
            var           content = FileIO.ReadFileIntoVector(trainFile, out vectorsTrain, true);

            // Materialised once: the class list is indexed and iterated several times below.
            var classNames = Vector.GetDistinctClassTypes(vectorsTrain).ToList();

            //Get content of document and label of document
            double[] label = GetLableOfDocument(vectorsTrain);

            //Get features list
            var features = content.SelectMany(GetWords).Distinct().OrderBy(word => word).ToList();

            //STEP 2: Generate a problem
            var problem = TextClassificationProblemBuilder.CreateProblem(content, label, features.ToList());

            //STEP 3: Create and train a SVM model
            const int C     = 1;
            var       model = new C_SVC(problem, KernelHelper.LinearKernel(), C);

            //STEP 4: Predict
            List <string> test       = FileIO.ReadFile(testFile);
            List <string> resultList = new List <string>();

            List <Vector> targetVector;
            FileIO.ReadFileIntoVector(testTarget, out targetVector, true);

            // Predicted value -> class name, keyed by the class's position in classNames.
            _predictionDictionary = new Dictionary <int, string>();
            for (int classIndex = 0; classIndex < classNames.Count; classIndex++)
            {
                _predictionDictionary.Add(classIndex, classNames[classIndex]);
            }

            foreach (string line in test)
            {
                var newX       = TextClassificationProblemBuilder.CreateNode(line, features);
                var predictedY = model.Predict(newX);
                var result     = _predictionDictionary[(int)predictedY];
                resultList.Add(result + " - " + line);
            }
            FileIO.WriteFile(resultList, testResultFile);

            // Read the predictions back in the same representation as the expected labels.
            List <Vector> sourceVector;
            FileIO.ReadFileIntoVector(testResultFile, out sourceVector, true);

            foreach (var className in classNames)
            {
                // Each count is computed once and reused for both the score and the report lines.
                var    correct = Vector.CountShareSameTypeRecords(className, sourceVector, targetVector);
                var    total   = Vector.CountClassElements(className, targetVector);
                double score   = 1.0 * correct / total;

                Console.WriteLine("correct label: " + correct);
                Console.WriteLine("total label: " + total);
                Console.WriteLine("SVM score: " + score);
            }

            Console.WriteLine("The time for SVM: {0} ", watch.ElapsedMilliseconds);
        }