/// <summary>
        /// Trains a model on <c>threeClassDataset</c>, writes it to "classify.dat" in the test
        /// directory, then classifies two fresh single-record documents and checks their labels.
        /// </summary>
        public async Task Classify()
        {
            // Train on the shared three-class fixture.
            var factory   = new ProblemFactory(threeClassDataset);
            var trainer   = new SvmTraining(factory, threeClassDataset);
            var selection = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained   = await trainer.Train(selection).ConfigureAwait(false);

            var modelPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "classify.dat");
            trained.Model.Write(modelPath);

            // Build an unlabelled data set holding one "Good" and one "Bad" document.
            var unlabelled = ArffDataSet.Create <PositivityType>("Test");
            unlabelled.UseTotal = true;

            var positive = unlabelled.AddDocument();
            positive.AddRecord("Good");

            var negative = unlabelled.AddDocument();
            negative.AddRecord("Bad");

            // Classify in place and verify the labels assigned to each document.
            var classifier = new SvmTesting(trained.Model, factory);
            classifier.Classify(unlabelled);

            Assert.AreEqual(PositivityType.Positive, positive.Class.Value);
            Assert.AreEqual(PositivityType.Negative, negative.Class.Value);
        }
Beispiel #2
0
        /// <summary>
        /// Converts <paramref name="documentSet"/> into a data set, compacts and normalizes it,
        /// and trains a model on it through <c>SvmTrainClient</c>.
        /// </summary>
        /// <param name="documentSet">Documents to learn from; it and its <c>Document</c> member must not be null.</param>
        /// <param name="header">Training settings; supplies the normalization to apply. Must not be null.</param>
        /// <param name="token">Token checked before normalization and passed on to the training call.</param>
        /// <returns>The result produced by <c>SvmTrainClient.Train</c>.</returns>
        /// <exception cref="LearningException">
        /// <c>CreateDataset</c> returned null, or fewer than 10 documents remain after compaction.
        /// </exception>
        public async Task <TrainingResults> Train(DocumentSet documentSet, TrainingHeader header, CancellationToken token)
        {
            Guard.NotNull(() => documentSet, documentSet);
            Guard.NotNull(() => header, header);
            Guard.NotNull(() => documentSet.Document, documentSet.Document);
            log.Debug("Train with {0} - {1}", documentSet.Document.Length, header);

            using (PerformanceTrace.Info(log, "Training..."))
            {
                var dataset = CreateDataset(documentSet, token);
                if (dataset == null)
                {
                    throw new LearningException("Not enough documents to learn patterns");
                }

                // Compact header, reviews and classes with the same threshold (3) before
                // deciding whether enough documents remain to train on.
                dataset.CompactHeader(3);
                dataset.CompactReviews(3);
                dataset.CompactClass(3);

                if (dataset.TotalDocuments < 10)
                {
                    throw new LearningException("Not enough documents to learn patterns");
                }

                token.ThrowIfCancellationRequested();
                dataset.Normalize(header.Normalization);

                var client = new SvmTrainClient(dataset);
                return await client.Train(header, token).ConfigureAwait(false);
            }
        }
        /// <summary>
        /// Builds the parameter selection strategy for the data set held by this instance.
        /// </summary>
        /// <param name="header">
        /// Training settings; its <c>Normalization</c> is overwritten with the data set's value.
        /// </param>
        /// <param name="token">Cancellation token given to the task factory used for the selection.</param>
        /// <returns>
        /// The selection created by <c>ParametersSelectionFactory</c>, or <c>null</c> (after logging
        /// an error) when the data set contains no documents.
        /// </returns>
        public IParameterSelection SelectParameters(TrainingHeader header, CancellationToken token)
        {
            log.Info("Selecting parameters...");
            if (dataSet.TotalDocuments == 0)
            {
                log.Error("No document found");
                return(null);
            }

            // Use half of the available cores, but never fewer than one:
            // ConcurrentExclusiveSchedulerPair rejects a concurrency level of 0, which is
            // what ProcessorCount / 2 evaluates to on a single-core machine.
            int maxConcurrency = Math.Max(1, Environment.ProcessorCount / 2);
            var scheduler = new ConcurrentExclusiveSchedulerPair(TaskScheduler.Default, maxConcurrency)
                            .ConcurrentScheduler;
            var taskFactory = new TaskFactory(
                token,
                TaskCreationOptions.LongRunning,
                TaskContinuationOptions.LongRunning,
                scheduler);

            // https://www.quora.com/Support-Vector-Machines/SVM-performance-depends-on-scaling-and-normalization-Is-this-considered-a-drawback
            // The header is forced to the normalization already used by the data set.
            header.Normalization = dataSet.Normalization;

            ParametersSelectionFactory factory = new ParametersSelectionFactory(taskFactory, problemFactory);
            var selection = factory.Create(header, dataSet);

            return(selection);
        }
        /// <summary>
        /// Checks that <c>factory.Create</c> rejects null arguments and returns a
        /// <c>NullParameterSelection</c> when grid selection is switched off.
        /// </summary>
        public void CreateNull()
        {
            var header = TrainingHeader.CreateDefault();

            // Either argument being null must raise ArgumentNullException.
            Assert.Throws <ArgumentNullException>(() => factory.Create(null, dataSet));
            Assert.Throws <ArgumentNullException>(() => factory.Create(header, null));

            // With grid selection disabled the factory returns the null selection.
            header.GridSelection = false;
            var simpleDataSet = ArffDataSet.CreateSimple("Test");
            var selection     = factory.Create(header, simpleDataSet);

            Assert.IsInstanceOf <NullParameterSelection>(selection);
        }
        /// <summary>
        /// Prepares the <c>parameters</c> and <c>model</c> fixtures from the default training header.
        /// </summary>
        public void Setup()
        {
            var defaults = TrainingHeader.CreateDefault();

            // Kernel and SVM type come from the default header; the rest is fixed for the tests.
            parameters = new Parameter
            {
                KernelType  = defaults.Kernel,
                CacheSize   = 200,
                SvmType     = defaults.SvmType,
                Probability = false
            };

            model = new TrainingModel(defaults);
        }
Beispiel #6
0
        /// <summary>
        /// Checks that the <c>TrainingResults</c> constructor rejects a null in any position and
        /// exposes a non-null header and model when given valid arguments.
        /// </summary>
        public void Construct()
        {
            var dataSet  = ArffDataSet.CreateSimple("Test");
            var settings = TrainingHeader.CreateDefault();
            var svmModel = new Model();

            // Each argument is nulled in turn.
            Assert.Throws <ArgumentNullException>(() => new TrainingResults(null, settings, dataSet));
            Assert.Throws <ArgumentNullException>(() => new TrainingResults(svmModel, null, dataSet));
            Assert.Throws <ArgumentNullException>(() => new TrainingResults(svmModel, settings, null));

            var results = new TrainingResults(svmModel, settings, dataSet);

            Assert.IsNotNull(results.Header);
            Assert.IsNotNull(results.Model);
        }
Beispiel #7
0
        /// <summary>
        /// Creates the parameter selection strategy for the given header and data set.
        /// </summary>
        /// <param name="header">Training settings (kernel, SVM type, grid selection flag). Must not be null.</param>
        /// <param name="dataset">Data set used to size the search and compute class weights. Must not be null.</param>
        /// <returns>
        /// A <c>NullParameterSelection</c> when <c>header.GridSelection</c> is false; otherwise a
        /// <c>GridParameterSelection</c> whose search ranges depend on the kernel type.
        /// </returns>
        public IParameterSelection Create(TrainingHeader header, IArffDataSet dataset)
        {
            Guard.NotNull(() => header, header);
            Guard.NotNull(() => dataset, dataset);

            var defaultParameter = new Parameter
            {
                KernelType = header.Kernel,
                CacheSize  = 200,
                SvmType    = header.SvmType
            };
            var model = new TrainingModel(header);

            // No grid search requested: hand back the defaults untouched.
            if (!header.GridSelection)
            {
                return(new NullParameterSelection(defaultParameter, model));
            }

            logger.Info("Investigate LibLinear");

            // Non-linear kernels search over both C and gamma with fixed ranges.
            if (header.Kernel != KernelType.Linear)
            {
                var fullSearch = new GridSearchParameters(3, GetList(-5, 15, 2), GetList(-15, 3, 2), defaultParameter);
                return(new GridParameterSelection(taskFactory, model, fullSearch));
            }

            // Linear kernel: gamma is reduced to the single list produced by GetList(1, 1, 1).
            var gamma = GetList(1, 1, 1);
            bool featuresDominate = dataset.Header.Total > (dataset.TotalDocuments * 10);
            if (featuresDominate)
            {
                logger.Info("Selecting Linear features >> instances");
                defaultParameter.Shrinking = false;
            }
            else
            {
                logger.Warn("Investigate LibLinear");
            }

            // Class weights are derived from the labels of the constructed problem.
            var problem = problemFactory.Construct(dataset).GetProblem();
            defaultParameter.Weights = WeightCalculation.GetWeights(problem.Y);
            foreach (var weight in defaultParameter.Weights)
            {
                logger.Info($"Using class [{weight.Key}] with weight [{weight.Value}]");
            }

            var linearSearch = new GridSearchParameters(3, GetList(-1, 2, 1), gamma, defaultParameter);
            return(new GridParameterSelection(taskFactory, model, linearSearch));
        }
        /// <summary>
        /// Trains on <c>twoClassDataset</c>, then runs the model against a data set reloaded from
        /// "data.arff" and expects <c>SvmTesting.Test</c> to return 1.
        /// </summary>
        public async Task TestTwoClass()
        {
            var factory   = new ProblemFactory(twoClassDataset);
            var trainer   = new SvmTraining(factory, twoClassDataset);
            var selection = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained   = await trainer.Train(selection).ConfigureAwait(false);

            var testDirectory = TestContext.CurrentContext.TestDirectory;
            var arffPath      = Path.Combine(testDirectory, "data.arff");

            // NOTE(review): the file written here comes from threeClassDataset although the model
            // was trained on twoClassDataset — confirm this is intentional and not a copy-paste slip.
            threeClassDataset.Save(arffPath);
            var reloaded = ArffDataSet.LoadSimple(arffPath);

            var tester = new SvmTesting(trained.Model, factory);
            var score  = tester.Test(reloaded, Path.Combine(testDirectory, "."));

            Assert.AreEqual(1, score);
        }
        /// <summary>
        /// Builds the <c>instance</c> fixture: a <c>TrainingResults</c> wrapping a minimal
        /// two-class model, the default header and an empty simple data set.
        /// </summary>
        public void Setup()
        {
            var simpleDataSet = ArffDataSet.CreateSimple("Test");

            // Only the members set below are populated; the nullable arrays are left null on purpose.
            var svmModel = new Model
            {
                NumberOfClasses           = 2,
                ClassLabels               = null,
                NumberOfSVPerClass        = null,
                PairwiseProbabilityA      = null,
                PairwiseProbabilityB      = null,
                SupportVectorCoefficients = new double[1][],
                Rho                       = new double[] { 0 },
                Parameter                 = new Parameter()
            };

            instance = new TrainingResults(svmModel, TrainingHeader.CreateDefault(), simpleDataSet);
        }
Beispiel #10
0
        /// <summary>
        /// Loads every document under "location", trains with L2 normalization and expects the
        /// resulting model's performance, rounded to two decimals, to be at least 0.80.
        /// </summary>
        public async Task Learn()
        {
            var parser    = new DocumentParser(Global.TextSplitter, new DevExpressParserFactory(20));
            var manager   = new FileManager(parser, CancellationToken.None, 4);
            var documents = await manager.LoadAll(new DirectoryInfo(@"location")).ConfigureAwait(false);

            var header = TrainingHeader.CreateDefault();
            header.Normalization = NormalizationType.L2;

            var training = new TrainingManager();
            var results  = await training.Train(documents, header, CancellationToken.None).ConfigureAwait(false);

            var performance = Math.Round(results.Model.Parameter.Performance, 2);
            Assert.LessOrEqual(0.80, performance);
        }
        /// <summary>
        /// Builds a three-class data set with 20 documents per class, trains on it, saves the data
        /// set and the model to the test directory, and expects <c>SvmTesting.Test</c> on the
        /// reloaded data to return 1.
        /// </summary>
        public async Task TestMultiClass()
        {
            var dataSet = ArffDataSet.CreateSimple("Test");
            dataSet.Header.RegisterNominalClass("One", "Two", "Three");
            dataSet.UseTotal = true;

            // Each entry is { class label, record text }; documents are added in this order per round.
            var samples = new[]
            {
                new[] { "One", "Good" },
                new[] { "Two", "Bad" },
                new[] { "Three", "Some" }
            };

            for (int round = 0; round < 20; round++)
            {
                foreach (var sample in samples)
                {
                    var document = dataSet.AddDocument();
                    document.Class.Value = sample[0];
                    document.AddRecord(sample[1]);
                }
            }

            var factory   = new ProblemFactory(dataSet);
            var trainer   = new SvmTraining(factory, dataSet);
            var selection = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained   = await trainer.Train(selection).ConfigureAwait(false);

            var testDirectory = TestContext.CurrentContext.TestDirectory;
            var arffPath      = Path.Combine(testDirectory, "data.arff");

            dataSet.Save(arffPath);
            trained.Model.Write(Path.Combine(testDirectory, "label.dat"));
            var reloaded = ArffDataSet.LoadSimple(arffPath);

            var tester = new SvmTesting(trained.Model, factory);
            var score  = tester.Test(reloaded, Path.Combine(testDirectory, "."));

            Assert.AreEqual(1, score);
        }
        /// <summary>
        /// Populates the shared data set with the requested number of numeric features and
        /// single-class documents, then checks the grid search parameters the factory produces.
        /// </summary>
        /// <param name="kernel">Kernel type passed to the training header.</param>
        /// <param name="instances">Number of documents to add to the data set.</param>
        /// <param name="features">Number of numeric attributes to register in the header.</param>
        /// <param name="gammas">Expected length of the gamma search list.</param>
        /// <param name="c">Expected length of the C search list.</param>
        /// <param name="shrink">Expected value of the default parameter's <c>Shrinking</c> flag.</param>
        public void CreateGrid(KernelType kernel, int instances, int features, int gammas, int c, bool shrink)
        {
            for (int i = 0; i < features; i++)
            {
                dataSet.Header.RegisterNumeric(i.ToString());
            }

            for (int i = 0; i < instances; i++)
            {
                var review = dataSet.AddDocument();
                review.Class.Value = "One";
                review.AddRecord("Record").Value = 0.1;
            }

            TrainingHeader header    = TrainingHeader.Create(kernel, SvmType.C_SVC);
            var            selection = factory.Create(header, dataSet);

            // Fail with a clear assertion message (rather than a NullReferenceException further
            // down) when the factory returns something other than a grid selection.
            Assert.IsInstanceOf <GridParameterSelection>(selection);
            var result = (GridParameterSelection)selection;

            Assert.AreEqual(3, result.SearchParameters.Folds);
            Assert.AreEqual(gammas, result.SearchParameters.Gamma.Length);
            Assert.AreEqual(c, result.SearchParameters.C.Length);
            Assert.AreEqual(kernel, result.SearchParameters.Default.KernelType);
            Assert.AreEqual(shrink, result.SearchParameters.Default.Shrinking);
            Assert.AreEqual(SvmType.C_SVC, result.SearchParameters.Default.SvmType);
        }
Beispiel #13
0
        /// <summary>
        /// Reads the row of the <c>Training</c> table whose <c>Id</c> equals
        /// <paramref name="idTraining"/> and maps it onto a <c>TrainingHeader</c>.
        /// </summary>
        /// <param name="idTraining">Identifier of the training row to load.</param>
        /// <param name="db">Wrapper whose <c>connection</c> is used to execute the query.</param>
        /// <returns>
        /// The populated header, or a freshly constructed (unpopulated) header when no row matches.
        /// </returns>
        public static TrainingHeader GetHeader(int idTraining, DatabaseConnection db)
        {
            var header = new TrainingHeader();

            // Parameterized query instead of string concatenation; the using blocks guarantee
            // the command and reader are released even if reading or casting a column throws.
            using (var cmd = new SqlCommand("SELECT * FROM Training WHERE Id = @Id", db.connection))
            {
                cmd.Parameters.AddWithValue("@Id", idTraining);

                using (var reader = cmd.ExecuteReader())
                {
                    // Read() returns false straight away on an empty result, so no HasRows check
                    // is needed. If several rows match, the last one read wins.
                    while (reader.Read())
                    {
                        header.Id     = (int)reader["Id"];
                        header.Name   = reader["Name"].ToString();
                        header.Date   = (DateTime)reader["Date"];
                        header.IdUser = (int)reader["IdUser"];
                    }
                }
            }

            return(header);
        }
Beispiel #14
0
 /// <summary>
 /// Checks the exceptions raised by <c>manager.Train</c>: null document set, null header,
 /// and the shared <c>set</c> fixture with an otherwise valid header.
 /// </summary>
 public void TrainInvalidArguments()
 {
     // Null document set.
     Assert.ThrowsAsync <ArgumentNullException>(
         () => manager.Train(null, TrainingHeader.CreateDefault(), CancellationToken.None));

     // Null header.
     Assert.ThrowsAsync <ArgumentNullException>(
         () => manager.Train(set, null, CancellationToken.None));

     // Valid arguments, but training on the fixture is expected to fail with LearningException.
     Assert.ThrowsAsync <LearningException>(
         () => manager.Train(set, TrainingHeader.CreateDefault(), CancellationToken.None));
 }