Exemple #1
0
        // Fixture setup: builds a single-sentence document holding the words "Good"
        // (flagged as sentiment), "Two" and "#Three", then creates the parsed review
        // through the review-manager factory resolved from the container.
        public void Setup()
        {
            mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
            instance = CreateProcessArff();

            document = new Document("Test");
            document.Sentences.Add(new SentenceItem("Test"));
            var sentenceWords = document.Sentences[0].Words;

            var goodWord = new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true };
            sentenceWords.Add(WordExFactory.Construct(goodWord));

            var plainWord = new TestWordItem("Two") { Stemmed = "Two" };
            sentenceWords.Add(WordExFactory.Construct(plainWord));

            var hashWord = new TestWordItem("#Three") { Stemmed = "#Three" };
            sentenceWords.Add(WordExFactory.Construct(hashWord));

            // The container hands back a delegate that builds a manager for a given document.
            var createManager = ActualWordsHandler.InstanceSimple.Container.Resolve<Func<Document, IParsedReviewManager>>();
            review = createManager(document).Create();
        }
        // Trains an SVM on the three-class fixture dataset, writes the model to
        // "classify.dat" in the test directory, then classifies one "Good" and one
        // "Bad" document and expects Positive / Negative respectively.
        public async Task Classify()
        {
            var problems = new ProblemFactory(threeClassDataset);
            var trainer = new SvmTraining(problems, threeClassDataset);
            var selected = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained = await trainer.Train(selected).ConfigureAwait(false);

            var modelPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "classify.dat");
            trained.Model.Write(modelPath);

            // Unlabelled documents to be classified by the trained model.
            var unlabeled = ArffDataSet.Create<PositivityType>("Test");
            unlabeled.UseTotal = true;

            var goodDocument = unlabeled.AddDocument();
            goodDocument.AddRecord("Good");

            var badDocument = unlabeled.AddDocument();
            badDocument.AddRecord("Bad");

            var tester = new SvmTesting(trained.Model, problems);
            tester.Classify(unlabeled);

            Assert.AreEqual(PositivityType.Positive, goodDocument.Class.Value);
            Assert.AreEqual(PositivityType.Negative, badDocument.Class.Value);
        }
        /// <summary>
        /// Loads training results from a zip archive. Entries are matched by the
        /// suffix of their full name (case-insensitive) against the header, model
        /// and ARFF file names.
        /// </summary>
        /// <param name="path">Path to the zip archive.</param>
        /// <returns>Training results built from whichever entries were found.</returns>
        private static TrainingResults LoadCompressed(string path)
        {
            // Fixed: the message previously said "LoadNormal", which made log traces misleading.
            log.Debug("LoadCompressed: {0}", path);
            TrainingHeader header = null;
            Model model = null;
            IArffDataSet dataSet = null;

            using (ZipArchive archive = ZipFile.OpenRead(path))
            {
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    if (entry.FullName.EndsWith(headerFile, StringComparison.OrdinalIgnoreCase))
                    {
                        // XDocument.Load reads the whole document, so the entry stream
                        // can be released as soon as loading finishes.
                        using (var stream = entry.Open())
                        {
                            header = XDocument.Load(stream).XmlDeserialize<TrainingHeader>();
                        }
                    }
                    else if (entry.FullName.EndsWith(modelFile, StringComparison.OrdinalIgnoreCase))
                    {
                        // NOTE(review): assumes Model.Read consumes the stream before returning — confirm.
                        using (var stream = entry.Open())
                        {
                            model = Model.Read(stream);
                        }
                    }
                    else if (entry.FullName.EndsWith(arffFile, StringComparison.OrdinalIgnoreCase))
                    {
                        using (StreamReader reader = new StreamReader(entry.Open()))
                        {
                            dataSet = ArffDataSet.LoadSimple(reader);
                        }
                    }
                }
            }

            return new TrainingResults(model, header, dataSet);
        }
        // Fixture setup: fills a typed (PositivityType) dataset and a simple
        // nominal-class dataset with 20 "Good"/positive and 20 "Bad"/negative
        // documents each.
        public void Setup()
        {
            const int samplesPerClass = 20;

            threeClassDataset = ArffDataSet.Create<PositivityType>("Test");
            threeClassDataset.UseTotal = true;

            twoClassDataset = ArffDataSet.CreateSimple("Test");
            twoClassDataset.UseTotal = true;
            twoClassDataset.Header.RegisterNominalClass("Positive", "Negative");

            for (var round = 0; round < samplesPerClass; ++round)
            {
                var typedPositive = threeClassDataset.AddDocument();
                typedPositive.Class.Value = PositivityType.Positive;
                typedPositive.AddRecord("Good");

                var nominalPositive = twoClassDataset.AddDocument();
                nominalPositive.Class.Value = "Positive";
                nominalPositive.AddRecord("Good");

                var typedNegative = threeClassDataset.AddDocument();
                typedNegative.Class.Value = PositivityType.Negative;
                typedNegative.AddRecord("Bad");

                var nominalNegative = twoClassDataset.AddDocument();
                nominalNegative.Class.Value = "Negative";
                nominalNegative.AddRecord("Bad");
            }
        }
        // Fixture setup: creates a simple dataset with a single nominal class "One"
        // and a parameter-selection factory bound to it.
        public void Setup()
        {
            dataSet = ArffDataSet.CreateSimple("Test");
            dataSet.Header.RegisterNominalClass("One");

            factory = new ParametersSelectionFactory(Task.Factory, new ProblemFactory(dataSet));
        }
Exemple #6
0
        // Loads Data/problem.arff from the test directory, computes a range
        // transform for the resulting problem and checks that scaling yields a result.
        public void Compute()
        {
            var arffPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "problem.arff");
            var source = new ProblemSource(ArffDataSet.LoadSimple(arffPath));
            var problem = source.GetProblem();

            var scaled = RangeTransform.Compute(problem).Scale(problem);

            Assert.IsNotNull(scaled);
        }
Exemple #7
0
        // Verifies that Construct rejects null and that an empty dataset produces
        // an empty "Documents" root node.
        public void Construct()
        {
            Assert.Throws<ArgumentNullException>(() => TrainedTreeData.Construct(null));

            var emptySet = ArffDataSet.CreateSimple("Test");
            var root = TrainedTreeData.Construct(emptySet);

            Assert.AreEqual(0, root.Children.Length);
            Assert.AreEqual(0, root.Count);
            Assert.AreEqual("Documents", root.Name);
            Assert.AreEqual("Documents (0)", root.Description);
        }
        // Verifies argument validation of factory.Create and that disabling grid
        // selection on the header yields a NullParameterSelection.
        public void CreateNull()
        {
            var defaults = TrainingHeader.CreateDefault();

            Assert.Throws<ArgumentNullException>(() => factory.Create(null, dataSet));
            Assert.Throws<ArgumentNullException>(() => factory.Create(defaults, null));

            defaults.GridSelection = false;
            var selection = factory.Create(defaults, ArffDataSet.CreateSimple("Test"));

            Assert.IsInstanceOf<NullParameterSelection>(selection);
        }
Exemple #9
0
        // Fixture setup: loads data/data.arff and data/model.dat from the test
        // directory and wires them into a LearnedClassifier.
        public void Setup()
        {
            var dataDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "data");

            var trainedSet = ArffDataSet.LoadSimple(Path.Combine(dataDirectory, "data.arff"));
            var trainedModel = Wikiled.MachineLearning.Svm.Logic.Model.Read(Path.Combine(dataDirectory, "model.dat"));

            var parser = new DocumentParser(Global.TextSplitter, new DevExpressParserFactory(20));
            var client = new SvmTestClient(trainedSet, trainedModel);
            instance = new LearnedClassifier(parser, client);
        }
        /// <summary>
        /// Loads Data/data.arff from the test directory and builds an SVM problem from it.
        /// </summary>
        /// <param name="withScaling">When true, the problem factory is wrapped with range scaling.</param>
        /// <returns>The problem constructed from the loaded dataset.</returns>
        private Problem LoadData(bool withScaling = false)
        {
            // Build the path from segments instead of a hard-coded ".\Data\data.arff",
            // so the directory separator is correct on non-Windows platforms too.
            var file = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "data.arff");
            var arff = ArffDataSet.Load<PositivityType>(file);
            IProblemFactory factory = new ProblemFactory(arff);

            if (withScaling)
            {
                factory = factory.WithRangeScaling();
            }

            return factory.Construct(arff).GetProblem();
        }
Exemple #11
0
        /// <summary>
        /// Loads a machine sentiment from a directory containing "data.arff" and
        /// "training.model".
        /// </summary>
        /// <param name="path">Directory holding the two files.</param>
        /// <returns>A sentiment instance built from the loaded dataset and classifier.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="path"/> is null or empty.</exception>
        public static IMachineSentiment Load(string path)
        {
            if (path == null || path.Length == 0)
            {
                throw new ArgumentException("Value cannot be null or empty.", nameof(path));
            }

            log.LogInformation("Loading {0}...", path);

            var arffPath = Path.Combine(path, "data.arff");
            IArffDataSet reviews = ArffDataSet.Load<PositivityType>(arffPath);

            var classifier = new Classifier();
            var modelPath = Path.Combine(path, "training.model");
            classifier.Load(modelPath);

            return new MachineSentiment(reviews, classifier);
        }
Exemple #12
0
        // Verifies that TrainingResults rejects a null model, header or dataset and
        // exposes the header and model when constructed with valid arguments.
        public void Construct()
        {
            var emptySet = ArffDataSet.CreateSimple("Test");
            var defaults = TrainingHeader.CreateDefault();
            var blankModel = new Model();

            Assert.Throws<ArgumentNullException>(() => new TrainingResults(null, defaults, emptySet));
            Assert.Throws<ArgumentNullException>(() => new TrainingResults(blankModel, null, emptySet));
            Assert.Throws<ArgumentNullException>(() => new TrainingResults(blankModel, defaults, null));

            var results = new TrainingResults(blankModel, defaults, emptySet);

            Assert.IsNotNull(results.Header);
            Assert.IsNotNull(results.Model);
        }
        // Trains on the two-class fixture dataset, round-trips a dataset through
        // data.arff in the test directory and expects the test score to be 1.
        public async Task TestTwoClass()
        {
            var problems = new ProblemFactory(twoClassDataset);
            var trainer = new SvmTraining(problems, twoClassDataset);
            var selected = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained = await trainer.Train(selected).ConfigureAwait(false);

            var testDirectory = TestContext.CurrentContext.TestDirectory;
            var arffPath = Path.Combine(testDirectory, "data.arff");

            // NOTE(review): the three-class dataset is saved here although the model was
            // trained on the two-class one — confirm this is intentional.
            threeClassDataset.Save(arffPath);
            var reloaded = ArffDataSet.LoadSimple(arffPath);

            var tester = new SvmTesting(trained.Model, problems);
            var score = tester.Test(reloaded, Path.Combine(testDirectory, "."));

            Assert.AreEqual(1, score);
        }
        // Fixture setup: builds a minimal two-class model (most arrays left null,
        // a single zero Rho entry) and wraps it in TrainingResults with a default
        // header and an empty dataset.
        public void Setup()
        {
            var emptySet = ArffDataSet.CreateSimple("Test");

            var minimalModel = new Model
            {
                NumberOfClasses = 2,
                ClassLabels = null,
                NumberOfSVPerClass = null,
                PairwiseProbabilityA = null,
                PairwiseProbabilityB = null,
                SupportVectorCoefficients = new double[1][],
                Rho = new double[] { 0 },
                Parameter = new Parameter()
            };

            instance = new TrainingResults(minimalModel, TrainingHeader.CreateDefault(), emptySet);
        }
Exemple #15
0
        // Builds a dataset with two documents of class "One" and one of class "Two"
        // and checks the tree has two children and a total count of three.
        public void ConstructFull()
        {
            var populated = ArffDataSet.CreateSimple("Test");
            populated.Header.RegisterNominalClass("One", "Two");

            var first = populated.AddDocument();
            first.Class.Value = "One";

            var second = populated.AddDocument();
            second.Class.Value = "One";

            var third = populated.AddDocument();
            third.Class.Value = "Two";

            var root = TrainedTreeData.Construct(populated);

            Assert.AreEqual(2, root.Children.Length);
            Assert.AreEqual(3, root.Count);
            Assert.AreEqual("Documents", root.Name);
        }
        // Loads the given ARFF file (typed when the expected result is a
        // PositivityType, simple otherwise) and model from the "data" folder,
        // classifies a document with "Good"=2 and "Bad"=1 and compares the
        // assigned class with the expected result.
        public void Classify(string arff, string modelName, object result)
        {
            var dataDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "data");
            var arffPath = Path.Combine(dataDirectory, arff);

            var expectsPositivity = result.GetType() == typeof(PositivityType);
            var trainedSet = expectsPositivity
                                 ? ArffDataSet.Load<PositivityType>(arffPath)
                                 : ArffDataSet.LoadSimple(arffPath);

            var trainedModel = Model.Read(Path.Combine(dataDirectory, modelName));

            IProblemFactory problems = new ProblemFactory(trainedSet);
            var tester = new SvmTesting(trainedModel, problems);

            // The document to classify lives in a fresh dataset derived from the loaded one.
            var target = trainedSet.CreateDataSet("Test");
            var candidate = target.AddDocument();

            var goodRecord = candidate.AddRecord("Good");
            goodRecord.Value = 2;
            var badRecord = candidate.AddRecord("Bad");
            badRecord.Value = 1;

            tester.Classify(target);

            Assert.AreEqual(result, candidate.Class.Value);
        }
Exemple #17
0
        /// <summary>
        /// Builds a "subjectivity" dataset from a document set. Each document with at
        /// least one label becomes a row whose class is the document's last label and
        /// whose records come from its words table.
        /// </summary>
        /// <param name="documentSet">Source documents; validated as non-null by the guard.</param>
        /// <param name="token">Checked before each labelled document is processed.</param>
        /// <returns>The populated dataset, or null when there are no documents or no labels.</returns>
        public IArffDataSet CreateDataset(DocumentSet documentSet, CancellationToken token)
        {
            Guard.NotNull(() => documentSet, documentSet);
            log.Debug("CreateDataset");

            var documents = documentSet.Document;
            if (documents == null || documents.Length == 0)
            {
                log.Warn("No documents");
                return null;
            }

            // Every distinct label across all documents becomes a nominal class value.
            var labels = documents.SelectMany(item => item.Labels).Distinct().ToArray();
            if (labels.Length == 0)
            {
                log.Warn("No labels found");
                return null;
            }

            var dataHolder = ArffDataSet.CreateSimple("subjectivity");
            dataHolder.Header.RegisterNominalClass(labels);

            foreach (var definition in documents)
            {
                // Unlabelled documents contribute nothing to the dataset.
                if (definition.Labels.Length <= 0)
                {
                    continue;
                }

                token.ThrowIfCancellationRequested();

                var row = dataHolder.AddDocument();
                row.Class.Value = definition.Labels.Last();
                foreach (var entry in definition.WordsTable)
                {
                    var record = row.AddRecord(entry.Key);
                    record.Value = entry.Value;
                }
            }

            return dataHolder;
        }
        // Fixture setup: creates a mocked training model, a task factory limited to
        // two concurrent tasks, a 4x4 grid-search parameter set and a one-document
        // three-class problem, plus an unsignalled reset event.
        public void Setup()
        {
            training = new Mock<ITrainingModel>();

            var schedulerPair = new ConcurrentExclusiveSchedulerPair(TaskScheduler.Default, 2);
            taskFactory = new TaskFactory(schedulerPair.ConcurrentScheduler);

            var firstGrid = new double[] { 1, 2, 3, 4 };
            var secondGrid = new double[] { 1, 2, 3, 4 };
            parameters = new GridSearchParameters(5, firstGrid, secondGrid, new Parameter());
            instance = new GridParameterSelection(taskFactory, training.Object, parameters);

            var sample = ArffDataSet.CreateSimple("Test");
            sample.Header.RegisterNominalClass("One", "Two", "Three");
            sample.UseTotal = true;

            var onlyDocument = sample.AddDocument();
            onlyDocument.Class.Value = "One";
            onlyDocument.AddRecord("Good");

            IProblemFactory problems = new ProblemFactory(sample);
            problem = problems.Construct(sample).GetProblem();

            resetEvent = new ManualResetEvent(false);
        }
        // Trains on a three-class dataset (20 documents per class), saves the
        // dataset and model to the test directory, reloads the dataset and expects
        // the test score to be 1.
        public async Task TestMultiClass()
        {
            const int samplesPerClass = 20;

            var trainingSet = ArffDataSet.CreateSimple("Test");
            trainingSet.Header.RegisterNominalClass("One", "Two", "Three");
            trainingSet.UseTotal = true;

            for (var round = 0; round < samplesPerClass; ++round)
            {
                var first = trainingSet.AddDocument();
                first.Class.Value = "One";
                first.AddRecord("Good");

                var second = trainingSet.AddDocument();
                second.Class.Value = "Two";
                second.AddRecord("Bad");

                var third = trainingSet.AddDocument();
                third.Class.Value = "Three";
                third.AddRecord("Some");
            }

            var problems = new ProblemFactory(trainingSet);
            var trainer = new SvmTraining(problems, trainingSet);
            var selected = trainer.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
            var trained = await trainer.Train(selected).ConfigureAwait(false);

            var testDirectory = TestContext.CurrentContext.TestDirectory;
            var arffPath = Path.Combine(testDirectory, "data.arff");

            trainingSet.Save(arffPath);
            trained.Model.Write(Path.Combine(testDirectory, "label.dat"));
            var reloaded = ArffDataSet.LoadSimple(arffPath);

            var tester = new SvmTesting(trained.Model, problems);
            var score = tester.Test(reloaded, Path.Combine(testDirectory, "."));

            Assert.AreEqual(1, score);
        }
Exemple #20
0
        /// <summary>
        /// Writes the vectors of this word as an ARFF file named after the word's
        /// pure-letter text inside the given directory.
        /// </summary>
        /// <param name="path">Directory the ARFF file is written into.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="path"/> is null or whitespace.</exception>
        public void Save(string path)
        {
            if (string.IsNullOrWhiteSpace(path))
            {
                throw new ArgumentException("Value cannot be null or whitespace.", nameof(path));
            }

            log.LogInformation("Saving {0}...", path);
            path = Path.Combine(path, $"{Word.Text.CreatePureLetterText()}.arff");

            IArffDataSet output = ArffDataSet.Create<PositivityType>(Word.Text);
            output.UseTotal = true;

            foreach (WordsContext context in Vectors)
            {
                IArffDataRow row = output.AddDocument();
                if (context.SentimentValue > 0)
                {
                    row.Class.Value = PositivityType.Positive;
                }
                else
                {
                    row.Class.Value = PositivityType.Negative;
                }

                foreach (WordEx candidate in context.Words)
                {
                    // Only aspects and words carrying a non-zero value are recorded.
                    if (candidate.IsAspect || candidate.Value != 0)
                    {
                        DataRecord record = row.AddRecord(candidate.Text);
                        record.Value = record.Total;
                    }
                }
            }

            output.Save(path);
            log.LogInformation("Saving {0} Completed.", path);
        }
        /// <summary>
        /// Loads training results from a directory containing the header, model and
        /// ARFF files. The header is left null when its file does not exist.
        /// </summary>
        /// <param name="path">Directory to load from.</param>
        /// <returns>Training results built from the files found in the directory.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when the directory does not exist.</exception>
        private static TrainingResults LoadNormal(string path)
        {
            log.Debug("LoadNormal: {0}", path);
            if (!Directory.Exists(path))
            {
                throw new ArgumentOutOfRangeException(nameof(path), path);
            }

            var file = GetFile(path, headerFile);
            var header = File.Exists(file) ? XDocument.Load(file).XmlDeserialize<TrainingHeader>() : null;

            var model = Model.Read(GetFile(path, modelFile));
            IArffDataSet arff;

            // Open read-only: FileStream(path, FileMode.Open) asks for read/write access,
            // which fails on read-only files even though the data is only read here.
            using (FileStream stream = File.OpenRead(GetFile(path, arffFile)))
            {
                using (StreamReader reader = new StreamReader(stream))
                {
                    arff = ArffDataSet.LoadSimple(reader);
                }
            }

            return new TrainingResults(model, header, arff);
        }