public void Setup()
{
    mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
    instance = CreateProcessArff();

    // Single-sentence document containing a sentiment word, a plain word and a hashtag.
    document = new Document("Test");
    document.Sentences.Add(new SentenceItem("Test"));
    var sentence = document.Sentences[0];
    sentence.Words.Add(WordExFactory.Construct(new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true }));
    sentence.Words.Add(WordExFactory.Construct(new TestWordItem("Two") { Stemmed = "Two" }));
    sentence.Words.Add(WordExFactory.Construct(new TestWordItem("#Three") { Stemmed = "#Three" }));

    // Resolve the review manager for the document and build the parsed review.
    var managerFactory = ActualWordsHandler.InstanceSimple.Container.Resolve<Func<Document, IParsedReviewManager>>()(document);
    review = managerFactory.Create();
}
public async Task Classify()
{
    // Train an SVM on the three-class dataset with default header settings.
    var problemFactory = new ProblemFactory(threeClassDataset);
    var training = new SvmTraining(problemFactory, threeClassDataset);
    var parameters = training.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
    var results = await training.Train(parameters).ConfigureAwait(false);
    results.Model.Write(Path.Combine(TestContext.CurrentContext.TestDirectory, "classify.dat"));

    // Build an unlabelled test set: one clearly positive and one clearly negative document.
    var testSet = ArffDataSet.Create<PositivityType>("Test");
    testSet.UseTotal = true;
    var positive = testSet.AddDocument();
    positive.AddRecord("Good");
    var negative = testSet.AddDocument();
    negative.AddRecord("Bad");

    // Classification should assign the expected polarity to each document.
    var testing = new SvmTesting(results.Model, problemFactory);
    testing.Classify(testSet);
    Assert.AreEqual(PositivityType.Positive, positive.Class.Value);
    Assert.AreEqual(PositivityType.Negative, negative.Class.Value);
}
/// <summary>
/// Loads training results (header, SVM model and arff dataset) from a single zip archive.
/// Entries are matched by file-name suffix, case-insensitively; any entry missing from
/// the archive leaves the corresponding component null.
/// </summary>
/// <param name="path">Path to the zip archive.</param>
/// <returns>The reconstructed <see cref="TrainingResults"/>.</returns>
private static TrainingResults LoadCompressed(string path)
{
    // Fixed copy-paste bug: this method previously logged "LoadNormal: {0}".
    log.Debug("LoadCompressed: {0}", path);
    TrainingHeader header = null;
    Model model = null;
    IArffDataSet dataSet = null;
    using (ZipArchive archive = ZipFile.OpenRead(path))
    {
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            if (entry.FullName.EndsWith(headerFile, StringComparison.OrdinalIgnoreCase))
            {
                // Dispose the entry stream explicitly; the original leaked it.
                using (Stream stream = entry.Open())
                {
                    header = XDocument.Load(stream).XmlDeserialize<TrainingHeader>();
                }
            }
            else if (entry.FullName.EndsWith(modelFile, StringComparison.OrdinalIgnoreCase))
            {
                using (Stream stream = entry.Open())
                {
                    model = Model.Read(stream);
                }
            }
            else if (entry.FullName.EndsWith(arffFile, StringComparison.OrdinalIgnoreCase))
            {
                using (StreamReader reader = new StreamReader(entry.Open()))
                {
                    dataSet = ArffDataSet.LoadSimple(reader);
                }
            }
        }
    }

    return new TrainingResults(model, header, dataSet);
}
public void Setup()
{
    // Enum-labelled dataset (PositivityType) and string-labelled two-class dataset.
    threeClassDataset = ArffDataSet.Create<PositivityType>("Test");
    threeClassDataset.UseTotal = true;
    twoClassDataset = ArffDataSet.CreateSimple("Test");
    twoClassDataset.UseTotal = true;
    twoClassDataset.Header.RegisterNominalClass("Positive", "Negative");

    // Seed both datasets with 20 positive/negative document pairs.
    for (var pair = 0; pair < 20; pair++)
    {
        var positiveRow = threeClassDataset.AddDocument();
        positiveRow.Class.Value = PositivityType.Positive;
        positiveRow.AddRecord("Good");

        positiveRow = twoClassDataset.AddDocument();
        positiveRow.Class.Value = "Positive";
        positiveRow.AddRecord("Good");

        var negativeRow = threeClassDataset.AddDocument();
        negativeRow.Class.Value = PositivityType.Negative;
        negativeRow.AddRecord("Bad");

        negativeRow = twoClassDataset.AddDocument();
        negativeRow.Class.Value = "Negative";
        negativeRow.AddRecord("Bad");
    }
}
public void Setup()
{
    // Minimal single-class dataset feeding the parameters-selection factory under test.
    dataSet = ArffDataSet.CreateSimple("Test");
    dataSet.Header.RegisterNominalClass("One");
    factory = new ParametersSelectionFactory(Task.Factory, new ProblemFactory(dataSet));
}
public void Compute()
{
    // Load the sample problem, compute its range transform and scale it back over itself.
    var arffPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "problem.arff");
    var dataSet = ArffDataSet.LoadSimple(arffPath);
    var problem = new ProblemSource(dataSet).GetProblem();
    var scaled = RangeTransform.Compute(problem).Scale(problem);
    Assert.IsNotNull(scaled);
}
public void Construct()
{
    // Null input is rejected; an empty dataset yields an empty, zero-count tree.
    Assert.Throws<ArgumentNullException>(() => TrainedTreeData.Construct(null));

    var tree = TrainedTreeData.Construct(ArffDataSet.CreateSimple("Test"));
    Assert.AreEqual(0, tree.Children.Length);
    Assert.AreEqual(0, tree.Count);
    Assert.AreEqual("Documents", tree.Name);
    Assert.AreEqual("Documents (0)", tree.Description);
}
public void CreateNull()
{
    var header = TrainingHeader.CreateDefault();

    // Both arguments are mandatory.
    Assert.Throws<ArgumentNullException>(() => factory.Create(null, dataSet));
    Assert.Throws<ArgumentNullException>(() => factory.Create(header, null));

    // With grid selection disabled the factory falls back to the null-selection strategy.
    header.GridSelection = false;
    var selection = factory.Create(header, ArffDataSet.CreateSimple("Test"));
    Assert.IsInstanceOf<NullParameterSelection>(selection);
}
public void Setup()
{
    // Load the pre-built dataset and SVM model shipped with the test data.
    var dataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "data", "data.arff");
    var dataSet = ArffDataSet.LoadSimple(dataFile);
    var modelFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "data", "model.dat");
    var model = Wikiled.MachineLearning.Svm.Logic.Model.Read(modelFile);

    instance = new LearnedClassifier(
        new DocumentParser(Global.TextSplitter, new DevExpressParserFactory(20)),
        new SvmTestClient(dataSet, model));
}
/// <summary>
/// Loads the sample positivity dataset and converts it into an SVM problem.
/// </summary>
/// <param name="withScaling">When true, wraps the factory with range scaling.</param>
/// <returns>The constructed <see cref="Problem"/>.</returns>
private Problem LoadData(bool withScaling = false)
{
    // Fixed: the original embedded a Windows-style relative path (@".\Data\data.arff")
    // inside Path.Combine, which breaks on non-Windows hosts; use path segments instead.
    var file = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data", "data.arff");
    var arff = ArffDataSet.Load<PositivityType>(file);
    IProblemFactory factory = new ProblemFactory(arff);
    if (withScaling)
    {
        factory = factory.WithRangeScaling();
    }

    return factory.Construct(arff).GetProblem();
}
/// <summary>
/// Loads a machine sentiment instance from a directory containing
/// "data.arff" (the review dataset) and "training.model" (the classifier).
/// </summary>
/// <param name="path">Directory holding the persisted files.</param>
/// <returns>The reconstructed <see cref="IMachineSentiment"/>.</returns>
public static IMachineSentiment Load(string path)
{
    if (string.IsNullOrEmpty(path))
    {
        throw new ArgumentException("Value cannot be null or empty.", nameof(path));
    }

    log.LogInformation("Loading {0}...", path);
    IArffDataSet reviews = ArffDataSet.Load<PositivityType>(Path.Combine(path, "data.arff"));
    var classifier = new Classifier();
    classifier.Load(Path.Combine(path, "training.model"));
    return new MachineSentiment(reviews, classifier);
}
public void Construct()
{
    var arff = ArffDataSet.CreateSimple("Test");
    var header = TrainingHeader.CreateDefault();
    var model = new Model();

    // Every constructor argument is mandatory.
    Assert.Throws<ArgumentNullException>(() => new TrainingResults(null, header, arff));
    Assert.Throws<ArgumentNullException>(() => new TrainingResults(model, null, arff));
    Assert.Throws<ArgumentNullException>(() => new TrainingResults(model, header, null));

    // Valid construction exposes the supplied header and model.
    var results = new TrainingResults(model, header, arff);
    Assert.IsNotNull(results.Header);
    Assert.IsNotNull(results.Model);
}
// Trains an SVM on the two-class ("Positive"/"Negative") dataset and verifies that testing
// against a dataset round-tripped through an arff file on disk reports full (1.0) accuracy.
// NOTE(review): the file is saved from threeClassDataset even though the model is trained on
// twoClassDataset — this looks like a copy-paste slip (compare TestMultiClass, which saves the
// dataset it trained on). Confirm whether cross-dataset testing is intentional before changing,
// since the Assert currently expects 1.
public async Task TestTwoClass() { var problemFactory = new ProblemFactory(twoClassDataset); SvmTraining training = new SvmTraining(problemFactory, twoClassDataset); var parameters = training.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None); var results = await training.Train(parameters).ConfigureAwait(false); var file = Path.Combine(TestContext.CurrentContext.TestDirectory, "data.arff"); threeClassDataset.Save(file); var testFile = ArffDataSet.LoadSimple(file); SvmTesting testing = new SvmTesting(results.Model, problemFactory); var result = testing.Test(testFile, Path.Combine(TestContext.CurrentContext.TestDirectory, ".")); Assert.AreEqual(1, result); }
public void Setup()
{
    var dataSet = ArffDataSet.CreateSimple("Test");

    // Minimal two-class model skeleton: one rho entry, no per-class metadata.
    var model = new Model
    {
        NumberOfClasses = 2,
        ClassLabels = null,
        NumberOfSVPerClass = null,
        PairwiseProbabilityA = null,
        PairwiseProbabilityB = null,
        SupportVectorCoefficients = new double[1][],
        Rho = new double[] { 0 },
        Parameter = new Parameter()
    };

    instance = new TrainingResults(model, TrainingHeader.CreateDefault(), dataSet);
}
public void ConstructFull()
{
    // Two documents labelled "One" and one labelled "Two" → two child nodes, three documents total.
    var dataSet = ArffDataSet.CreateSimple("Test");
    dataSet.Header.RegisterNominalClass("One", "Two");
    foreach (var label in new[] { "One", "One", "Two" })
    {
        var row = dataSet.AddDocument();
        row.Class.Value = label;
    }

    var tree = TrainedTreeData.Construct(dataSet);
    Assert.AreEqual(2, tree.Children.Length);
    Assert.AreEqual(3, tree.Count);
    Assert.AreEqual("Documents", tree.Name);
}
public void Classify(string arff, string modelName, object result)
{
    // Load the dataset in the flavour matching the expected result type.
    var arffPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "data", arff);
    IArffDataSet dataSet;
    if (result.GetType() == typeof(PositivityType))
    {
        dataSet = ArffDataSet.Load<PositivityType>(arffPath);
    }
    else
    {
        dataSet = ArffDataSet.LoadSimple(arffPath);
    }

    var modelPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "data", modelName);
    var model = Model.Read(modelPath);
    IProblemFactory factory = new ProblemFactory(dataSet);
    var client = new SvmTesting(model, factory);

    // Classify a single document weighted towards "Good" and check the predicted class.
    var dataHolder = dataSet.CreateDataSet("Test");
    var review = dataHolder.AddDocument();
    review.AddRecord("Good").Value = 2;
    review.AddRecord("Bad").Value = 1;
    client.Classify(dataHolder);
    Assert.AreEqual(result, review.Class.Value);
}
/// <summary>
/// Builds a "subjectivity" arff dataset from the labelled documents in the set.
/// Returns null when there are no documents or no labels at all; documents without
/// labels are skipped. Each document's class is its last label, and its word table
/// is copied into the row's records.
/// </summary>
/// <param name="documentSet">The source documents; must not be null.</param>
/// <param name="token">Cancellation token checked per document.</param>
/// <returns>The populated dataset, or null when there is nothing to build.</returns>
public IArffDataSet CreateDataset(DocumentSet documentSet, CancellationToken token)
{
    Guard.NotNull(() => documentSet, documentSet);
    log.Debug("CreateDataset");
    if (documentSet.Document == null || documentSet.Document.Length == 0)
    {
        log.Warn("No documents");
        return null;
    }

    var labels = documentSet.Document
        .SelectMany(item => item.Labels)
        .Distinct()
        .ToArray();
    if (labels.Length == 0)
    {
        log.Warn("No labels found");
        return null;
    }

    var dataHolder = ArffDataSet.CreateSimple("subjectivity");
    dataHolder.Header.RegisterNominalClass(labels);
    foreach (var definition in documentSet.Document.Where(item => item.Labels.Length > 0))
    {
        token.ThrowIfCancellationRequested();
        var review = dataHolder.AddDocument();
        review.Class.Value = definition.Labels.Last();
        foreach (var record in definition.WordsTable)
        {
            review.AddRecord(record.Key).Value = record.Value;
        }
    }

    return dataHolder;
}
public void Setup()
{
    training = new Mock<ITrainingModel>();

    // Run grid selection on a scheduler limited to two concurrent tasks.
    var scheduler = new ConcurrentExclusiveSchedulerPair(TaskScheduler.Default, 2).ConcurrentScheduler;
    taskFactory = new TaskFactory(scheduler);
    parameters = new GridSearchParameters(
        5,
        new double[] { 1, 2, 3, 4 },
        new double[] { 1, 2, 3, 4 },
        new Parameter());
    instance = new GridParameterSelection(taskFactory, training.Object, parameters);

    // Three-class dataset with one labelled document backing the SVM problem.
    var dataSet = ArffDataSet.CreateSimple("Test");
    dataSet.Header.RegisterNominalClass("One", "Two", "Three");
    dataSet.UseTotal = true;
    var row = dataSet.AddDocument();
    row.Class.Value = "One";
    row.AddRecord("Good");
    IProblemFactory factory = new ProblemFactory(dataSet);
    problem = factory.Construct(dataSet).GetProblem();
    resetEvent = new ManualResetEvent(false);
}
public async Task TestMultiClass()
{
    // Three-class dataset with 20 documents per class, each class keyed to one word.
    var dataSet = ArffDataSet.CreateSimple("Test");
    dataSet.Header.RegisterNominalClass("One", "Two", "Three");
    dataSet.UseTotal = true;
    for (var i = 0; i < 20; i++)
    {
        var row = dataSet.AddDocument();
        row.Class.Value = "One";
        row.AddRecord("Good");

        row = dataSet.AddDocument();
        row.Class.Value = "Two";
        row.AddRecord("Bad");

        row = dataSet.AddDocument();
        row.Class.Value = "Three";
        row.AddRecord("Some");
    }

    // Train with default parameters, then persist both the data and the model.
    var problemFactory = new ProblemFactory(dataSet);
    var training = new SvmTraining(problemFactory, dataSet);
    var parameters = training.SelectParameters(TrainingHeader.CreateDefault(), CancellationToken.None);
    var results = await training.Train(parameters).ConfigureAwait(false);
    var file = Path.Combine(TestContext.CurrentContext.TestDirectory, "data.arff");
    dataSet.Save(file);
    results.Model.Write(Path.Combine(TestContext.CurrentContext.TestDirectory, "label.dat"));

    // Round-trip the dataset through disk and expect full accuracy on it.
    var testFile = ArffDataSet.LoadSimple(file);
    var testing = new SvmTesting(results.Model, problemFactory);
    var result = testing.Test(testFile, Path.Combine(TestContext.CurrentContext.TestDirectory, "."));
    Assert.AreEqual(1, result);
}
/// <summary>
/// Exports the word's sentiment vectors as an arff file named after the word
/// (pure-letter form) inside the given directory.
/// </summary>
/// <param name="path">Target directory; must not be null or whitespace.</param>
public void Save(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Value cannot be null or whitespace.", nameof(path));
    }

    log.LogInformation("Saving {0}...", path);
    var fileName = $"{Word.Text.CreatePureLetterText()}.arff";
    path = Path.Combine(path, fileName);

    IArffDataSet arff = ArffDataSet.Create<PositivityType>(Word.Text);
    arff.UseTotal = true;
    foreach (WordsContext vector in Vectors)
    {
        IArffDataRow review = arff.AddDocument();
        review.Class.Value = vector.SentimentValue > 0
            ? PositivityType.Positive
            : PositivityType.Negative;
        foreach (WordEx wordItem in vector.Words)
        {
            // Only aspects or words carrying a non-zero value contribute records.
            if (wordItem.IsAspect || wordItem.Value != 0)
            {
                DataRecord addedWord = review.AddRecord(wordItem.Text);
                addedWord.Value = addedWord.Total;
            }
        }
    }

    arff.Save(path);
    log.LogInformation("Saving {0} Completed.", path);
}
/// <summary>
/// Loads training results from a directory of loose files: an optional header
/// document, the SVM model and the arff dataset.
/// </summary>
/// <param name="path">Existing directory holding the persisted files.</param>
/// <returns>The reconstructed <see cref="TrainingResults"/>.</returns>
private static TrainingResults LoadNormal(string path)
{
    log.Debug("LoadNormal: {0}", path);
    if (!Directory.Exists(path))
    {
        throw new ArgumentOutOfRangeException(nameof(path), path);
    }

    // The header is optional — missing file simply means a null header.
    var headerPath = GetFile(path, headerFile);
    TrainingHeader header = null;
    if (File.Exists(headerPath))
    {
        header = XDocument.Load(headerPath).XmlDeserialize<TrainingHeader>();
    }

    var model = Model.Read(GetFile(path, modelFile));
    IArffDataSet arff;
    using (var stream = new FileStream(GetFile(path, arffFile), FileMode.Open))
    using (var reader = new StreamReader(stream))
    {
        arff = ArffDataSet.LoadSimple(reader);
    }

    return new TrainingResults(model, header, arff);
}