/// <summary>
/// Prepares the two fixture data sets: one created for <c>PositivityType</c> classes and one
/// simple set with the nominal classes "Positive" and "Negative". Twenty rounds are added,
/// each contributing a "Good" positive and a "Bad" negative document to both sets.
/// </summary>
public void Setup()
{
    threeClassDataset = ArffDataSet.Create<PositivityType>("Test");
    threeClassDataset.UseTotal = true;

    twoClassDataset = ArffDataSet.CreateSimple("Test");
    twoClassDataset.UseTotal = true;
    twoClassDataset.Header.RegisterNominalClass("Positive", "Negative");

    const int rounds = 20;
    for (int round = 0; round < rounds; round++)
    {
        // Positive samples first (typed set, then simple set) ...
        var typedPositive = threeClassDataset.AddDocument();
        typedPositive.Class.Value = PositivityType.Positive;
        typedPositive.AddRecord("Good");

        var simplePositive = twoClassDataset.AddDocument();
        simplePositive.Class.Value = "Positive";
        simplePositive.AddRecord("Good");

        // ... followed by the negative samples in the same order.
        var typedNegative = threeClassDataset.AddDocument();
        typedNegative.Class.Value = PositivityType.Negative;
        typedNegative.AddRecord("Bad");

        var simpleNegative = twoClassDataset.AddDocument();
        simpleNegative.Class.Value = "Negative";
        simpleNegative.AddRecord("Bad");
    }
}
/// <summary>
/// Runs the trained model against the supplied data set.
/// </summary>
/// <param name="testingSet">Data set to evaluate; must not be null.</param>
/// <returns>The result produced by <c>Prediction.Predict</c> for the constructed problem.</returns>
public PredictionResult Test(IArffDataSet testingSet)
{
    Guard.NotNull(() => testingSet, testingSet);

    // Convert the data set into a problem via the factory, then predict with the trained model.
    var problem = problemFactory.Construct(testingSet).GetProblem();
    return Prediction.Predict(problem, trainedModel, false);
}
/// <summary>
/// Creates the fixture: a <c>PositivityType</c> data set, the instance under test, and a
/// one-sentence document containing the words "Good" (flagged as sentiment), "Two" and
/// "#Three", from which a parsed review is created.
/// </summary>
public void Setup()
{
    mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
    instance = CreateProcessArff();

    document = new Document("Test");
    document.Sentences.Add(new SentenceItem("Test"));
    var words = document.Sentences[0].Words;

    var good = new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true };
    words.Add(WordExFactory.Construct(good));

    var two = new TestWordItem("Two") { Stemmed = "Two" };
    words.Add(WordExFactory.Construct(two));

    var three = new TestWordItem("#Three") { Stemmed = "#Three" };
    words.Add(WordExFactory.Construct(three));

    // Resolve the review manager factory from the container and build the review for the document.
    var createManager = ActualWordsHandler.InstanceSimple.Container.Resolve<Func<Document, IParsedReviewManager>>();
    var manager = createManager(document);
    review = manager.Create();
}
/// <summary>
/// Loads training results from a zip archive by scanning its entries for the header,
/// model and ARFF files (matched by case-insensitive name suffix).
/// </summary>
/// <param name="path">Path of the zip archive to open.</param>
/// <returns>
/// A <c>TrainingResults</c> built from the entries found. A part whose entry is missing from
/// the archive is passed to the constructor as null.
/// </returns>
private static TrainingResults LoadCompressed(string path)
{
    log.Debug("LoadCompressed: {0}", path);
    TrainingHeader header = null;
    Model model = null;
    IArffDataSet dataSet = null;

    using (ZipArchive archive = ZipFile.OpenRead(path))
    {
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            if (entry.FullName.EndsWith(headerFile, StringComparison.OrdinalIgnoreCase))
            {
                // Dispose the entry stream as soon as the XML has been loaded.
                using (var stream = entry.Open())
                {
                    header = XDocument.Load(stream).XmlDeserialize<TrainingHeader>();
                }
            }
            else if (entry.FullName.EndsWith(modelFile, StringComparison.OrdinalIgnoreCase))
            {
                // Dispose the entry stream once the model has been read.
                using (var stream = entry.Open())
                {
                    model = Model.Read(stream);
                }
            }
            else if (entry.FullName.EndsWith(arffFile, StringComparison.OrdinalIgnoreCase))
            {
                using (var reader = new StreamReader(entry.Open()))
                {
                    dataSet = ArffDataSet.LoadSimple(reader);
                }
            }
        }
    }

    return new TrainingResults(model, header, dataSet);
}
/// <summary>
/// Writes the data set to the given stream through a <see cref="StreamWriter"/>.
/// </summary>
/// <param name="arff">Data set to save.</param>
/// <param name="outStream">
/// Destination stream. It is closed when the writer is disposed at the end of this method.
/// </param>
private static void SaveArff(IArffDataSet arff, Stream outStream)
{
    using (StreamWriter writer = new StreamWriter(outStream))
    {
        arff.Save(writer);

        // Push the writer's buffered text into the stream first; flushing the stream
        // before that would leave the buffered data behind.
        writer.Flush();
        outStream.Flush();
    }
}
/// <summary>
/// Creates a simple data set with the single nominal class "One" and builds the
/// <c>ParametersSelectionFactory</c> under test on top of it.
/// </summary>
public void Setup()
{
    dataSet = ArffDataSet.CreateSimple("Test");
    dataSet.Header.RegisterNominalClass("One");

    factory = new ParametersSelectionFactory(Task.Factory, new ProblemFactory(dataSet));
}
/// <summary>
/// Bundles a trained model with its header and the data set it belongs to.
/// </summary>
/// <param name="model">Trained model; must not be null.</param>
/// <param name="header">Training header; must not be null.</param>
/// <param name="dataSet">Associated data set; must not be null.</param>
public TrainingResults(Model model, TrainingHeader header, IArffDataSet dataSet)
{
    // Each argument is validated (in declaration order) right before it is stored.
    Guard.NotNull(() => model, model);
    Model = model;

    Guard.NotNull(() => header, header);
    Header = header;

    Guard.NotNull(() => dataSet, dataSet);
    DataSet = dataSet;
}
/// <summary>
/// Builds a problem source for the given data set by copying it against the header of the
/// base data set and attaching the configured transform.
/// </summary>
/// <param name="currentDataSet">Data set to convert; must not be null.</param>
/// <returns>A <c>ProblemSource</c> over the copied data set.</returns>
public IProblemSource Construct(IArffDataSet currentDataSet)
{
    Guard.NotNull(() => currentDataSet, currentDataSet);

    var copy = currentDataSet.CopyDataSet(baseDataSet.Header, "Test");
    var source = new ProblemSource(copy);
    source.Transform = transform;
    return source;
}
/// <summary>
/// Trains an SVM classifier on the L2-normalized data of <paramref name="arff"/>.
/// </summary>
/// <param name="arff">Training data set; must not be null.</param>
/// <param name="token">Token passed to the training task and the classifier.</param>
/// <returns>A <c>MachineSentiment</c> wrapping the data set and the trained classifier.</returns>
/// <exception cref="ArgumentNullException"><paramref name="arff"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Fewer than 40 records, not exactly two classes, or fewer than 20 records labelled -1 or 1.
/// </exception>
public static async Task<MachineSentiment> Train(IArffDataSet arff, CancellationToken token)
{
    if (arff == null)
    {
        throw new ArgumentNullException(nameof(arff));
    }

    log.LogInformation("Training SVM...");
    var classifier = new Classifier();
    var data = arff.GetDataNormalized(NormalizationType.L2).ToArray();
    if (data.Length < 40)
    {
        // Two-argument overload: the single-string overload would treat the text as a parameter name.
        throw new ArgumentOutOfRangeException(nameof(arff), "Not enough training records");
    }

    var classCount = new Dictionary<int, int>();
    foreach (var datRecord in data)
    {
        var label = datRecord.Y.Value;
        classCount.TryGetValue(label, out var seen);
        classCount[label] = seen + 1;

        // Make all sentiments positive - counts with weights
        for (var i = 0; i < datRecord.X.Length; i++)
        {
            datRecord.X[i] = Math.Abs(datRecord.X[i]);
        }
    }

    if (classCount.Count != 2)
    {
        throw new ArgumentOutOfRangeException(nameof(arff), "Two classes not found");
    }

    // TryGetValue also covers data whose two labels are not -1/1, which previously
    // surfaced as a KeyNotFoundException.
    if (!classCount.TryGetValue(-1, out var negativeCount) || negativeCount < 20)
    {
        throw new ArgumentOutOfRangeException(nameof(arff), "Not enough negative classes");
    }

    if (!classCount.TryGetValue(1, out var positiveCount) || positiveCount < 20)
    {
        throw new ArgumentOutOfRangeException(nameof(arff), "Not enough positive classes");
    }

    var yData = data.Select(item => item.Y.Value).ToArray();
    var xData = data.Select(item => item.X).ToArray();

    // Labels and feature vectors are shuffled together; index 0 holds labels, index 1 the vectors.
    Array[] randomized = GlobalSettings.Random.Shuffle(yData, xData).ToArray();
    await Task.Run(
        () => classifier.Train(
            randomized[0].Cast<int>().ToArray(),
            randomized[1].Cast<double[]>().ToArray(),
            token),
        token).ConfigureAwait(false);
    return new MachineSentiment(arff, classifier);
}
/// <summary>
/// Creates the processor over <paramref name="dataSet"/> and switches on its
/// <c>HasId</c> and <c>HasDate</c> flags.
/// </summary>
/// <param name="dataSet">Data set to process; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="dataSet"/> is null.</exception>
public ProcessArff(IArffDataSet dataSet)
    : base(dataSet)
{
    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    dataSet.HasId = true;
    dataSet.HasDate = true;
}
/// <summary>
/// Creates the trainer for the given problem factory and data set. When the data set has no
/// random seed yet, one is assigned from <see cref="Environment.TickCount"/>.
/// </summary>
/// <param name="problemFactory">Factory used to build problems; must not be null.</param>
/// <param name="dataSet">Training data set; must not be null.</param>
public SvmTraining(IProblemFactory problemFactory, IArffDataSet dataSet)
{
    Guard.NotNull(() => problemFactory, problemFactory);
    Guard.NotNull(() => dataSet, dataSet);

    if (dataSet.RandomSeed == null)
    {
        // Only fill in a seed when the caller has not chosen one.
        dataSet.RandomSeed = Environment.TickCount;
    }

    this.dataSet = dataSet;
    this.problemFactory = problemFactory;
}
/// <summary>
/// Wraps a data set and a classifier. Header creation is disabled on the data set, and the
/// classifier's model weights (all but the first) and the data set's feature table are cached.
/// </summary>
/// <param name="dataSet">Data set backing the sentiment model; must not be null.</param>
/// <param name="classifier">Trained classifier; must not be null.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public MachineSentiment(IArffDataSet dataSet, IClassifier classifier)
{
    // Validate every argument before touching the data set, so a failed construction
    // does not leave the caller's data set modified.
    if (dataSet is null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    if (classifier is null)
    {
        throw new ArgumentNullException(nameof(classifier));
    }

    dataSet.Header.CreateHeader = false;
    DataSet = dataSet;
    Classifier = classifier;
    weights = classifier.Model.ToWeights().Skip(1).ToArray();
    featureTable = dataSet.GetFeatureTable();
}
/// <summary>
/// Loads a machine sentiment from a directory containing "data.arff" and "training.model".
/// </summary>
/// <param name="path">Directory holding the two files; must not be null or empty.</param>
/// <returns>A <c>MachineSentiment</c> built from the loaded data set and classifier.</returns>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
public static IMachineSentiment Load(string path)
{
    if (string.IsNullOrEmpty(path))
    {
        throw new ArgumentException("Value cannot be null or empty.", nameof(path));
    }

    log.LogInformation("Loading {0}...", path);

    var arffLocation = Path.Combine(path, "data.arff");
    IArffDataSet reviews = ArffDataSet.Load<PositivityType>(arffLocation);

    var classifier = new Classifier();
    var modelLocation = Path.Combine(path, "training.model");
    classifier.Load(modelLocation);

    return new MachineSentiment(reviews, classifier);
}
/// <summary>
/// Chooses the parameter selection strategy for a training run.
/// </summary>
/// <param name="header">Training settings (kernel, SVM type, grid selection flag); must not be null.</param>
/// <param name="dataset">Data set the training will use; must not be null.</param>
/// <returns>
/// A <c>NullParameterSelection</c> when grid selection is disabled; otherwise a
/// <c>GridParameterSelection</c> whose search ranges depend on whether the kernel is linear.
/// </returns>
public IParameterSelection Create(TrainingHeader header, IArffDataSet dataset)
{
    Guard.NotNull(() => header, header);
    Guard.NotNull(() => dataset, dataset);

    var defaultParameter = new Parameter
    {
        KernelType = header.Kernel,
        CacheSize = 200,
        SvmType = header.SvmType
    };
    var model = new TrainingModel(header);

    if (!header.GridSelection)
    {
        return new NullParameterSelection(defaultParameter, model);
    }

    logger.Info("Investigate LibLinear");

    if (header.Kernel != KernelType.Linear)
    {
        // Non-linear kernels: search over both grid ranges.
        var nonLinearSearch = new GridSearchParameters(3, GetList(-5, 15, 2), GetList(-15, 3, 2), defaultParameter);
        return new GridParameterSelection(taskFactory, model, nonLinearSearch);
    }

    // Linear kernel: the gamma list is fixed to GetList(1, 1, 1).
    var gamma = GetList(1, 1, 1);
    bool featuresDominate = dataset.Header.Total > (dataset.TotalDocuments * 10);
    if (featuresDominate)
    {
        logger.Info("Selecting Linear features >> instances");
        defaultParameter.Shrinking = false;
    }
    else
    {
        logger.Warn("Investigate LibLinear");
    }

    // Class weights are derived from the labels of the constructed problem.
    var training = problemFactory.Construct(dataset).GetProblem();
    defaultParameter.Weights = WeightCalculation.GetWeights(training.Y);
    foreach (var classItem in defaultParameter.Weights)
    {
        logger.Info($"Using class [{classItem.Key}] with weight [{classItem.Value}]");
    }

    var linearSearch = new GridSearchParameters(3, GetList(-1, 2, 1), gamma, defaultParameter);
    return new GridParameterSelection(taskFactory, model, linearSearch);
}
/// <summary>
/// Runs the test on the data set and writes a class value back into each document, looked up
/// from the document's class header by the <c>Actual</c> id of the matching prediction entry.
/// </summary>
/// <param name="testDataSet">Data set to classify; its documents are updated in place. Must not be null.</param>
/// <returns>The prediction result obtained from <c>Test</c>.</returns>
public PredictionResult Classify(IArffDataSet testDataSet)
{
    Guard.NotNull(() => testDataSet, testDataSet);
    log.Debug("Classify");

    var prediction = Test(testDataSet);
    var documents = testDataSet.Documents.ToArray();

    // Prediction entries and documents are paired by position.
    var position = 0;
    while (position < prediction.Classes.Length)
    {
        var document = documents[position];
        var classHeader = (IClassHeader)document.Class.Header;
        document.Class.Value = classHeader.GetValueByClassId(prediction.Classes[position].Actual);
        position++;
    }

    return prediction;
}
/// <summary>
/// Tests the data set and then removes every "training.*" file in <paramref name="path"/>
/// whose name does not contain "training.model" (case-insensitive).
/// </summary>
/// <param name="testingSet">Data set to evaluate; must not be null.</param>
/// <param name="path">Directory that is ensured to exist and then cleaned up.</param>
/// <returns>The <c>CorrectProbability</c> of the prediction result.</returns>
public double Test(IArffDataSet testingSet, string path)
{
    Guard.NotNull(() => testingSet, testingSet);
    log.Debug("Test");
    path.EnsureDirectoryExistence();

    var outcome = Test(testingSet);

    foreach (var candidate in Directory.GetFiles(path, "training.*"))
    {
        bool isModelFile = candidate.IndexOf("training.model", StringComparison.OrdinalIgnoreCase) >= 0;
        if (isModelFile)
        {
            // Keep the model itself; only the other training artefacts are removed.
            continue;
        }

        File.Delete(candidate);
    }

    return outcome.CorrectProbability;
}
/// <summary>
/// Trains an SVM on the current data set, saves it to <c>SvmPath</c> and reloads it.
/// Any failure is logged and rethrown.
/// </summary>
/// <exception cref="ArgumentNullException">No current data set has been assigned.</exception>
public async Task TrainSvm()
{
    try
    {
        IArffDataSet trainingSet = currentSet;
        if (trainingSet == null)
        {
            throw new ArgumentNullException(nameof(currentSet));
        }

        MachineSentiment trained = await MachineSentiment
            .Train(trainingSet, CancellationToken.None)
            .ConfigureAwait(false);
        trained.Save(SvmPath);
        LoadSvm();
        log.LogInformation("SVM Training Completed...");
    }
    catch (Exception ex)
    {
        log.LogError(ex, "Error");
        throw;
    }
}
/// <summary>
/// Saves the vectors as an ARFF file inside <paramref name="path"/>. The file name is the
/// pure-letter form of <c>Word.Text</c> with the ".arff" extension.
/// </summary>
/// <param name="path">Target directory; must not be null or whitespace.</param>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or whitespace.</exception>
public void Save(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Value cannot be null or whitespace.", nameof(path));
    }

    log.LogInformation("Saving {0}...", path);
    var fileName = $"{Word.Text.CreatePureLetterText()}.arff";
    var target = Path.Combine(path, fileName);

    IArffDataSet arff = ArffDataSet.Create<PositivityType>(Word.Text);
    arff.UseTotal = true;

    foreach (WordsContext vector in Vectors)
    {
        IArffDataRow row = arff.AddDocument();
        if (vector.SentimentValue > 0)
        {
            row.Class.Value = PositivityType.Positive;
        }
        else
        {
            row.Class.Value = PositivityType.Negative;
        }

        foreach (WordEx wordItem in vector.Words)
        {
            // Words that are not aspects and carry a zero value are left out.
            bool skip = !wordItem.IsAspect && wordItem.Value == 0;
            if (!skip)
            {
                DataRecord record = row.AddRecord(wordItem.Text);
                record.Value = record.Total;
            }
        }
    }

    arff.Save(target);
    log.LogInformation("Saving {0} Completed.", target);
}
/// <summary>
/// Creates a problem source over the given data set.
/// </summary>
/// <param name="arff">Backing data set; must not be null.</param>
public ProblemSource(IArffDataSet arff)
{
    Guard.NotNull(() => arff, arff);

    this.arff = arff;
}
/// <summary>
/// Stores the data set the processor works on.
/// </summary>
/// <param name="dataSet">Data set to process; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="dataSet"/> is null.</exception>
protected ProcessArffBase(IArffDataSet dataSet)
{
    if (dataSet == null)
    {
        throw new ArgumentNullException(nameof(dataSet));
    }

    DataSet = dataSet;
}
/// <summary>
/// Replaces the current data set. No validation is performed, so null is stored as given.
/// </summary>
/// <param name="dataSet">Data set to store as the current set.</param>
public void SetArff(IArffDataSet dataSet) => currentSet = dataSet;
/// <summary>
/// Creates the processor; the data set is handed straight to the base constructor.
/// </summary>
/// <param name="dataSet">Data set forwarded to the base class.</param>
public UnigramProcessArff(IArffDataSet dataSet)
    : base(dataSet)
{
}
/// <summary>
/// Creates a <c>UnigramProcessArff</c> for the given data set.
/// </summary>
/// <param name="dataSet">Data set passed to the new processor.</param>
/// <returns>The newly created processor.</returns>
public IProcessArff Create(IArffDataSet dataSet) => new UnigramProcessArff(dataSet);
/// <summary>
/// Creates a factory bound to a base data set.
/// </summary>
/// <param name="baseDataSet">Base data set to keep for later use; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="baseDataSet"/> is null.</exception>
public ProblemFactory(IArffDataSet baseDataSet)
{
    // Fail at construction instead of storing a null that would only surface later,
    // when the stored data set is first dereferenced.
    if (baseDataSet == null)
    {
        throw new System.ArgumentNullException(nameof(baseDataSet));
    }

    this.baseDataSet = baseDataSet;
}
/// <summary>
/// Builds trained tree data from the class values of the data set's documents.
/// </summary>
/// <param name="dataSet">Source data set; must not be null.</param>
/// <returns>The result of <c>ConstructInternal</c> for the documents' class readers.</returns>
public static TrainedTreeData Construct(IArffDataSet dataSet)
{
    Guard.NotNull(() => dataSet, dataSet);

    // One deferred reader per document; the class value is cast to string only when invoked.
    var classReaders =
        from document in dataSet.Documents
        select (Func<string>)(() => (string)document.Class.Value);

    return ConstructInternal(classReaders);
}