public void Setup()
        {
            // Data set whose class attribute is typed by PositivityType.
            threeClassDataset = ArffDataSet.Create<PositivityType>("Test");
            threeClassDataset.UseTotal = true;

            // Data set with an explicitly registered nominal class of two string labels.
            twoClassDataset = ArffDataSet.CreateSimple("Test");
            twoClassDataset.UseTotal = true;
            twoClassDataset.Header.RegisterNominalClass("Positive", "Negative");

            // Each pass adds one positive and one negative document to both data sets.
            const int documentsPerClass = 20;
            for (int pass = 0; pass < documentsPerClass; pass++)
            {
                var threeClassPositive = threeClassDataset.AddDocument();
                threeClassPositive.Class.Value = PositivityType.Positive;
                threeClassPositive.AddRecord("Good");

                var twoClassPositive = twoClassDataset.AddDocument();
                twoClassPositive.Class.Value = "Positive";
                twoClassPositive.AddRecord("Good");

                var threeClassNegative = threeClassDataset.AddDocument();
                threeClassNegative.Class.Value = PositivityType.Negative;
                threeClassNegative.AddRecord("Bad");

                var twoClassNegative = twoClassDataset.AddDocument();
                twoClassNegative.Class.Value = "Negative";
                twoClassNegative.AddRecord("Bad");
            }
        }
        /// <summary>
        /// Builds a problem from <paramref name="testingSet"/> through the problem factory
        /// and runs a prediction against the trained model.
        /// </summary>
        /// <param name="testingSet">Data set to evaluate; must not be null.</param>
        /// <returns>The result returned by <c>Prediction.Predict</c>.</returns>
        public PredictionResult Test(IArffDataSet testingSet)
        {
            Guard.NotNull(() => testingSet, testingSet);

            var problem = problemFactory.Construct(testingSet).GetProblem();
            return Prediction.Predict(problem, trainedModel, false);
        }
Example #3
0
        public void Setup()
        {
            mockArffDataSet = ArffDataSet.Create<PositivityType>("Test");
            instance = CreateProcessArff();

            // A single-sentence document holding three words.
            document = new Document("Test");
            document.Sentences.Add(new SentenceItem("Test"));

            // Only the first word is flagged as sentiment.
            var sentimentWord = new TestWordItem("Good") { Stemmed = "Good", IsSentiment = true };
            document.Sentences[0].Words.Add(WordExFactory.Construct(sentimentWord));

            var plainWord = new TestWordItem("Two") { Stemmed = "Two" };
            document.Sentences[0].Words.Add(WordExFactory.Construct(plainWord));

            var hashWord = new TestWordItem("#Three") { Stemmed = "#Three" };
            document.Sentences[0].Words.Add(WordExFactory.Construct(hashWord));

            // Resolve the manager factory delegate from the container and build the review.
            var resolveManager = ActualWordsHandler.InstanceSimple.Container.Resolve<Func<Document, IParsedReviewManager>>();
            var manager = resolveManager(document);
            review = manager.Create();
        }
        /// <summary>
        /// Reads training results from a zip archive. Entries are matched by the suffix of
        /// their full name (case-insensitive) against the header, model and ARFF file names.
        /// </summary>
        /// <param name="path">Path of the zip archive to open.</param>
        /// <returns>
        /// A <see cref="TrainingResults"/> built from whatever parts were found; a part whose
        /// entry is missing is passed to the constructor as null.
        /// </returns>
        private static TrainingResults LoadCompressed(string path)
        {
            log.Debug("LoadCompressed: {0}", path);
            TrainingHeader header = null;
            Model model = null;
            IArffDataSet dataSet = null;

            using (ZipArchive archive = ZipFile.OpenRead(path))
            {
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    if (entry.FullName.EndsWith(headerFile, StringComparison.OrdinalIgnoreCase))
                    {
                        // Dispose the entry stream as soon as the XML has been loaded.
                        using (Stream headerStream = entry.Open())
                        {
                            header = XDocument.Load(headerStream).XmlDeserialize<TrainingHeader>();
                        }
                    }
                    else if (entry.FullName.EndsWith(modelFile, StringComparison.OrdinalIgnoreCase))
                    {
                        // Dispose the entry stream once the model has been read.
                        using (Stream modelStream = entry.Open())
                        {
                            model = Model.Read(modelStream);
                        }
                    }
                    else if (entry.FullName.EndsWith(arffFile, StringComparison.OrdinalIgnoreCase))
                    {
                        using (StreamReader reader = new StreamReader(entry.Open()))
                        {
                            dataSet = ArffDataSet.LoadSimple(reader);
                        }
                    }
                }
            }

            return new TrainingResults(model, header, dataSet);
        }
 /// <summary>
 /// Writes <paramref name="arff"/> to <paramref name="outStream"/> through a
 /// <see cref="StreamWriter"/>.
 /// </summary>
 /// <param name="arff">Data set to serialize.</param>
 /// <param name="outStream">
 /// Destination stream. It is closed when the writer is disposed, because the writer
 /// is created with the default (non leave-open) constructor.
 /// </param>
 private static void SaveArff(IArffDataSet arff, Stream outStream)
 {
     using (StreamWriter writer = new StreamWriter(outStream))
     {
         arff.Save(writer);

         // Flush the writer rather than the raw stream: pending characters live in the
         // writer's buffer, and StreamWriter.Flush pushes them through to the stream.
         writer.Flush();
     }
 }
        public void Setup()
        {
            // Simple data set with a single nominal class value.
            dataSet = ArffDataSet.CreateSimple("Test");
            dataSet.Header.RegisterNominalClass("One");

            factory = new ParametersSelectionFactory(Task.Factory, new ProblemFactory(dataSet));
        }
Example #7
0
 /// <summary>
 /// Bundles a trained model with its header and data set.
 /// </summary>
 /// <param name="model">Trained model; must not be null.</param>
 /// <param name="header">Training header; must not be null.</param>
 /// <param name="dataSet">Data set; must not be null.</param>
 public TrainingResults(Model model, TrainingHeader header, IArffDataSet dataSet)
 {
     // Validate every argument before any property is assigned.
     Guard.NotNull(() => model, model);
     Guard.NotNull(() => header, header);
     Guard.NotNull(() => dataSet, dataSet);

     Model = model;
     Header = header;
     DataSet = dataSet;
 }
        /// <summary>
        /// Copies <paramref name="currentDataSet"/> onto the base data set's header and
        /// wraps the copy in a <see cref="ProblemSource"/> carrying this factory's transform.
        /// </summary>
        /// <param name="currentDataSet">Data set to copy; must not be null.</param>
        /// <returns>The newly created problem source.</returns>
        public IProblemSource Construct(IArffDataSet currentDataSet)
        {
            Guard.NotNull(() => currentDataSet, currentDataSet);

            var copy = currentDataSet.CopyDataSet(baseDataSet.Header, "Test");
            var source = new ProblemSource(copy);
            source.Transform = transform;
            return source;
        }
Example #9
0
        /// <summary>
        /// Trains a classifier on the L2-normalized records of <paramref name="arff"/>.
        /// </summary>
        /// <remarks>
        /// Feature values of the normalized records are replaced in place by their absolute
        /// values before training. The records are shuffled with
        /// <c>GlobalSettings.Random</c> and training runs on a thread-pool task.
        /// </remarks>
        /// <param name="arff">Training data set; must not be null.</param>
        /// <param name="token">Cancellation token passed to the training call and the task.</param>
        /// <returns>A <see cref="MachineSentiment"/> wrapping the data set and trained classifier.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="arff"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Fewer than 40 records, not exactly two distinct class labels, or fewer than
        /// 20 records labelled -1 or fewer than 20 labelled 1.
        /// </exception>
        public static async Task<MachineSentiment> Train(IArffDataSet arff, CancellationToken token)
        {
            if (arff == null)
            {
                throw new ArgumentNullException(nameof(arff));
            }

            log.LogInformation("Training SVM...");
            var classifier = new Classifier();
            var data = arff.GetDataNormalized(NormalizationType.L2).ToArray();

            // The two-argument constructor is (paramName, message); the single-string
            // overload would treat the message as a parameter name.
            if (data.Length < 40)
            {
                throw new ArgumentOutOfRangeException(nameof(arff), "Not enough training records");
            }

            var classCount = new Dictionary<int, int>();

            foreach (var datRecord in data)
            {
                int label = datRecord.Y.Value;
                classCount.TryGetValue(label, out int seen);
                classCount[label] = seen + 1;

                // Make all sentiments positive - counts with weights
                for (var i = 0; i < datRecord.X.Length; i++)
                {
                    datRecord.X[i] = Math.Abs(datRecord.X[i]);
                }
            }

            if (classCount.Count != 2)
            {
                throw new ArgumentOutOfRangeException(nameof(arff), "Two classes not found");
            }

            // TryGetValue leaves the count at 0 when a label is absent, so unexpected
            // labels (e.g. 0/1) produce the descriptive error, not KeyNotFoundException.
            classCount.TryGetValue(-1, out int negativeCount);
            if (negativeCount < 20)
            {
                throw new ArgumentOutOfRangeException(nameof(arff), "Not enough negative classes");
            }

            classCount.TryGetValue(1, out int positiveCount);
            if (positiveCount < 20)
            {
                throw new ArgumentOutOfRangeException(nameof(arff), "Not enough positive classes");
            }

            var yData = data.Select(item => item.Y.Value).ToArray();
            var xData = data.Select(item => item.X).ToArray();

            // Shuffle labels and feature vectors together so they stay aligned.
            Array[] randomized = GlobalSettings.Random.Shuffle(yData, xData).ToArray();
            await Task.Run(
                () => classifier.Train(
                    randomized[0].Cast<int>().ToArray(),
                    randomized[1].Cast<double[]>().ToArray(),
                    token),
                token).ConfigureAwait(false);

            return new MachineSentiment(arff, classifier);
        }
Example #10
0
        /// <summary>
        /// Creates the processor over <paramref name="dataSet"/> and switches on its
        /// <c>HasId</c> and <c>HasDate</c> flags.
        /// </summary>
        /// <param name="dataSet">Data set to process; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dataSet"/> is null.</exception>
        public ProcessArff(IArffDataSet dataSet)
            : base(dataSet)
        {
            if (dataSet == null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            // This processor marks the data set as carrying id and date.
            dataSet.HasId = true;
            dataSet.HasDate = true;
        }
 /// <summary>
 /// Creates a trainer over <paramref name="dataSet"/>, seeding the data set's
 /// <c>RandomSeed</c> from <see cref="Environment.TickCount"/> when it is unset.
 /// </summary>
 /// <param name="problemFactory">Factory used to build problems; must not be null.</param>
 /// <param name="dataSet">Training data set; must not be null.</param>
 public SvmTraining(IProblemFactory problemFactory, IArffDataSet dataSet)
 {
     Guard.NotNull(() => problemFactory, problemFactory);
     Guard.NotNull(() => dataSet, dataSet);

     // Only assign a seed when the caller has not provided one.
     if (dataSet.RandomSeed == null)
     {
         dataSet.RandomSeed = Environment.TickCount;
     }

     this.problemFactory = problemFactory;
     this.dataSet = dataSet;
 }
Example #12
0
        /// <summary>
        /// Wraps a data set and a classifier, caching the classifier's model weights
        /// (first weight skipped) and the data set's feature table.
        /// </summary>
        /// <param name="dataSet">
        /// Data set; must not be null. Its <c>Header.CreateHeader</c> is set to false.
        /// </param>
        /// <param name="classifier">Classifier whose model supplies the weights; must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="dataSet"/> or <paramref name="classifier"/> is null.
        /// </exception>
        public MachineSentiment(IArffDataSet dataSet, IClassifier classifier)
        {
            // Validate both arguments up front so a null classifier cannot leave the
            // caller's data set header modified by a failed construction.
            if (dataSet is null)
            {
                throw new ArgumentNullException(nameof(dataSet));
            }

            if (classifier is null)
            {
                throw new ArgumentNullException(nameof(classifier));
            }

            dataSet.Header.CreateHeader = false;
            DataSet = dataSet;
            Classifier = classifier;
            weights = classifier.Model.ToWeights().Skip(1).ToArray();
            featureTable = dataSet.GetFeatureTable();
        }
Example #13
0
        /// <summary>
        /// Loads a machine sentiment from a directory holding <c>data.arff</c> and
        /// <c>training.model</c>.
        /// </summary>
        /// <param name="path">Directory containing both files; must not be null or empty.</param>
        /// <returns>A <see cref="MachineSentiment"/> built from the loaded data set and classifier.</returns>
        /// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
        public static IMachineSentiment Load(string path)
        {
            if (string.IsNullOrEmpty(path))
            {
                throw new ArgumentException("Value cannot be null or empty.", nameof(path));
            }

            log.LogInformation("Loading {0}...", path);

            string arffFile = Path.Combine(path, "data.arff");
            IArffDataSet reviews = ArffDataSet.Load<PositivityType>(arffFile);

            var classifier = new Classifier();
            string modelFile = Path.Combine(path, "training.model");
            classifier.Load(modelFile);

            return new MachineSentiment(reviews, classifier);
        }
Example #14
0
        /// <summary>
        /// Chooses a parameter-selection strategy for the given training header.
        /// </summary>
        /// <param name="header">Training header supplying kernel, SVM type and grid flag; must not be null.</param>
        /// <param name="dataset">Data set used for the linear-kernel weighting; must not be null.</param>
        /// <returns>
        /// A <c>NullParameterSelection</c> when grid selection is off, otherwise a
        /// <c>GridParameterSelection</c> with kernel-specific search ranges.
        /// </returns>
        public IParameterSelection Create(TrainingHeader header, IArffDataSet dataset)
        {
            Guard.NotNull(() => header, header);
            Guard.NotNull(() => dataset, dataset);

            var defaultParameter = new Parameter
            {
                KernelType = header.Kernel,
                CacheSize = 200,
                SvmType = header.SvmType
            };
            var model = new TrainingModel(header);

            if (!header.GridSelection)
            {
                return new NullParameterSelection(defaultParameter, model);
            }

            logger.Info("Investigate LibLinear");

            bool isLinear = header.Kernel == KernelType.Linear;
            if (!isLinear)
            {
                var wideSearch = new GridSearchParameters(3, GetList(-5, 15, 2), GetList(-15, 3, 2), defaultParameter);
                return new GridParameterSelection(taskFactory, model, wideSearch);
            }

            var gamma = GetList(1, 1, 1);

            // Shrinking is turned off when the header total exceeds ten times the document count.
            if (dataset.Header.Total > (dataset.TotalDocuments * 10))
            {
                logger.Info("Selecting Linear features >> instances");
                defaultParameter.Shrinking = false;
            }
            else
            {
                logger.Warn("Investigate LibLinear");
            }

            // Class weights are derived from the labels of the constructed problem.
            var training = problemFactory.Construct(dataset).GetProblem();
            defaultParameter.Weights = WeightCalculation.GetWeights(training.Y);
            foreach (var weighted in defaultParameter.Weights)
            {
                logger.Info($"Using class [{weighted.Key}] with weight [{weighted.Value}]");
            }

            var linearSearch = new GridSearchParameters(3, GetList(-1, 2, 1), gamma, defaultParameter);
            return new GridParameterSelection(taskFactory, model, linearSearch);
        }
        /// <summary>
        /// Runs <c>Test</c> on <paramref name="testDataSet"/> and writes a class value back
        /// into each document, resolved from the <c>Actual</c> id of the result at the same index.
        /// </summary>
        /// <param name="testDataSet">Data set to classify; must not be null. Its documents are modified.</param>
        /// <returns>The prediction result produced by <c>Test</c>.</returns>
        public PredictionResult Classify(IArffDataSet testDataSet)
        {
            Guard.NotNull(() => testDataSet, testDataSet);
            log.Debug("Classify");

            var result = Test(testDataSet);
            var documents = testDataSet.Documents.ToArray();

            // Result entries and documents are paired by position.
            for (int index = 0; index < result.Classes.Length; index++)
            {
                var document = documents[index];
                var classHeader = (IClassHeader)document.Class.Header;
                document.Class.Value = classHeader.GetValueByClassId(result.Classes[index].Actual);
            }

            return result;
        }
        /// <summary>
        /// Runs a prediction over <paramref name="testingSet"/>, then deletes every
        /// <c>training.*</c> file in <paramref name="path"/> whose path does not contain
        /// <c>training.model</c> (case-insensitive).
        /// </summary>
        /// <param name="testingSet">Data set to evaluate; must not be null.</param>
        /// <param name="path">Directory that is ensured to exist and then cleaned up.</param>
        /// <returns>The <c>CorrectProbability</c> of the prediction result.</returns>
        public double Test(IArffDataSet testingSet, string path)
        {
            Guard.NotNull(() => testingSet, testingSet);
            log.Debug("Test");
            path.EnsureDirectoryExistence();
            var prediction = Test(testingSet);

            foreach (var candidate in Directory.GetFiles(path, "training.*"))
            {
                // Keep anything whose path mentions the model file.
                if (candidate.IndexOf("training.model", StringComparison.OrdinalIgnoreCase) != -1)
                {
                    continue;
                }

                File.Delete(candidate);
            }

            return prediction.CorrectProbability;
        }
        /// <summary>
        /// Trains a machine sentiment on the current data set, saves it to <c>SvmPath</c>
        /// and reloads it through <c>LoadSvm</c>.
        /// </summary>
        /// <exception cref="ArgumentNullException">No current data set has been assigned.</exception>
        public async Task TrainSvm()
        {
            try
            {
                IArffDataSet dataSet = currentSet;
                if (dataSet == null)
                {
                    throw new ArgumentNullException(nameof(currentSet));
                }

                MachineSentiment trained = await MachineSentiment
                    .Train(dataSet, CancellationToken.None)
                    .ConfigureAwait(false);

                trained.Save(SvmPath);
                LoadSvm();
                log.LogInformation("SVM Training Completed...");
            }
            catch (Exception ex)
            {
                // Log every failure and rethrow with the original stack trace.
                log.LogError(ex, "Error");
                throw;
            }
        }
Example #18
0
        /// <summary>
        /// Writes the vectors of this instance to an ARFF file inside <paramref name="path"/>.
        /// The file name is derived from <c>Word.Text</c> via <c>CreatePureLetterText</c>.
        /// </summary>
        /// <param name="path">Target directory; must not be null or whitespace.</param>
        /// <exception cref="ArgumentException"><paramref name="path"/> is null or whitespace.</exception>
        public void Save(string path)
        {
            if (string.IsNullOrWhiteSpace(path))
            {
                throw new ArgumentException("Value cannot be null or whitespace.", nameof(path));
            }

            log.LogInformation("Saving {0}...", path);
            path = Path.Combine(path, $"{Word.Text.CreatePureLetterText()}.arff");

            IArffDataSet arff = ArffDataSet.Create<PositivityType>(Word.Text);
            arff.UseTotal = true;

            foreach (WordsContext vector in Vectors)
            {
                IArffDataRow row = arff.AddDocument();
                if (vector.SentimentValue > 0)
                {
                    row.Class.Value = PositivityType.Positive;
                }
                else
                {
                    row.Class.Value = PositivityType.Negative;
                }

                foreach (WordEx wordItem in vector.Words)
                {
                    // Record aspect words and words with a non-zero value; skip the rest.
                    if (wordItem.IsAspect || wordItem.Value != 0)
                    {
                        DataRecord record = row.AddRecord(wordItem.Text);
                        record.Value = record.Total;
                    }
                }
            }

            arff.Save(path);
            log.LogInformation("Saving {0} Completed.", path);
        }
 /// <summary>
 /// Creates a problem source backed by <paramref name="arff"/>.
 /// </summary>
 /// <param name="arff">Backing data set; must not be null.</param>
 public ProblemSource(IArffDataSet arff)
 {
     Guard.NotNull(() => arff, arff);

     this.arff = arff;
 }
 /// <summary>
 /// Stores the data set this processor works on.
 /// </summary>
 /// <param name="dataSet">Data set to process; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="dataSet"/> is null.</exception>
 protected ProcessArffBase(IArffDataSet dataSet)
 {
     if (dataSet == null)
     {
         throw new ArgumentNullException(nameof(dataSet));
     }

     DataSet = dataSet;
 }
 /// <summary>
 /// Replaces the current data set with <paramref name="dataSet"/>; no validation is performed.
 /// </summary>
 /// <param name="dataSet">Data set to make current.</param>
 public void SetArff(IArffDataSet dataSet) => currentSet = dataSet;
 /// <summary>
 /// Creates the processor; all initialization is delegated to the base constructor.
 /// </summary>
 /// <param name="dataSet">Data set forwarded to the base class.</param>
 public UnigramProcessArff(IArffDataSet dataSet) : base(dataSet)
 {
     // Nothing to initialize beyond the base class.
 }
Example #23
0
 /// <summary>
 /// Creates a <see cref="UnigramProcessArff"/> over <paramref name="dataSet"/>.
 /// </summary>
 /// <param name="dataSet">Data set handed to the new processor.</param>
 /// <returns>The newly created processor.</returns>
 public IProcessArff Create(IArffDataSet dataSet)
 {
     var processor = new UnigramProcessArff(dataSet);
     return processor;
 }
 /// <summary>
 /// Creates a factory bound to <paramref name="baseDataSet"/>.
 /// </summary>
 /// <param name="baseDataSet">Base data set kept by the factory; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="baseDataSet"/> is null.</exception>
 public ProblemFactory(IArffDataSet baseDataSet)
 {
     // Fail at construction time instead of storing a null that is only discovered later.
     if (baseDataSet == null)
     {
         throw new ArgumentNullException(nameof(baseDataSet));
     }

     this.baseDataSet = baseDataSet;
 }
Example #25
0
 /// <summary>
 /// Builds tree data from the class values of the documents in <paramref name="dataSet"/>.
 /// Each document is exposed as a deferred accessor that casts its class value to a string.
 /// </summary>
 /// <param name="dataSet">Source data set; must not be null.</param>
 /// <returns>The result of <c>ConstructInternal</c> over the class-value accessors.</returns>
 public static TrainedTreeData Construct(IArffDataSet dataSet)
 {
     Guard.NotNull(() => dataSet, dataSet);

     // The class value is only read when an accessor is invoked.
     var classAccessors = dataSet.Documents.Select(
         document => new Func<string>(() => (string)document.Class.Value));
     return ConstructInternal(classAccessors);
 }