// Trains a document categorizer with the Naive Bayes algorithm on a five-sample,
// two-category ("1" / "0") corpus, then checks that the token "a" is classified
// as "1", the token "x" as "0", and that the last entry of the sorted score map
// for "a" lists category "1" first.
public void TestSimpleTraining()
        {
            // Training configuration: 100 iterations, no feature cutoff, Naive Bayes trainer.
            var trainingParams = new TrainingParameters();
            trainingParams.Set(Parameters.Iterations, "100");
            trainingParams.Set(Parameters.Cutoff, "0");
            trainingParams.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

            // Category "1" documents share the tokens a/b/c; category "0" documents share x/y/z.
            IObjectStream <DocumentSample> trainingData = new GenericObjectStream <DocumentSample>(
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

            var factory = new DocumentCategorizerFactory(
                WhitespaceTokenizer.Instance,
                new [] { new BagOfWordsFeatureGenerator() });

            var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingData, trainingParams, factory);
            var categorizer  = new DocumentCategorizerME(trainedModel);

            // Best category for a token seen only in category "1" documents.
            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));

            // Best category for a token seen only in category "0" documents.
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // The last entry of the sorted score map is expected to be category "1"
            // because it has the highest score for "a".
            var highestEntry = categorizer.SortedScoreMap("a").Last();
            Assert.AreEqual("1", highestEntry.Value[0]);
        }
示例#2
0
        // Trains a document categorizer with the default factory on a six-sample,
        // two-category ("1" / "0") corpus, then checks that the token "a" is classified
        // as "1", the token "x" as "0", and that the sorted score map for "a" starts
        // with category "0" and ends with category "1".
        public void TestSimpleTraining()
        {
            // Category "1" documents share the tokens a/b/c; category "0" documents share x/y/z.
            IObjectStream <DocumentSample> samples = new GenericObjectStream <DocumentSample>(new[] {
                new DocumentSample("1", new[] { "a", "b", "c" }),
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
            });

            // Training configuration: 100 iterations, no feature cutoff.
            var param = new TrainingParameters();

            param.Set(Parameters.Iterations, "100");
            param.Set(Parameters.Cutoff, "0");

            var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());

            var doccat = new DocumentCategorizerME(model);

            var aProbs = doccat.Categorize("a");

            Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

            var bProbs = doccat.Categorize("x");

            Assert.AreEqual("0", doccat.GetBestCategory(bProbs));

            // Test to make sure the sorted map's first entry is category "0" and its last
            // entry is category "1", because "1" has the highest score for "a".
            var sortedScoreMap = doccat.SortedScoreMap("a");

            // Walk the map once and remember both ends. A plain loop is used so the
            // check does not depend on LINQ being imported in this file.
            // NOTE(review): assumes each entry's Value is an indexable list of category
            // names (strings), as the original assertion on pair.Value[0] implies.
            string firstCategory = null;
            string lastCategory  = null;

            foreach (var pair in sortedScoreMap)
            {
                if (firstCategory == null)
                {
                    firstCategory = pair.Value[0];
                }

                lastCategory = pair.Value[0];
            }

            // An empty map previously let the test pass without asserting anything.
            Assert.IsNotNull(firstCategory, "The sorted score map must contain at least one entry.");

            Assert.AreEqual("0", firstCategory);
            Assert.AreEqual("1", lastCategory);
        }
        // Trains a categorization model via Train(), classifies the text returned by
        // GetSourceText(), writes the trained model to "en-sentiment.bin" under the
        // application base path, and returns the best category converted with
        // ConvertRawResultAsSentimentResult().
        public SentimentAnalyzeResult Analyze()
        {
            var model    = Train();
            var category = new DocumentCategorizerME(model);

            var content           = GetSourceText();
            var distribution      = category.Categorize(content);
            var predictedCategory = category.GetBestCategory(distribution);

            var modelPath = Path.Combine(AppConfig.GetAppBasePath(), "en-sentiment.bin");

            // FileMode.Create replaces any existing file. FileMode.Append added another
            // serialized model to the end of the file on every call, so the file kept
            // growing and its beginning always held the oldest model.
            using (var stream = new FileStream(modelPath, FileMode.Create))
            {
                SerializeHelper.Serialize(stream, model);
            }

            return predictedCategory.ConvertRawResultAsSentimentResult();
        }