/// <summary>
/// Trains a document categorizer model for the "x-unspecified" language code from the
/// stream returned by <c>CreateSampleStream</c>, using the default training parameters.
/// </summary>
/// <param name="factory">
/// The factory handed to the trainer; when <c>null</c>, a default-constructed
/// <see cref="DocumentCategorizerFactory"/> is used instead.
/// </param>
/// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
{
    // Locals are initialized in the same order the call arguments were evaluated.
    var sampleStream = CreateSampleStream();
    var defaults = TrainingParameters.DefaultParameters();
    var effectiveFactory = factory ?? new DocumentCategorizerFactory();

    return DocumentCategorizerME.Train("x-unspecified", sampleStream, defaults, effectiveFactory);
}
        /// <summary>
        /// Trains a categorizer with the Naive Bayes algorithm (100 iterations, cutoff 0) on a
        /// small two-category corpus, using a whitespace tokenizer and bag-of-words features,
        /// then checks the best category for "a" and "x" and the last entry of the sorted
        /// score map for "a".
        /// </summary>
        public void TestSimpleTraining()
        {
            // Category "1" documents share the tokens a/b/c; category "0" documents share x/y/z.
            IObjectStream<DocumentSample> corpus = new GenericObjectStream<DocumentSample>(
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "0");
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

            var factory = new DocumentCategorizerFactory(
                WhitespaceTokenizer.Instance,
                new[] { new BagOfWordsFeatureGenerator() });

            var trainedModel = DocumentCategorizerME.Train("x-unspecified", corpus, settings, factory);
            var categorizer = new DocumentCategorizerME(trainedModel);

            // "a" only occurs in category "1" samples, "x" only in category "0" samples.
            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // The last entry of the sorted score map is expected to be category "1",
            // because it has the highest score for "a".
            var scoresForA = categorizer.SortedScoreMap("a");
            var topEntry = scoresForA.Last();

            Assert.AreEqual("1", topEntry.Value[0]);
        }
Example #3
0
        /// <summary>
        /// Trains a categorizer (100 iterations, cutoff 0, default
        /// <see cref="DocumentCategorizerFactory"/>) on a small two-category corpus, then
        /// checks the best category for "a" and "x" and the full ordering of the sorted
        /// score map for "a".
        /// </summary>
        public void TestSimpleTraining()
        {
            // Category "1" documents share the tokens a/b/c; category "0" documents share x/y/z.
            IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(new[] {
                new DocumentSample("1", new[] { "a", "b", "c" }),
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
            });

            var param = new TrainingParameters();

            param.Set(Parameters.Iterations, "100");
            param.Set(Parameters.Cutoff, "0");

            var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());

            var doccat = new DocumentCategorizerME(model);

            var aProbs = doccat.Categorize("a");

            Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

            var bProbs = doccat.Categorize("x");

            Assert.AreEqual("0", doccat.GetBestCategory(bProbs));

            // Test to make sure the sorted map's last entry is cat 1 because it has the
            // highest score: the first entry must be "0" and the second (last) must be "1".
            var sortedScoreMap = doccat.SortedScoreMap("a");

            // Every entry is checked (not just the first one), and the entry count is
            // asserted afterwards so an empty map can no longer pass silently.
            var position = 0;
            foreach (var pair in sortedScoreMap)
            {
                Assert.AreEqual(position == 0 ? "0" : "1", pair.Value[0]);
                position++;
            }

            // Two categories were trained, so exactly two score entries are expected.
            Assert.AreEqual(2, position);
        }
 /// <summary>
 /// Trains a document categorizer model for the "en" language code from the stream
 /// returned by <c>GetSentimentModelStream</c>, using the default training parameters.
 /// </summary>
 /// <param name="factory">
 /// The factory handed to the trainer; when <c>null</c>, a default-constructed
 /// <see cref="DocumentCategorizerFactory"/> is used instead.
 /// </param>
 /// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
 public DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
 {
     return DocumentCategorizerME.Train(
         "en",
         GetSentimentModelStream(),
         TrainingParameters.DefaultParameters(),
         factory ?? new DocumentCategorizerFactory());
 }