// Trains a document categorizer with the Naive Bayes algorithm on five small samples
// (two labelled "1", three labelled "0") and checks that single-token documents are
// assigned to the category whose training documents contained that token.
public void TestSimpleTraining()
        {
            // Category "1" documents all contain a/b/c; category "0" documents all contain x/y/z.
            IObjectStream<DocumentSample> trainingStream = new GenericObjectStream<DocumentSample>(
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

            // 100 iterations, cutoff 0, Naive Bayes trainer.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "0");
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

            // Whitespace tokenization with bag-of-words features only.
            var featureGenerators = new[] { new BagOfWordsFeatureGenerator() };
            var factory = new DocumentCategorizerFactory(WhitespaceTokenizer.Instance, featureGenerators);

            var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingStream, settings, factory);
            var categorizer = new DocumentCategorizerME(trainedModel);

            // "a" only ever appeared in category "1" documents.
            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));

            // "x" only ever appeared in category "0" documents.
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // The final entry of the sorted score map is expected to list category "1" first;
            // per the original author's note this is the entry with the highest score.
            Assert.AreEqual("1", categorizer.SortedScoreMap("a").Last().Value[0]);
        }
        // Trains a document categorizer (default factory, 100 iterations, cutoff 0) on six
        // small samples - three labelled "1", three labelled "0" - and checks that single-token
        // documents are assigned to the category whose training documents contained that token.
        public void testSimpleTraining() {

            // Category "1" documents all contain a/b/c; category "0" documents all contain x/y/z.
            IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(new[] {
                new DocumentSample("1", new[] {"a", "b", "c"}),
                new DocumentSample("1", new[] {"a", "b", "c", "1", "2"}),
                new DocumentSample("1", new[] {"a", "b", "c", "3", "4"}),
                new DocumentSample("0", new[] {"x", "y", "z"}),
                new DocumentSample("0", new[] {"x", "y", "z", "5", "6"}),
                new DocumentSample("0", new[] {"x", "y", "z", "7", "8"})
            });

            var param = new TrainingParameters();
            param.Set(Parameters.Iterations, "100");
            param.Set(Parameters.Cutoff, "0");

            var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());

            var doccat = new DocumentCategorizerME(model);

            // "a" only ever appeared in category "1" documents.
            var aProbs = doccat.Categorize("a");

            Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

            // "x" only ever appeared in category "0" documents.
            var xProbs = doccat.Categorize("x");
            Assert.AreEqual("0", doccat.GetBestCategory(xProbs));

            // Test to make sure the sorted map's LAST entry is cat 1, because it has the highest score.
            // NOTE(review): this assumes the map enumerates in ascending score order - confirm
            // against SortedScoreMap. The previous version inspected only the first entry, which
            // contradicted this stated intent.
            var sortedScoreMap = doccat.SortedScoreMap("a");

            // Walk to the final entry without relying on LINQ being imported in this file.
            // Stays null when the map is empty, so the assertion below fails instead of
            // silently passing.
            object lastCategory = null;
            foreach (var pair in sortedScoreMap) {
                lastCategory = pair.Value[0];
            }

            Assert.AreEqual("1", lastCategory);
        }
示例#3
0
        // Trains a document categorizer (default factory, 100 iterations, cutoff 0) on six
        // small samples - three labelled "1", three labelled "0" - and checks both the best
        // category for single-token documents and the first entry of the sorted score map.
        public void TestSimpleTraining()
        {
            // Category "1" documents all contain a/b/c; category "0" documents all contain x/y/z.
            var trainingDocs = new[] {
                new DocumentSample("1", new[] { "a", "b", "c" }),
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
            };
            IObjectStream<DocumentSample> sampleStream = new GenericObjectStream<DocumentSample>(trainingDocs);

            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "0");

            var factory = new DocumentCategorizerFactory();
            var trainedModel = DocumentCategorizerME.Train("x-unspecified", sampleStream, settings, factory);
            var categorizer = new DocumentCategorizerME(trainedModel);

            // "a" only ever appeared in category "1" documents.
            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));

            // "x" only ever appeared in category "0" documents.
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // Author's expectation for the sorted score map of "a": the first entry is
            // category "0" and the second (last) entry is category "1", the highest score.
            // Only the first entry is actually inspected here.
            foreach (var entry in categorizer.SortedScoreMap("a"))
            {
                Assert.AreEqual("0", entry.Value[0]);
                break;
            }
        }