// Trains a document categorizer with the NaiveBayes algorithm on five small samples
// (two labelled "1", three labelled "0") and verifies that single-token documents are
// assigned to the category whose training samples contain that token.
public void TestSimpleTraining()
{
    // Training corpus: category "1" uses the tokens a/b/c, category "0" uses x/y/z.
    var trainingDocs = new[]
    {
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
    };
    IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(trainingDocs);

    var trainingParams = new TrainingParameters();
    trainingParams.Set(Parameters.Iterations, "100");
    trainingParams.Set(Parameters.Cutoff, "0");
    trainingParams.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

    // Whitespace tokenization with a single bag-of-words feature generator.
    var featureGenerators = new[] { new BagOfWordsFeatureGenerator() };
    var factory = new DocumentCategorizerFactory(WhitespaceTokenizer.Instance, featureGenerators);

    var trainedModel = DocumentCategorizerME.Train("x-unspecified", samples, trainingParams, factory);
    var categorizer = new DocumentCategorizerME(trainedModel);

    // "a" only occurs in category "1" samples, "x" only in category "0" samples.
    Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
    Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

    // The last entry of the sorted score map must list category "1" first; the original
    // author's note states this is because it holds the highest score for "a".
    var scoresForA = categorizer.SortedScoreMap("a");
    var finalEntry = scoresForA.Last();
    Assert.AreEqual("1", finalEntry.Value[0]);
}
// Trains a document categorizer on six small samples (three labelled "1", three
// labelled "0") with a default DocumentCategorizerFactory, then verifies that
// single-token documents are assigned to the category whose samples contain the token.
public void testSimpleTraining()
{
    IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(new[]
    {
        new DocumentSample("1", new[] {"a", "b", "c"}),
        new DocumentSample("1", new[] {"a", "b", "c", "1", "2"}),
        new DocumentSample("1", new[] {"a", "b", "c", "3", "4"}),
        new DocumentSample("0", new[] {"x", "y", "z"}),
        new DocumentSample("0", new[] {"x", "y", "z", "5", "6"}),
        new DocumentSample("0", new[] {"x", "y", "z", "7", "8"})
    });

    var param = new TrainingParameters();
    param.Set(Parameters.Iterations, "100");
    param.Set(Parameters.Cutoff, "0");

    var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());
    var doccat = new DocumentCategorizerME(model);

    // "a" only occurs in category "1" samples, "x" only in category "0" samples.
    var aProbs = doccat.Categorize("a");
    Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

    var bProbs = doccat.Categorize("x");
    Assert.AreEqual("0", doccat.GetBestCategory(bProbs));

    // Check that the first enumerated entry of the score map for "a" lists category "1".
    // NOTE(review): the original comment spoke of the map's *last* key holding the highest
    // score, while the code inspects the first entry - confirm SortedScoreMap's ordering.
    var sortedScoreMap = doccat.SortedScoreMap("a");
    var inspectedFirstEntry = false;
    foreach (var pair in sortedScoreMap)
    {
        Assert.AreEqual("1", pair.Value[0]);
        inspectedFirstEntry = true;
        break;
    }

    // Without this guard an empty map would skip the loop body and the test would
    // pass without ever checking a category.
    Assert.IsTrue(inspectedFirstEntry, "SortedScoreMap returned no entries, so the category check never ran.");
}
// Trains a document categorizer on six small samples (three labelled "1", three
// labelled "0") with a default DocumentCategorizerFactory, then verifies that
// single-token documents are assigned to the category whose samples contain the token.
public void TestSimpleTraining()
{
    IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(new[]
    {
        new DocumentSample("1", new[] { "a", "b", "c" }),
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
    });

    var param = new TrainingParameters();
    param.Set(Parameters.Iterations, "100");
    param.Set(Parameters.Cutoff, "0");

    var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());
    var doccat = new DocumentCategorizerME(model);

    // "a" only occurs in category "1" samples, "x" only in category "0" samples.
    var aProbs = doccat.Categorize("a");
    Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

    var bProbs = doccat.Categorize("x");
    Assert.AreEqual("0", doccat.GetBestCategory(bProbs));

    // Check the ordering of the score map for "a". Per the original author's note the
    // entries enumerate as: first "0", second "1" (last) - i.e. category "1" is expected
    // at the end. Only the first entry is asserted here.
    var sortedScoreMap = doccat.SortedScoreMap("a");
    var inspectedFirstEntry = false;
    foreach (var pair in sortedScoreMap)
    {
        Assert.AreEqual("0", pair.Value[0]);
        inspectedFirstEntry = true;
        break;
    }

    // Without this guard an empty map would skip the loop body and the test would
    // pass without ever checking a category.
    Assert.IsTrue(inspectedFirstEntry, "SortedScoreMap returned no entries, so the category check never ran.");
}