/// <summary>
/// Trains a document categorizer with the Naive Bayes algorithm on a tiny
/// two-category corpus and checks that single tokens are assigned to the
/// category whose training documents contain them.
/// </summary>
public void TestSimpleTraining()
{
    // Category "1" documents contain a/b/c; category "0" documents contain x/y/z.
    IObjectStream<DocumentSample> trainingStream = new GenericObjectStream<DocumentSample>(
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

    // 100 iterations, no cutoff, Naive Bayes selected explicitly.
    var trainingParameters = new TrainingParameters();
    trainingParameters.Set(Parameters.Iterations, "100");
    trainingParameters.Set(Parameters.Cutoff, "0");
    trainingParameters.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

    var factory = new DocumentCategorizerFactory(
        WhitespaceTokenizer.Instance,
        new [] { new BagOfWordsFeatureGenerator() });

    var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingStream, trainingParameters, factory);
    var categorizer = new DocumentCategorizerME(trainedModel);

    // "a" only occurs in category "1" samples.
    var outcomesForA = categorizer.Categorize("a");
    Assert.AreEqual("1", categorizer.GetBestCategory(outcomesForA));

    // "x" only occurs in category "0" samples.
    var outcomesForX = categorizer.Categorize("x");
    Assert.AreEqual("0", categorizer.GetBestCategory(outcomesForX));

    // The last entry of the sorted score map is expected to be category "1",
    // because it has the highest score for "a".
    var scoresForA = categorizer.SortedScoreMap("a");
    var highestEntry = scoresForA.Last();
    Assert.AreEqual("1", highestEntry.Value[0]);
}
/// <summary>
/// Trains a document categorizer on a tiny two-category corpus (no algorithm is
/// set explicitly, so the training parameters' default applies) and checks that
/// single tokens are assigned to the category whose training documents contain them.
/// </summary>
public void TestSimpleTraining()
{
    // Category "1" documents contain a/b/c; category "0" documents contain x/y/z.
    IObjectStream<DocumentSample> samples = new GenericObjectStream<DocumentSample>(new[]
    {
        new DocumentSample("1", new[] { "a", "b", "c" }),
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
    });

    var param = new TrainingParameters();
    param.Set(Parameters.Iterations, "100");
    param.Set(Parameters.Cutoff, "0");

    var model = DocumentCategorizerME.Train("x-unspecified", samples, param, new DocumentCategorizerFactory());
    var doccat = new DocumentCategorizerME(model);

    var aProbs = doccat.Categorize("a");
    Assert.AreEqual("1", doccat.GetBestCategory(aProbs));

    var bProbs = doccat.Categorize("x");
    Assert.AreEqual("0", doccat.GetBestCategory(bProbs));

    // For "a", category "1" has the highest score, so it is expected last in the
    // sorted score map; the first entry enumerated must therefore be category "0".
    var sortedScoreMap = doccat.SortedScoreMap("a");

    // Track whether the loop body ran: with an empty map the assertion inside the
    // loop would be skipped and the test would pass without checking anything.
    var firstEntryChecked = false;
    foreach (var pair in sortedScoreMap)
    {
        Assert.AreEqual("0", pair.Value[0]);
        firstEntryChecked = true;
        break; // only the first entry is of interest
    }

    Assert.IsTrue(firstEntryChecked);
}
/// <summary>
/// Trains a model, categorizes the source text with it, writes the model to
/// "en-sentiment.bin" under the application base path, and converts the best
/// category into a sentiment result.
/// </summary>
/// <returns>
/// The best category predicted for the source text, converted through
/// <c>ConvertRawResultAsSentimentResult</c>.
/// </returns>
public SentimentAnalyzeResult Analyze()
{
    var model = Train();
    var category = new DocumentCategorizerME(model);

    var content = GetSourceText();
    var distribution = category.Categorize(content);
    var predictedCategory = category.GetBestCategory(distribution);

    // FileMode.Create replaces any existing file. The previous FileMode.Append
    // added a new serialized model after the old content on every call, so the
    // file kept growing and no longer contained a single model.
    var modelPath = Path.Combine(AppConfig.GetAppBasePath(), "en-sentiment.bin");
    using (var stream = new FileStream(modelPath, FileMode.Create))
    {
        SerializeHelper.Serialize(stream, model);
    }

    return predictedCategory.ConvertRawResultAsSentimentResult();
}