/// <summary>
/// Trains a Naive Bayes document categorizer on a small two-category sample set
/// (bag-of-words features, whitespace tokenizer) and verifies that "a" is classified
/// as category "1" and "x" as category "0".
/// </summary>
public void TestSimpleTraining()
{
    IObjectStream<DocumentSample> trainingStream = new GenericObjectStream<DocumentSample>(
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

    var settings = new TrainingParameters();
    settings.Set(Parameters.Iterations, "100");
    settings.Set(Parameters.Cutoff, "0");
    settings.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

    var factory = new DocumentCategorizerFactory(
        WhitespaceTokenizer.Instance,
        new[] { new BagOfWordsFeatureGenerator() });

    var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingStream, settings, factory);
    var categorizer = new DocumentCategorizerME(trainedModel);

    // Tokens seen only in category "1" documents must map to "1", and likewise for "0".
    Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
    Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

    // The last entry of the sorted score map is expected to hold category "1",
    // because it has the highest score for the input "a".
    Assert.AreEqual("1", categorizer.SortedScoreMap("a").Last().Value[0]);
}
/// <summary>
/// Trains a document categorizer model from the sample stream using the default
/// training parameters and the language code "x-unspecified".
/// </summary>
/// <param name="factory">
/// The categorizer factory to train with; when <c>null</c>, a new
/// <c>DocumentCategorizerFactory</c> is created.
/// </param>
/// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
{
    var sampleStream = CreateSampleStream();
    var parameters = TrainingParameters.DefaultParameters();

    // Fall back to a default factory only after the other inputs have been prepared.
    if (factory == null)
    {
        factory = new DocumentCategorizerFactory();
    }

    return DocumentCategorizerME.Train("x-unspecified", sampleStream, parameters, factory);
}
/// <summary>
/// Trains a model, categorizes the source text with it, writes the trained model to
/// "en-sentiment.bin" under the application base path, and returns the best category
/// converted to a sentiment result.
/// </summary>
/// <returns>
/// The predicted category passed through <c>ConvertRawResultAsSentimentResult</c>.
/// </returns>
public SentimentAnalyzeResult Analyze()
{
    var model = Train();
    var categorizer = new DocumentCategorizerME(model);

    var content = GetSourceText();
    var distribution = categorizer.Categorize(content);
    var predictedCategory = categorizer.GetBestCategory(distribution);

    // FileMode.Create replaces any previous file. The former FileMode.Append added a new
    // serialized model after the existing bytes on every call, so the file kept growing
    // and no longer contained a single well-formed model.
    var modelPath = Path.Combine(AppConfig.GetAppBasePath(), "en-sentiment.bin");
    using (var stream = new FileStream(modelPath, FileMode.Create, FileAccess.Write))
    {
        SerializeHelper.Serialize(stream, model);
    }

    return predictedCategory.ConvertRawResultAsSentimentResult();
}
/// <summary>
/// Creates a model that pairs a categorizer with a mapping keyed by the categorizer's
/// internal category names. Every internal category must have an entry in the mapping,
/// and the mapping must contain exactly as many entries as the categorizer has categories.
/// </summary>
/// <param name="categorizer">The categorizer whose categories are mapped.</param>
/// <param name="mappedCategories">Mapping keyed by the categorizer's internal category names.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="categorizer"/> or <paramref name="mappedCategories"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="mappedCategories"/> is empty, has a different number of entries than
/// the categorizer has categories, or lacks an entry for one of the categorizer's categories.
/// </exception>
public CategoryMappedDocumentCategorizerModel(DocumentCategorizerME categorizer, Dictionary<string, string> mappedCategories)
{
    Categorizer = categorizer;
    MappedCategories = mappedCategories;

    // Each assertion sits directly before its matching guard so that no later check
    // dereferences a value whose null guard has not run yet.
    Debug.Assert(Categorizer != null, "Categorizer cannot be null!");
    if (Categorizer == null)
    {
        // ArgumentNullException takes (paramName, message), in that order.
        throw new ArgumentNullException(nameof(categorizer), "Categorizer cannot be null!");
    }

    Debug.Assert(MappedCategories != null, "Mapped categories cannot be null!");
    if (MappedCategories == null)
    {
        throw new ArgumentNullException(nameof(mappedCategories), "Mapped categories cannot be null!");
    }

    Debug.Assert(MappedCategories.Count > 0, "Mapped categories must contain one or more entries!");
    if (MappedCategories.Count <= 0)
    {
        // ArgumentException takes (message, paramName), in that order.
        throw new ArgumentException("Mapped categories must contain one or more entries!", nameof(mappedCategories));
    }

    var totalInternalCats = categorizer.getNumberOfCategories();
    Debug.Assert(totalInternalCats == MappedCategories.Count, "Mapped categories must have the same number of entries as its corresponding categorizer!");
    if (totalInternalCats != MappedCategories.Count)
    {
        throw new ArgumentException("Mapped categories must have the same number of entries as its corresponding categorizer!", nameof(mappedCategories));
    }

    // With equal counts established, every internal category must appear as a key.
    for (int i = 0; i < totalInternalCats; ++i)
    {
        var internalCatName = Categorizer.getCategory(i);
        var isMapped = MappedCategories.ContainsKey(internalCatName);
        Debug.Assert(isMapped, "Found unmapped category!");
        if (!isMapped)
        {
            throw new ArgumentException("Found unmapped category!", nameof(mappedCategories));
        }
    }
}
/// <summary>
/// Command-line entry point: with no arguments prints the help text; otherwise loads
/// the model from the file named by the first argument, categorizes each document read
/// from the input stream, and writes each result to the console as a DocumentSample.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the model file.</param>
public override void run(string[] args)
{
    if (0 == args.Length)
    {
        Console.WriteLine(Help);
        return;
    }

    DoccatModel model = (new DoccatModelLoader()).load(new File(args[0]));
    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

    // NOTE(review): Console.OpenStandardInput is passed without invocation parentheses;
    // confirm InputStreamReader is meant to receive it in this form.
    ObjectStream<string> documents = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

    // NOTE(review): System.err looks like a leftover from a Java conversion; verify it resolves.
    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "doc");
    monitor.start();

    try
    {
        while (true)
        {
            string text = documents.read();
            if (text == null)
            {
                break;
            }

            double[] outcomes = categorizer.categorize(WhitespaceTokenizer.INSTANCE.tokenize(text));
            string bestCategory = categorizer.getBestCategory(outcomes);

            // Emit the document together with its predicted category.
            Console.WriteLine(new DocumentSample(bestCategory, text).ToString());

            monitor.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Trains a document categorizer with the default factory on a small two-category
/// sample set and verifies that "a" is classified as category "1" and "x" as "0".
/// </summary>
public void TestSimpleTraining()
{
    var documents = new[]
    {
        new DocumentSample("1", new[] { "a", "b", "c" }),
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" })
    };
    IObjectStream<DocumentSample> trainingStream = new GenericObjectStream<DocumentSample>(documents);

    var settings = new TrainingParameters();
    settings.Set(Parameters.Iterations, "100");
    settings.Set(Parameters.Cutoff, "0");

    var factory = new DocumentCategorizerFactory();
    var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingStream, settings, factory);
    var categorizer = new DocumentCategorizerME(trainedModel);

    Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
    Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

    // For the input "a" the score map is expected to list category "0" first and
    // category "1" last (highest score). Only the first entry is inspected here.
    var scoresByRank = categorizer.SortedScoreMap("a");
    foreach (var entry in scoresByRank)
    {
        Assert.AreEqual("0", entry.Value[0]);
        break;
    }
}
/// <summary>
/// Trains a document categorizer model for language code "en" from the sentiment
/// model stream, using the default training parameters.
/// </summary>
/// <param name="factory">
/// The categorizer factory to train with; when <c>null</c>, a new
/// <c>DocumentCategorizerFactory</c> is created.
/// </param>
/// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
public DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
{
    var sentimentStream = GetSentimentModelStream();
    var parameters = TrainingParameters.DefaultParameters();

    // Resolve the factory last, after the stream and parameters have been obtained.
    var effectiveFactory = factory;
    if (effectiveFactory == null)
    {
        effectiveFactory = new DocumentCategorizerFactory();
    }

    return DocumentCategorizerME.Train("en", sentimentStream, parameters, effectiveFactory);
}