/// <summary>
/// Trains a model with a custom tokenizer and custom feature generators, round-trips it
/// through serialization and checks that the deserialized factory reports the same
/// tokenizer type and the same feature generator types in the same order.
/// </summary>
public void TestCustom()
{
    var generators = new IFeatureGenerator[]
    {
        new BagOfWordsFeatureGenerator(),
        new NGramFeatureGenerator()
    };
    var customFactory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, generators);

    var model = Train(customFactory);

    Assert.NotNull(model);

    using (var buffer = new MemoryStream())
    {
        // The buffer is handed over wrapped in UnclosableStream (presumably so that
        // serialization cannot close it) and is rewound before being read back.
        model.Serialize(new UnclosableStream(buffer));
        buffer.Seek(0, SeekOrigin.Begin);

        var restored = new DocumentCategorizerModel(buffer);

        Assert.NotNull(restored);
        Assert.NotNull(restored.Factory);

        // Both generators must come back, in the order they were registered.
        Assert.AreEqual(2, restored.Factory.FeatureGenerators.Length);
        Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), restored.Factory.FeatureGenerators[0].GetType());
        Assert.AreEqual(typeof(NGramFeatureGenerator), restored.Factory.FeatureGenerators[1].GetType());

        // The tokenizer type must come back as well.
        Assert.AreEqual(typeof(SimpleTokenizer), restored.Factory.Tokenizer.GetType());
    }
}
/// <summary>
/// Trains a document categorizer model from the sample stream using the default
/// training parameters and the "x-unspecified" language code.
/// </summary>
/// <param name="factory">
/// The factory to train with; when <c>null</c>, a default-constructed
/// <see cref="DocumentCategorizerFactory"/> is used instead.
/// </param>
/// <returns>The trained model.</returns>
private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
{
    // Locals are assigned in the same order the original argument list was evaluated.
    var samples = CreateSampleStream();
    var parameters = TrainingParameters.DefaultParameters();
    var effectiveFactory = factory ?? new DocumentCategorizerFactory();

    return DocumentCategorizerME.Train("x-unspecified", samples, parameters, effectiveFactory);
}
/// <summary>
/// Trains a document categorizer model without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory.</param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory)
{
    // Delegates to the five-argument overload, passing null as the last argument.
    var trained = Train(languageCode, samples, parameters, factory, null);
    return trained;
}
/// <summary>
/// Trains a document categorizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory. Must not be <c>null</c>.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training
/// or to cancel the training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="factory"/> is <c>null</c>.
/// </exception>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory,
    Monitor monitor)
{
    // The factory is dereferenced below; fail fast with a descriptive exception
    // instead of letting a NullReferenceException surface from factory.FeatureGenerators.
    if (factory == null)
    {
        throw new System.ArgumentNullException("factory");
    }

    // Passed to the trainer factory and then stored in the resulting model.
    var manifestInfoEntries = new Dictionary<string, string>();

    // The event stream is built from the samples and the factory's feature generators.
    var eventStream = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);

    var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
    var model = trainer.Train(eventStream);

    return new DocumentCategorizerModel(languageCode, model, manifestInfoEntries, factory);
}
/// <summary>
/// Trains a document categorizer model without an evaluation monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory.</param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory)
{
    // Delegates to the five-argument overload, passing null as the last argument.
    var trained = Train(languageCode, samples, parameters, factory, null);
    return trained;
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentCategorizerModel"/> class from
/// a trained doccat model, its manifest entries and the factory it was trained with.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="doccatModel">The doccat model, stored in the artifact map under the doccat entry key.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="factory">The factory.</param>
public DocumentCategorizerModel(
    string languageCode,
    IMaxentModel doccatModel,
    Dictionary<string, string> manifestInfoEntries,
    DocumentCategorizerFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory)
{
    // Register the model first, then run the artifact map check.
    artifactMap.Add(DoccatEntry, doccatModel);

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="DocumentCategorizerModel"/> class from
/// a trained doccat model, its manifest entries and the factory it was trained with.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="doccatModel">The doccat model, stored in the artifact map under the doccat entry key.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <param name="factory">The factory.</param>
public DocumentCategorizerModel(
    string languageCode,
    IMaxentModel doccatModel,
    Dictionary<string, string> manifestInfoEntries,
    DocumentCategorizerFactory factory)
    : base(ComponentName, languageCode, manifestInfoEntries, factory)
{
    // Register the model first, then run the artifact map check.
    artifactMap.Add(DoccatEntry, doccatModel);

    CheckArtifactMap();
}
/// <summary>
/// Trains a document categorizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory. Must not be <c>null</c>.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training
/// or to cancel the training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="factory"/> is <c>null</c>.
/// </exception>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory,
    Monitor monitor)
{
    // The factory is dereferenced below; fail fast with a descriptive exception
    // instead of letting a NullReferenceException surface from factory.FeatureGenerators.
    if (factory == null)
    {
        throw new System.ArgumentNullException("factory");
    }

    // Passed to the trainer factory and then stored in the resulting model.
    var manifestInfoEntries = new Dictionary<string, string>();

    // The event stream is built from the samples and the factory's feature generators.
    var eventStream = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);

    var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
    var model = trainer.Train(eventStream);

    return new DocumentCategorizerModel(languageCode, model, manifestInfoEntries, factory);
}