public void TestCustom() {
            // Train with an explicit tokenizer and feature generator set so the
            // serialization round trip below has a specific configuration to verify.
            var generators = new IFeatureGenerator[] {
                new BagOfWordsFeatureGenerator(),
                new NGramFeatureGenerator()
            };
            var customFactory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, generators);
            var trained = Train(customFactory);

            Assert.NotNull(trained);

            using (var buffer = new MemoryStream()) {
                // The buffer is read again after serialization, so it is handed to
                // Serialize through the UnclosableStream wrapper.
                var sink = new UnclosableStream(buffer);
                trained.Serialize(sink);

                // Rewind to the start before reading the model back.
                buffer.Position = 0;

                var restored = new DocumentCategorizerModel(buffer);
                Assert.NotNull(restored);

                var restoredFactory = restored.Factory;
                Assert.NotNull(restoredFactory);

                // The feature generators must come back with the same types, in the same order.
                var expectedTypes = new[] {
                    typeof (BagOfWordsFeatureGenerator),
                    typeof (NGramFeatureGenerator)
                };
                var restoredGenerators = restoredFactory.FeatureGenerators;

                Assert.AreEqual(expectedTypes.Length, restoredGenerators.Length);
                for (var i = 0; i < expectedTypes.Length; i++) {
                    Assert.AreEqual(expectedTypes[i], restoredGenerators[i].GetType());
                }

                // The tokenizer type must survive the round trip as well.
                Assert.AreEqual(typeof (SimpleTokenizer), restoredFactory.Tokenizer.GetType());
            }
        }
 /// <summary>
 /// Trains a model from the sample stream using the default training parameters.
 /// </summary>
 /// <param name="factory">
 /// The factory to train with; when <c>null</c>, a new <see cref="DocumentCategorizerFactory"/> is used.
 /// </param>
 /// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
 private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null) {
     // Locals are declared in the same order the arguments were originally evaluated.
     var samples = CreateSampleStream();
     var parameters = TrainingParameters.DefaultParameters();
     var effectiveFactory = factory ?? new DocumentCategorizerFactory();

     return DocumentCategorizerME.Train("x-unspecified", samples, parameters, effectiveFactory);
 }
Exemple #3
0
 /// <summary>
 /// Trains a document categorizer model without a training monitor.
 /// </summary>
 /// <remarks>
 /// Delegates to the overload that accepts a monitor, passing <c>null</c> for it.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <param name="factory">The document categorizer factory.</param>
 /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
 public static DocumentCategorizerModel Train(string languageCode, IObjectStream<DocumentSample> samples, TrainingParameters parameters, DocumentCategorizerFactory factory)
 {
     // This overload supplies no monitor.
     return Train(languageCode, samples, parameters, factory, monitor: null);
 }
        /// <summary>
        /// Trains a document categorizer model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The document categorizer factory; its feature generators are used to build the training events.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
        public static DocumentCategorizerModel Train(string languageCode, IObjectStream<DocumentSample> samples, TrainingParameters parameters, DocumentCategorizerFactory factory, Monitor monitor) {

            // The factory is dereferenced below; fail fast with a descriptive
            // exception instead of a bare NullReferenceException.
            if (factory == null) {
                throw new System.ArgumentNullException("factory");
            }

            // The same dictionary is handed to the trainer factory and then stored in the resulting model.
            var manifestInfoEntries = new Dictionary<string, string>();

            var eventStream = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);
            var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            var model = trainer.Train(eventStream);

            return new DocumentCategorizerModel(languageCode, model, manifestInfoEntries, factory);
        }
        /// <summary>
        /// Trains a document categorizer model without a training monitor.
        /// </summary>
        /// <remarks>
        /// Forwards all arguments to the overload that accepts a monitor and passes <c>null</c> as the monitor.
        /// </remarks>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The document categorizer factory.</param>
        /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
        public static DocumentCategorizerModel Train(string languageCode, IObjectStream<DocumentSample> samples, TrainingParameters parameters, DocumentCategorizerFactory factory) {
            // No monitor is attached when training through this overload.
            return Train(languageCode, samples, parameters, factory, monitor: null);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="DocumentCategorizerModel"/> class that wraps the given maxent model.
        /// </summary>
        /// <remarks>
        /// The component name, language code, manifest entries and factory are forwarded to the base constructor;
        /// the maxent model is then added to the artifact map under the <c>DoccatEntry</c> key.
        /// </remarks>
        /// <param name="languageCode">The language code.</param>
        /// <param name="doccatModel">The doccat model to store as the <c>DoccatEntry</c> artifact.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <param name="factory">The factory.</param>
        public DocumentCategorizerModel(string languageCode, IMaxentModel doccatModel, Dictionary<string, string> manifestInfoEntries, DocumentCategorizerFactory factory)
            : base(ComponentName, languageCode, manifestInfoEntries, factory) {

            // Register the model first so it is present when the artifact map is checked.
            artifactMap.Add(DoccatEntry, doccatModel);
            // NOTE(review): presumably validates the registered artifacts - confirm in the base class.
            CheckArtifactMap();
        }
Exemple #7
0
 /// <summary>
 /// Initializes a new instance of the <see cref="DocumentCategorizerModel"/> class that wraps the given maxent model.
 /// </summary>
 /// <remarks>
 /// The component name, language code, manifest entries and factory are forwarded to the base constructor;
 /// the maxent model is then added to the artifact map under the <c>DoccatEntry</c> key.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="doccatModel">The doccat model to store as the <c>DoccatEntry</c> artifact.</param>
 /// <param name="manifestInfoEntries">The manifest information entries.</param>
 /// <param name="factory">The factory.</param>
 public DocumentCategorizerModel(string languageCode, IMaxentModel doccatModel, Dictionary <string, string> manifestInfoEntries, DocumentCategorizerFactory factory)
     : base(ComponentName, languageCode, manifestInfoEntries, factory)
 {
     // Register the model first so it is present when the artifact map is checked.
     artifactMap.Add(DoccatEntry, doccatModel);
     // NOTE(review): presumably validates the registered artifacts - confirm in the base class.
     CheckArtifactMap();
 }
Exemple #8
0
        /// <summary>
        /// Trains a document categorizer model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The document categorizer factory; its feature generators are used to build the training events.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
        public static DocumentCategorizerModel Train(string languageCode, IObjectStream <DocumentSample> samples, TrainingParameters parameters, DocumentCategorizerFactory factory, Monitor monitor)
        {
            // The factory is dereferenced below; fail fast with a descriptive
            // exception instead of a bare NullReferenceException.
            if (factory == null)
            {
                throw new System.ArgumentNullException("factory");
            }

            // The same dictionary is handed to the trainer factory and then stored in the resulting model.
            var manifestInfoEntries = new Dictionary <string, string>();

            var eventStream = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);
            var trainer     = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            var model       = trainer.Train(eventStream);

            return new DocumentCategorizerModel(languageCode, model, manifestInfoEntries, factory);
        }