// Trains a model with a custom factory (SimpleTokenizer plus two feature generators),
// writes it to an in-memory stream, reads it back, and checks that the deserialized
// factory exposes the same generator types (in order) and the same tokenizer type.
public void TestCustom() {
            var generators = new IFeatureGenerator[] {
                new BagOfWordsFeatureGenerator(),
                new NGramFeatureGenerator()
            };
            var trained = Train(new DocumentCategorizerFactory(SimpleTokenizer.Instance, generators));

            Assert.NotNull(trained);

            using (var buffer = new MemoryStream()) {
                // NOTE(review): the wrapper presumably keeps Serialize from closing the
                // buffer so it can be read back below - confirm against UnclosableStream.
                var guarded = new UnclosableStream(buffer);
                trained.Serialize(guarded);

                // Rewind so deserialization starts at the first written byte.
                buffer.Position = 0;

                var restored = new DocumentCategorizerModel(buffer);
                Assert.NotNull(restored);

                var restoredFactory = restored.Factory;
                Assert.NotNull(restoredFactory);

                // Exact runtime types are compared (not assignability), in the original order.
                var restoredGenerators = restoredFactory.FeatureGenerators;
                Assert.AreEqual(2, restoredGenerators.Length);
                Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), restoredGenerators[0].GetType());
                Assert.AreEqual(typeof(NGramFeatureGenerator), restoredGenerators[1].GetType());

                Assert.AreEqual(typeof(SimpleTokenizer), restoredFactory.Tokenizer.GetType());
            }
        }
        // Trains a model with a custom factory (SimpleTokenizer plus three feature
        // generators, one of them an NGramFeatureGenerator built with arguments 2 and 3),
        // serializes it to memory, deserializes it, and verifies that the restored factory
        // reports three generators of the expected types and a SimpleTokenizer.
        public void TestCustom()
        {
            var featureGenerators = new IFeatureGenerator[] {
                new BagOfWordsFeatureGenerator(),
                new NGramFeatureGenerator(),
                new NGramFeatureGenerator(2, 3)
            };
            var factory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, featureGenerators);
            var model = Train(factory);

            // Constraint-model assertion, consistent with the rest of this test
            // (the classic Assert.NotNull form is not available on Assert in NUnit 4).
            Assert.That(model, Is.Not.Null);

            using (var data = new MemoryStream()) {
                // NOTE(review): the wrapper presumably prevents Serialize from closing
                // the MemoryStream so it can be re-read - confirm against UnclosableStream.
                model.Serialize(new UnclosableStream(data));

                // Rewind to the start before reading the model back.
                data.Seek(0, SeekOrigin.Begin);

                var deserialized = new DocumentCategorizerModel(data);

                Assert.That(deserialized, Is.Not.Null);
                Assert.That(deserialized.Factory, Is.Not.Null);

                Assert.That(deserialized.Factory.FeatureGenerators.Length, Is.EqualTo(3));
                Assert.That(deserialized.Factory.FeatureGenerators[0], Is.InstanceOf<BagOfWordsFeatureGenerator>());
                Assert.That(deserialized.Factory.FeatureGenerators[1], Is.InstanceOf<NGramFeatureGenerator>());
                Assert.That(deserialized.Factory.FeatureGenerators[2], Is.InstanceOf<NGramFeatureGenerator>());

                Assert.That(deserialized.Factory.Tokenizer, Is.InstanceOf<SimpleTokenizer>());
            }
        }
 /// <summary>
 /// Trains a document categorizer model from <c>CreateSampleStream()</c> using the
 /// default training parameters, passing <c>"x-unspecified"</c> as the first argument
 /// (presumably the language code).
 /// </summary>
 /// <param name="factory">
 /// Factory to train with; when <c>null</c>, a default-constructed
 /// <see cref="DocumentCategorizerFactory"/> is used instead.
 /// </param>
 /// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
 private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null) {
     var samples = CreateSampleStream();
     var parameters = TrainingParameters.DefaultParameters();
     var effectiveFactory = factory ?? new DocumentCategorizerFactory();

     return DocumentCategorizerME.Train("x-unspecified", samples, parameters, effectiveFactory);
 }
 /// <summary>
 /// Trains a document categorizer model from <c>GetSentimentModelStream()</c> using the
 /// default training parameters, passing <c>"en"</c> as the first argument
 /// (presumably the language code).
 /// </summary>
 /// <param name="factory">
 /// Factory to train with; when <c>null</c>, a default-constructed
 /// <see cref="DocumentCategorizerFactory"/> is used instead.
 /// </param>
 /// <returns>The model returned by <c>DocumentCategorizerME.Train</c>.</returns>
 public DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
 {
     var samples = GetSentimentModelStream();
     var parameters = TrainingParameters.DefaultParameters();
     var effectiveFactory = factory ?? new DocumentCategorizerFactory();

     return DocumentCategorizerME.Train("en", samples, parameters, effectiveFactory);
 }