/// <summary>
/// Round-trips a model trained with a custom factory (the <c>SimpleTokenizer</c> singleton plus a
/// bag-of-words and an n-gram feature generator) through serialization, then checks that the
/// deserialized factory reports the same generator types, in the same order, and the same
/// tokenizer type.
/// </summary>
public void TestCustom()
{
    var generators = new IFeatureGenerator[]
    {
        new BagOfWordsFeatureGenerator(),
        new NGramFeatureGenerator()
    };

    var trained = Train(new DocumentCategorizerFactory(SimpleTokenizer.Instance, generators));
    Assert.NotNull(trained);

    using (var buffer = new MemoryStream())
    {
        // NOTE(review): the UnclosableStream wrapper presumably keeps Serialize from closing
        // the MemoryStream so it can be rewound and read back below - confirm against its source.
        trained.Serialize(new UnclosableStream(buffer));
        buffer.Seek(0, SeekOrigin.Begin);

        var restored = new DocumentCategorizerModel(buffer);
        Assert.NotNull(restored);
        Assert.NotNull(restored.Factory);

        // Exact runtime types are compared, so a subclass would not satisfy these checks.
        var restoredGenerators = restored.Factory.FeatureGenerators;
        Assert.AreEqual(2, restoredGenerators.Length);
        Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), restoredGenerators[0].GetType());
        Assert.AreEqual(typeof(NGramFeatureGenerator), restoredGenerators[1].GetType());

        Assert.AreEqual(typeof(SimpleTokenizer), restored.Factory.Tokenizer.GetType());
    }
}
/// <summary>
/// Round-trips a model trained with a custom factory (the <c>SimpleTokenizer</c> singleton, a
/// bag-of-words generator, a default n-gram generator and an n-gram generator constructed with
/// arguments <c>(2, 3)</c>) through serialization, then checks that the deserialized factory
/// exposes three feature generators of the expected types, in order, and a
/// <c>SimpleTokenizer</c>.
/// </summary>
public void TestCustom()
{
    var featureGenerators = new IFeatureGenerator[]
    {
        new BagOfWordsFeatureGenerator(),
        new NGramFeatureGenerator(),
        new NGramFeatureGenerator(2, 3)
    };
    var factory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, featureGenerators);

    var model = Train(factory);

    // Constraint-model assertion, matching every other assertion in this test.
    Assert.That(model, Is.Not.Null);

    using (var data = new MemoryStream())
    {
        // NOTE(review): the UnclosableStream wrapper presumably keeps Serialize from closing
        // the MemoryStream so it can be rewound and read back below - confirm against its source.
        model.Serialize(new UnclosableStream(data));
        data.Seek(0, SeekOrigin.Begin);

        var deserialized = new DocumentCategorizerModel(data);

        Assert.That(deserialized, Is.Not.Null);
        Assert.That(deserialized.Factory, Is.Not.Null);

        Assert.That(deserialized.Factory.FeatureGenerators.Length, Is.EqualTo(3));
        Assert.That(deserialized.Factory.FeatureGenerators[0], Is.InstanceOf<BagOfWordsFeatureGenerator>());
        Assert.That(deserialized.Factory.FeatureGenerators[1], Is.InstanceOf<NGramFeatureGenerator>());
        Assert.That(deserialized.Factory.FeatureGenerators[2], Is.InstanceOf<NGramFeatureGenerator>());

        Assert.That(deserialized.Factory.Tokenizer, Is.InstanceOf<SimpleTokenizer>());
    }
}
/// <summary>
/// Round-trips a model obtained from the parameterless <c>Train()</c> helper through
/// serialization, then checks that the deserialized factory exposes exactly one
/// <c>BagOfWordsFeatureGenerator</c> and a <c>WhitespaceTokenizer</c>.
/// </summary>
public void TestDefault()
{
    var trained = Train();
    Assert.NotNull(trained);

    using (var buffer = new MemoryStream())
    {
        // NOTE(review): the UnclosableStream wrapper presumably keeps Serialize from closing
        // the MemoryStream so it can be rewound and read back below - confirm against its source.
        trained.Serialize(new UnclosableStream(buffer));
        buffer.Seek(0, SeekOrigin.Begin);

        var restored = new DocumentCategorizerModel(buffer);
        Assert.NotNull(restored);
        Assert.NotNull(restored.Factory);

        // Exact runtime types are compared, so a subclass would not satisfy these checks.
        var restoredGenerators = restored.Factory.FeatureGenerators;
        Assert.AreEqual(1, restoredGenerators.Length);
        Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), restoredGenerators[0].GetType());

        Assert.AreEqual(typeof(WhitespaceTokenizer), restored.Factory.Tokenizer.GetType());
    }
}
/// <summary>
/// Round-trips a model obtained from the parameterless <c>Train()</c> helper through
/// serialization, then checks that the deserialized factory exposes exactly one
/// <c>BagOfWordsFeatureGenerator</c> and a <c>WhitespaceTokenizer</c>.
/// </summary>
public void TestDefault()
{
    var model = Train();

    // Constraint-model assertion, matching every other assertion in this test.
    Assert.That(model, Is.Not.Null);

    using (var data = new MemoryStream())
    {
        // NOTE(review): the UnclosableStream wrapper presumably keeps Serialize from closing
        // the MemoryStream so it can be rewound and read back below - confirm against its source.
        model.Serialize(new UnclosableStream(data));
        data.Seek(0, SeekOrigin.Begin);

        var deserialized = new DocumentCategorizerModel(data);

        Assert.That(deserialized, Is.Not.Null);
        Assert.That(deserialized.Factory, Is.Not.Null);

        Assert.That(deserialized.Factory.FeatureGenerators.Length, Is.EqualTo(1));
        Assert.That(deserialized.Factory.FeatureGenerators[0], Is.InstanceOf<BagOfWordsFeatureGenerator>());

        Assert.That(deserialized.Factory.Tokenizer, Is.InstanceOf<WhitespaceTokenizer>());
    }
}