/// <summary>
/// Creates a <see cref="DocumentCategorizerME"/> backed by the supplied document categorizer model.
/// The context generator is built from the feature generators exposed by the model's factory.
/// </summary>
/// <param name="model">The document categorizer model.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public DocumentCategorizerME(DocumentCategorizerModel model) {
    if (model == null) {
        throw new ArgumentNullException("model");
    }

    // Read the generators from the model's factory first, then wire up the context generator.
    var featureGenerators = model.Factory.FeatureGenerators;
    cg = new DocumentCategorizerContextGenerator(featureGenerators);

    this.model = model;
}
/// <summary>
/// Initializes a new <see cref="DocumentCategorizerME"/> from a document categorizer model.
/// Feature generation is driven by the feature generators of the model's factory.
/// </summary>
/// <param name="model">The model to categorize with.</param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="model"/> is <c>null</c>.</exception>
public DocumentCategorizerME(DocumentCategorizerModel model) {
    // Guard clause: nothing below can work without a model.
    if (model == null) {
        throw new ArgumentNullException("model");
    }

    var factory = model.Factory;

    cg = new DocumentCategorizerContextGenerator(factory.FeatureGenerators);
    this.model = model;
}
/// <summary>
/// Serializes the model returned by the parameterless <c>Train()</c> helper into memory,
/// reads it back and checks that the restored factory exposes a single
/// <see cref="BagOfWordsFeatureGenerator"/> and a <see cref="WhitespaceTokenizer"/>.
/// </summary>
public void TestDefault() {
    var trained = Train();
    Assert.NotNull(trained);

    using (var buffer = new MemoryStream()) {
        // The buffer is read again below, so it is handed to Serialize through an
        // UnclosableStream wrapper (presumably to keep it open - confirm against the wrapper).
        trained.Serialize(new UnclosableStream(buffer));

        // Rewind so deserialization starts at the first byte.
        buffer.Position = 0;

        var restored = new DocumentCategorizerModel(buffer);
        Assert.NotNull(restored);

        var factory = restored.Factory;
        Assert.NotNull(factory);

        var generators = factory.FeatureGenerators;
        Assert.AreEqual(1, generators.Length);
        Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), generators[0].GetType());

        Assert.AreEqual(typeof(WhitespaceTokenizer), factory.Tokenizer.GetType());
    }
}
/// <summary>
/// Trains a model with a custom factory (simple tokenizer, bag-of-words and n-gram feature
/// generators), round-trips it through an in-memory stream and checks that the restored
/// factory reports the same tokenizer type and feature generator types, in the same order.
/// </summary>
public void TestCustom() {
    IFeatureGenerator[] customGenerators = {
        new BagOfWordsFeatureGenerator(),
        new NGramFeatureGenerator()
    };

    var customFactory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, customGenerators);

    var trained = Train(customFactory);
    Assert.NotNull(trained);

    using (var buffer = new MemoryStream()) {
        // The buffer is read again below, so it is handed to Serialize through an
        // UnclosableStream wrapper (presumably to keep it open - confirm against the wrapper).
        trained.Serialize(new UnclosableStream(buffer));

        // Rewind so deserialization starts at the first byte.
        buffer.Position = 0;

        var restored = new DocumentCategorizerModel(buffer);
        Assert.NotNull(restored);

        var factory = restored.Factory;
        Assert.NotNull(factory);

        var generators = factory.FeatureGenerators;
        Assert.AreEqual(2, generators.Length);
        Assert.AreEqual(typeof(BagOfWordsFeatureGenerator), generators[0].GetType());
        Assert.AreEqual(typeof(NGramFeatureGenerator), generators[1].GetType());

        Assert.AreEqual(typeof(SimpleTokenizer), factory.Tokenizer.GetType());
    }
}