Пример #1
0
        /// <summary>
        /// Creates a <see cref="DocumentCategorizerME"/> backed by the given document categorizer model.
        /// The context generator is built from the feature generators exposed by the model's factory.
        /// </summary>
        /// <param name="model">The document categorizer model; must not be <c>null</c>.</param>
        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="model"/> is <c>null</c>.</exception>
        public DocumentCategorizerME(DocumentCategorizerModel model) {
            // Reject a missing model before touching any of its members.
            if (model == null) {
                throw new ArgumentNullException("model");
            }

            this.model = model;

            // The feature generators come from the model's own factory.
            var featureGenerators = model.Factory.FeatureGenerators;
            cg = new DocumentCategorizerContextGenerator(featureGenerators);
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="DocumentCategorizerME"/> around the supplied document categorizer model.
        /// Feature generation is driven by the feature generators taken from the model's factory.
        /// </summary>
        /// <param name="model">The document categorizer model to use; <c>null</c> is rejected.</param>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="model"/> is <c>null</c>.
        /// </exception>
        public DocumentCategorizerME(DocumentCategorizerModel model)
        {
            // Guard clause: fail fast on a missing model.
            if (model == null)
            {
                throw new ArgumentNullException("model");
            }

            this.model = model;

            // Context generation uses whatever feature generators the model's factory provides.
            var generators = model.Factory.FeatureGenerators;
            cg = new DocumentCategorizerContextGenerator(generators);
        }
        /// <summary>
        /// Serializes the model returned by the parameterless <c>Train()</c> helper into a memory
        /// stream, reads it back, and verifies that the restored factory reports a single
        /// <see cref="BagOfWordsFeatureGenerator"/> and a <see cref="WhitespaceTokenizer"/>.
        /// </summary>
        public void TestDefault() {
            var trained = Train();
            Assert.NotNull(trained);

            using (var buffer = new MemoryStream()) {
                // NOTE(review): UnclosableStream presumably keeps Serialize from closing the buffer - confirm.
                trained.Serialize(new UnclosableStream(buffer));

                // Rewind so the model can be read back from the start.
                buffer.Position = 0;

                var restored = new DocumentCategorizerModel(buffer);
                Assert.NotNull(restored);

                var restoredFactory = restored.Factory;
                Assert.NotNull(restoredFactory);

                var restoredGenerators = restoredFactory.FeatureGenerators;
                Assert.AreEqual(1, restoredGenerators.Length);
                Assert.AreEqual(typeof (BagOfWordsFeatureGenerator), restoredGenerators[0].GetType());

                Assert.AreEqual(typeof (WhitespaceTokenizer), restoredFactory.Tokenizer.GetType());
            }
        }
        /// <summary>
        /// Trains a model with a factory configured with <see cref="SimpleTokenizer"/> and two feature
        /// generators, round-trips it through a memory stream, and verifies that the restored factory
        /// reports the same tokenizer type and the same feature generator types in the same order.
        /// </summary>
        public void TestCustom() {
            var generators = new IFeatureGenerator[] {
                new BagOfWordsFeatureGenerator(),
                new NGramFeatureGenerator()
            };
            var customFactory = new DocumentCategorizerFactory(SimpleTokenizer.Instance, generators);

            var trained = Train(customFactory);
            Assert.NotNull(trained);

            using (var buffer = new MemoryStream()) {
                // NOTE(review): UnclosableStream presumably keeps Serialize from closing the buffer - confirm.
                trained.Serialize(new UnclosableStream(buffer));

                // Rewind so the model can be read back from the start.
                buffer.Position = 0;

                var restored = new DocumentCategorizerModel(buffer);
                Assert.NotNull(restored);

                var restoredFactory = restored.Factory;
                Assert.NotNull(restoredFactory);

                var restoredGenerators = restoredFactory.FeatureGenerators;
                Assert.AreEqual(2, restoredGenerators.Length);
                Assert.AreEqual(typeof (BagOfWordsFeatureGenerator), restoredGenerators[0].GetType());
                Assert.AreEqual(typeof (NGramFeatureGenerator), restoredGenerators[1].GetType());

                Assert.AreEqual(typeof (SimpleTokenizer), restoredFactory.Tokenizer.GetType());
            }
        }