Пример #1
0
        /// <summary>
        /// Serializes a freshly trained POS model into an in-memory stream, reloads it from that
        /// stream and verifies that language, manifest, sequence model type and factory type match.
        /// </summary>
        public void TestPOSModelSerializationMaxent() {
            var trained = POSTaggerMETest.TrainPOSModel();

            using (var buffer = new MemoryStream()) {
                // Serialize through the wrapper stream; the underlying buffer is read back below.
                var wrapper = new UnclosableStream(buffer);
                trained.Serialize(wrapper);

                // Rewind so the model can be read from the start of the buffer.
                buffer.Seek(0, SeekOrigin.Begin);

                var reloaded = new POSModel(buffer);

                Assert.AreEqual(trained.Language, reloaded.Language);
                Assert.AreEqual(trained.Manifest, reloaded.Manifest);

                var expectedSequenceType = trained.PosSequenceModel.GetType();
                Assert.AreEqual(expectedSequenceType, reloaded.PosSequenceModel.GetType());

                var expectedFactoryType = trained.Factory.GetType();
                Assert.AreEqual(expectedFactoryType, reloaded.Factory.GetType());
            }
        }
Пример #2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME"/> class from the given model package.
        /// </summary>
        /// <remarks>
        /// The beam size is requested from the model manifest under <c>Parameters.BeamSize</c>, with
        /// <c>DefaultBeamSize</c> passed as the second argument of that lookup. When the model package
        /// carries no sequence model, a beam search over its maxent model is created instead.
        /// </remarks>
        /// <param name="model">The model package; must not be <c>null</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        public POSTaggerME(POSModel model) {
            if (model == null) {
                throw new ArgumentNullException("model");
            }

            var beam = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

            modelPackage = model;
            size = beam;

            TagDictionary = model.Factory.TagDictionary;
            ContextGenerator = model.Factory.GetPOSContextGenerator(beam);
            SequenceValidator = model.Factory.GetSequenceValidator();

            // Prefer the sequence model shipped with the package; fall back to a beam search otherwise.
            this.model = model.PosSequenceModel;
            if (this.model == null) {
                this.model = new ML.BeamSearch<string>(beam, model.MaxentModel, 0);
            }
        }
Пример #3
0
        /// <summary>
        /// Creates a <see cref="POSTaggerME"/> backed by the supplied model package.
        /// </summary>
        /// <remarks>
        /// The beam size comes from a manifest lookup keyed by <c>Parameters.BeamSize</c>;
        /// <c>DefaultBeamSize</c> is handed to that lookup as its second argument. The same value is
        /// passed to the factory when the context generator is requested.
        /// </remarks>
        /// <param name="model">The model package to tag with.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="model"/> is <c>null</c>.</exception>
        public POSTaggerME(POSModel model)
        {
            if (model == null)
            {
                throw new ArgumentNullException("model");
            }

            var manifestBeamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

            size         = manifestBeamSize;
            modelPackage = model;

            TagDictionary     = model.Factory.TagDictionary;
            ContextGenerator  = model.Factory.GetPOSContextGenerator(manifestBeamSize);
            SequenceValidator = model.Factory.GetSequenceValidator();

            // Use the packaged sequence model when there is one.
            this.model = model.PosSequenceModel;

            if (this.model != null)
            {
                return;
            }

            // No packaged sequence model: build a beam search over the maxent model.
            this.model = new ML.BeamSearch<string>(manifestBeamSize, model.MaxentModel, 0);
        }
Пример #4
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> from its component models,
        /// head rules, parser type and manifest information entries.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model; required for tree-insert parsers and forbidden for chunking parsers.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries, forwarded to the base constructor.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary<string, string> manifestInfoEntries)
            : base(ComponentName, languageCode, manifestInfoEntries) {

            // Validate the attach model against the parser type and record the type in the manifest.
            if (modelType == ParserType.Chunking) {
                if (attachModel != null) {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");
                }

                Manifest[ParserTypeParameter] = "CHUNKING";
            } else if (modelType == ParserType.TreeInsert) {
                if (attachModel == null) {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", "attachModel");
                }

                Manifest[ParserTypeParameter] = "TREEINSERT";

                // Only tree-insert parsers store an attach model artifact.
                artifactMap[EntryAttachModel] = attachModel;
            } else {
                throw new ArgumentOutOfRangeException("modelType", "Unknown model type");
            }

            // Artifacts shared by both parser types.
            artifactMap[EntryBuildModel] = buildModel;
            artifactMap[EntryCheckModel] = checkModel;
            artifactMap[EntryParserTaggerModel] = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules] = headRules;

            CheckArtifactMap();
        }
Пример #5
0
        /// <summary>
        /// Creates a <see cref="POSTaggerME" /> from a model package with an explicit beam size
        /// and cache size.
        /// </summary>
        /// <param name="model">The model package; must not be <c>null</c> and must provide a sequence model.</param>
        /// <param name="beamSize">The beam size stored by this tagger.</param>
        /// <param name="cacheSize">The value passed to the factory when the context generator is requested.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        /// <exception cref="System.InvalidOperationException">The model package has no sequence model.</exception>
        public POSTaggerME(POSModel model, int beamSize, int cacheSize)
        {
            if (model == null)
            {
                throw new ArgumentNullException("model");
            }

            modelPackage = model;
            size         = beamSize;

            // modelPackage and model refer to the same package at this point.
            TagDictionary     = model.Factory.TagDictionary;
            ContextGenerator  = model.Factory.GetPOSContextGenerator(cacheSize);
            SequenceValidator = model.Factory.GetSequenceValidator();

            this.model = model.PosSequenceModel;

            if (this.model != null)
            {
                return;
            }

            // Unlike the single-argument constructor, there is no beam search fallback here.
            throw new InvalidOperationException("Unable to retrieve the model.");
        }
Пример #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model,
        /// beam size and cache size.
        /// </summary>
        /// <param name="model">The model package.</param>
        /// <param name="beamSize">The beam size kept by the tagger.</param>
        /// <param name="cacheSize">The value handed to the factory's context generator lookup.</param>
        /// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">The model package does not provide a sequence model.</exception>
        public POSTaggerME(POSModel model, int beamSize, int cacheSize) {
            if (model == null) {
                throw new ArgumentNullException("model");
            }

            modelPackage = model;
            size = beamSize;

            TagDictionary = modelPackage.Factory.TagDictionary;
            ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
            SequenceValidator = modelPackage.Factory.GetSequenceValidator();

            this.model = model.PosSequenceModel;

            // A tagger without a sequence model cannot work, so fail right away.
            var hasSequenceModel = this.model != null;
            if (!hasSequenceModel) {
                throw new InvalidOperationException("Unable to retrieve the model.");
            }
        }
Пример #7
0
 /// <summary>
 /// Creates a new <see cref="ParserModel"/> that reuses this model's language, build, check and
 /// attach models, chunker model, head rules and parser type, but uses the supplied tagger model.
 /// </summary>
 /// <param name="taggerModel">The POS tagger model to place in the new parser model.</param>
 /// <returns>The newly created <see cref="ParserModel"/>.</returns>
 public ParserModel UpdateTaggerModel(POSModel taggerModel) {
     var updated = new ParserModel(
         Language,
         BuildModel,
         CheckModel,
         AttachModel,
         taggerModel,
         ParserChunkerModel,
         HeadRules,
         ParserType);

     return updated;
 }
Пример #8
0
        /// <summary>
        /// Registers the serializers for the artifact file name extensions used by this model
        /// (<c>.headrules</c>, <c>.postagger</c> and <c>.chunker</c>) on top of those registered by the base class.
        /// Override this method to register custom file extensions.
        /// </summary>
        /// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
        /// <remarks>The subclasses should invoke the <see cref="ArtifactProvider.RegisterArtifactType" /> to register
        /// the proper serialization/deserialization methods for a new extension.
        /// Warning: This method is called in the constructor of the base class! It is ONLY designed to register serializers.</remarks>
        protected override void CreateArtifactSerializers() {
            base.CreateArtifactSerializers();

            // Note carried over from OpenNLP (for future adaptations):
            // in 1.6.x the headrules artifact is serialized with the new API, which uses the
            // Serializable interface. That change is not backward compatible with the 1.5.x models;
            // to load a 1.5.x model the English headrules serializer must be put on the serializer map.

            RegisterArtifactType(
                ".headrules",
                (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
                stream => HeadRulesManager.Deserialize(Language, stream));

            RegisterArtifactType(
                ".postagger",
                (artifact, stream) => {
                    var posModel = artifact as POSModel;
                    if (posModel == null) {
                        throw new InvalidOperationException();
                    }

                    posModel.Serialize(stream);
                },
                stream => {
                    var loaded = new POSModel(stream);

                    // The 1.6.x models write the non-default beam size into the model itself.
                    // In 1.5.x the parser configured the beam size when the model was loaded,
                    // which is no longer possible with the new APIs, so a 1.5 model without a
                    // beam size entry is rebuilt with a beam size of 10.
                    var isLegacy = loaded.Version.Major == 1 && loaded.Version.Minor == 5;
                    if (!isLegacy || loaded.Manifest.Contains(Parameters.BeamSize)) {
                        return loaded;
                    }

                    return new POSModel(loaded.Language, loaded.MaxentModel, 10, null, loaded.Factory);
                });

            RegisterArtifactType(
                ".chunker",
                (artifact, stream) => {
                    var chunkerModel = artifact as ChunkerModel;
                    if (chunkerModel == null) {
                        throw new InvalidOperationException();
                    }

                    chunkerModel.Serialize(stream);
                },
                stream => {
                    var loaded = new ChunkerModel(stream);

                    // 1.5 chunker models are rebuilt with a ParserChunkerFactory.
                    var isLegacy = loaded.Version.Major == 1 && loaded.Version.Minor == 5;
                    if (!isLegacy) {
                        return loaded;
                    }

                    return new ChunkerModel(loaded.Language, loaded.MaxentModel, new ParserChunkerFactory());
                });
        }
Пример #9
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> for a chunking parser.
        /// Delegates to the full constructor with no attach model and <c>ParserType.Chunking</c>.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            Dictionary<string, string> manifestInfoEntries)
            : this(
                languageCode,
                buildModel,
                checkModel,
                null, // attach model: not supplied by this overload
                parserTagger,
                chunkerTagger,
                headRules,
                ParserType.Chunking,
                manifestInfoEntries) {
        }
Пример #10
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> from its component models and head rules,
        /// passing <c>null</c> as the manifest information entries to the full constructor.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType)
            : this(
                languageCode,
                buildModel,
                checkModel,
                attachModel,
                parserTagger,
                chunkerTagger,
                headRules,
                modelType,
                null) { // no manifest information entries
        }
Пример #11
0
 /// <summary>
 /// Creates a <see cref="POSTaggerME"/> for the given model by delegating to the three-argument
 /// constructor with <c>DefaultBeamSize</c> as the beam size and <c>0</c> as the cache size.
 /// </summary>
 /// <param name="model">The model package to tag with.</param>
 public POSTaggerME(POSModel model)
     : this(model, DefaultBeamSize, 0)
 {
 }