/// <summary>
/// Trains a POS model, serializes it into an in-memory buffer, loads it back and
/// verifies that the language, manifest, sequence model type and factory type
/// of the reloaded model match the ones of the trained model.
/// </summary>
public void TestPOSModelSerializationMaxent()
{
    var trained = POSTaggerMETest.TrainPOSModel();

    using (var buffer = new MemoryStream())
    {
        // The buffer is handed to the serializer through an UnclosableStream wrapper
        // (presumably so the serializer cannot close it - confirm against UnclosableStream).
        var guarded = new UnclosableStream(buffer);
        trained.Serialize(guarded);

        // Rewind so the model can be read back from the start of the buffer.
        buffer.Position = 0;

        var reloaded = new POSModel(buffer);

        Assert.AreEqual(trained.Language, reloaded.Language);
        Assert.AreEqual(trained.Manifest, reloaded.Manifest);
        Assert.AreEqual(trained.PosSequenceModel.GetType(), reloaded.PosSequenceModel.GetType());
        Assert.AreEqual(trained.Factory.GetType(), reloaded.Factory.GetType());
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model.
/// The beam size is requested from the model manifest under <see cref="Parameters.BeamSize"/>,
/// with <see cref="DefaultBeamSize"/> supplied as the fallback value.
/// </summary>
/// <param name="model">The model.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public POSTaggerME(POSModel model)
{
    if (model == null)
    {
        throw new ArgumentNullException("model");
    }

    var manifestBeamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

    size = manifestBeamSize;
    modelPackage = model;

    TagDictionary = model.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(manifestBeamSize);
    SequenceValidator = model.Factory.GetSequenceValidator();

    // Use the sequence model stored in the package when there is one; otherwise
    // build a beam search on top of the package's maxent model.
    this.model = model.PosSequenceModel;
    if (this.model == null)
    {
        this.model = new ML.BeamSearch<string>(manifestBeamSize, model.MaxentModel, 0);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> from a model package.
/// </summary>
/// <remarks>
/// The beam size is looked up in the manifest of <paramref name="model"/> using the
/// <see cref="Parameters.BeamSize"/> key; <see cref="DefaultBeamSize"/> is passed as the default.
/// </remarks>
/// <param name="model">The model.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public POSTaggerME(POSModel model)
{
    if (model == null)
        throw new ArgumentNullException("model");

    var beam = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

    // Plain state taken over from the arguments.
    size = beam;
    modelPackage = model;

    // Collaborators supplied by the model's factory.
    TagDictionary = model.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(beam);
    SequenceValidator = model.Factory.GetSequenceValidator();

    // Fall back to a beam search over the maxent model when the package
    // does not provide a sequence model.
    this.model =
        model.PosSequenceModel
        ?? new ML.BeamSearch<string>(beam, model.MaxentModel, 0);
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> from the given models, head rules,
/// parser type and manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="modelType"/> is <see cref="Parser.ParserType.Chunking"/> and <paramref name="attachModel"/> is not <c>null</c>,
/// or
/// <paramref name="modelType"/> is <see cref="Parser.ParserType.TreeInsert"/> and <paramref name="attachModel"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries)
    : base(ComponentName, languageCode, manifestInfoEntries)
{
    // Validate the attach model against the parser type and record the type in the manifest.
    if (modelType == ParserType.Chunking)
    {
        if (attachModel != null)
        {
            throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");
        }

        Manifest[ParserTypeParameter] = "CHUNKING";
    }
    else if (modelType == ParserType.TreeInsert)
    {
        if (attachModel == null)
        {
            throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", "attachModel");
        }

        Manifest[ParserTypeParameter] = "TREEINSERT";

        // Only the tree-insert parser stores an attach model artifact.
        artifactMap[EntryAttachModel] = attachModel;
    }
    else
    {
        throw new ArgumentOutOfRangeException("modelType", "Unknown model type");
    }

    // Artifacts shared by both parser types.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;

    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME" /> using the given model,
/// beam size and cache size.
/// </summary>
/// <param name="model">The model.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="cacheSize">Size of the cache; forwarded to the factory when the context generator is created.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
/// <exception cref="System.InvalidOperationException">Unable to retrieve the model.</exception>
public POSTaggerME(POSModel model, int beamSize, int cacheSize)
{
    if (model == null)
        throw new ArgumentNullException("model");

    size = beamSize;
    modelPackage = model;

    TagDictionary = modelPackage.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
    SequenceValidator = modelPackage.Factory.GetSequenceValidator();

    // Unlike the beam size, the sequence model must come from the package itself;
    // there is no fallback here.
    var sequenceModel = model.PosSequenceModel;
    if (sequenceModel == null)
        throw new InvalidOperationException("Unable to retrieve the model.");

    this.model = sequenceModel;
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with an explicit beam size and cache size.
/// </summary>
/// <param name="model">The model package providing the factory and the sequence model.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="cacheSize">Size of the cache handed to the context generator factory method.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
/// <exception cref="System.InvalidOperationException">The model package has no sequence model.</exception>
public POSTaggerME(POSModel model, int beamSize, int cacheSize)
{
    if (model == null)
    {
        throw new ArgumentNullException("model");
    }

    modelPackage = model;
    size = beamSize;

    // modelPackage was just set to model, so the factory is read from model directly.
    TagDictionary = model.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
    SequenceValidator = model.Factory.GetSequenceValidator();

    this.model = model.PosSequenceModel;

    if (this.model != null)
    {
        return;
    }

    throw new InvalidOperationException("Unable to retrieve the model.");
}
/// <summary>
/// Creates a new <see cref="ParserModel"/> that takes its language, build, check, attach and
/// chunker models, head rules and parser type from this instance, and uses
/// <paramref name="taggerModel"/> as the tagger model.
/// </summary>
/// <param name="taggerModel">The tagger model for the new parser model.</param>
/// <returns>The newly created <see cref="ParserModel"/>.</returns>
public ParserModel UpdateTaggerModel(POSModel taggerModel)
{
    var updated = new ParserModel(
        Language,
        BuildModel,
        CheckModel,
        AttachModel,
        taggerModel,
        ParserChunkerModel,
        HeadRules,
        ParserType);

    return updated;
}
/// <summary>
/// Registers the serializers for the artifact file name extensions used by this model
/// (<c>.headrules</c>, <c>.postagger</c> and <c>.chunker</c>) on top of the ones registered by the base class.
/// Override this method to register custom file extensions.
/// </summary>
/// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
/// <remarks>The subclasses should invoke the <see cref="ArtifactProvider.RegisterArtifactType" /> to register
/// the proper serialization/deserialization methods for a new extension.
/// Warning: This method is called in the constructor of the base class! It is ONLY designed to register serializers.</remarks>
protected override void CreateArtifactSerializers()
{
    base.CreateArtifactSerializers();

    // Note from OpenNLP (for future adaptations):
    // In 1.6.x the headrules artifact is serialized with the new API
    // which uses the Serializable interface.
    // This change is not backward compatible with the 1.5.x models.
    // In order to load a 1.5.x model the English headrules serializer must be
    // put on the serializer map.
    RegisterArtifactType(
        ".headrules",
        (artifact, output) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, output),
        input => HeadRulesManager.Deserialize(Language, input));

    RegisterArtifactType(
        ".postagger",
        (artifact, output) =>
        {
            var posModel = artifact as POSModel;
            if (posModel == null)
            {
                throw new InvalidOperationException();
            }

            posModel.Serialize(output);
        },
        input =>
        {
            var loaded = new POSModel(input);

            // The 1.6.x models write the non-default beam size into the model itself.
            // In 1.5.x the parser configured the beam size when the model was loaded,
            // this is not possible anymore with the new APIs.
            var isLegacyWithoutBeamSize =
                loaded.Version.Major == 1 &&
                loaded.Version.Minor == 5 &&
                !loaded.Manifest.Contains(Parameters.BeamSize);

            if (!isLegacyWithoutBeamSize)
            {
                return loaded;
            }

            // Rebuild the legacy model with an explicit beam size of 10.
            return new POSModel(loaded.Language, loaded.MaxentModel, 10, null, loaded.Factory);
        });

    RegisterArtifactType(
        ".chunker",
        (artifact, output) =>
        {
            var chunkerModel = artifact as ChunkerModel;
            if (chunkerModel == null)
            {
                throw new InvalidOperationException();
            }

            chunkerModel.Serialize(output);
        },
        input =>
        {
            var loaded = new ChunkerModel(input);

            var isLegacy = loaded.Version.Major == 1 && loaded.Version.Minor == 5;
            if (!isLegacy)
            {
                return loaded;
            }

            // 1.5.x chunker models are re-created with the parser chunker factory.
            return new ChunkerModel(loaded.Language, loaded.MaxentModel, new ParserChunkerFactory());
        });
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> for a chunking parser.
/// Delegates to the full constructor with a <c>null</c> attach model and
/// <see cref="ParserType.Chunking"/> as the model type.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    Dictionary<string, string> manifestInfoEntries)
    : this(
        languageCode,
        buildModel,
        checkModel,
        null,
        parserTagger,
        chunkerTagger,
        headRules,
        ParserType.Chunking,
        manifestInfoEntries) {
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules
/// without manifest information entries. Delegates to the full constructor, passing <c>null</c>
/// for the manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType)
    : this(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        parserTagger,
        chunkerTagger,
        headRules,
        modelType,
        null) {
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model,
/// the default beam size (<see cref="DefaultBeamSize"/>) and a cache size of 0.
/// </summary>
/// <param name="model">The model.</param>
public POSTaggerME(POSModel model) : this(model, DefaultBeamSize, 0) {
}