/// <summary>
/// Initializes a new instance of the <see cref="ChunkerME"/> with the specified <see cref="ChunkerModel"/>.
/// </summary>
/// <param name="model">The chunker model.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public ChunkerME(ChunkerModel model) {
    // Guard added for consistency with the other tool constructors (e.g. LemmatizerME,
    // NameFinderME); without it the next line fails with a NullReferenceException.
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    contextGenerator = model.Factory.GetContextGenerator();
    sequenceValidator = model.Factory.GetSequenceValidator();

    // Fall back to a beam-search decoder when the model does not carry its own sequence model.
    this.model = model.ChunkerSequenceModel ?? new BeamSearch(model.BeamSize, model.MaxentModel);
}
/// <summary>
/// Trains a name finder model with the given parameters.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.</param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/> model.</returns>
/// <exception cref="InvalidOperationException">The trainer was not specified or the trainer type is unexpected.</exception>
public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples,
    TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor) {

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var manifestInfoEntries = new Dictionary<string, string>();

    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Guard added for consistency with the other Train methods (LemmatizerME, ChunkerME),
    // which reject an unspecified trainer up front instead of falling into the default case.
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    IMaxentModel meModel = null;
    ML.Model.ISequenceClassificationModel<string> scModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var eventStream = new NameFinderEventStream(samples, type, factory.CreateContextGenerator(),
                factory.CreateSequenceCodec());
            var nfTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            meModel = nfTrainer.Train(eventStream);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var sampleStream = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
            var nsTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            meModel = nsTrainer.Train(sampleStream);
            break;
        case TrainerType.SequenceTrainer:
            var sequenceStream = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
            var sqTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);
            scModel = sqTrainer.Train(sequenceStream);
            break;
        default:
            throw new InvalidOperationException("Unexpected trainer type!");
    }

    // A sequence trainer produces a sequence classification model; the other trainers
    // produce a maxent model that is decoded with a beam search of the configured size.
    if (scModel != null) {
        return new TokenNameFinderModel(
            languageCode,
            scModel,
            factory.FeatureGenerator,
            factory.Resources,
            manifestInfoEntries,
            factory.SequenceCodec);
    }

    return new TokenNameFinderModel(
        languageCode,
        meModel,
        beamSize,
        factory.FeatureGenerator,
        factory.Resources,
        manifestInfoEntries,
        factory.SequenceCodec);
}
/// <summary>
/// Initializes the current instance with the specified model and the specified beam size.
/// </summary>
/// <param name="model">The model for this chunker.</param>
/// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
/// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> used to determine whether an outcome is valid for the preceding sequence. This can be used to implement constraints on what sequences are valid.</param>
/// <param name="contextGenerator">The context generator.</param>
internal ChunkerME(ChunkerModel model, int beamSize, ISequenceValidator<string> sequenceValidator,
    IChunkerContextGenerator contextGenerator) {
    // Deprecated upstream in OpenNLP, but the Parser still relies on this overload;
    // swapping the context generator via the factory is not ideal in this situation.
    this.contextGenerator = contextGenerator;
    this.sequenceValidator = sequenceValidator;

    this.model = model.ChunkerSequenceModel ?? new BeamSearch(beamSize, model.MaxentModel);
}
/// <summary>
/// Initializes the current instance with the specified model and the specified beam size.
/// </summary>
/// <param name="model">The model for this chunker.</param>
/// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
/// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> used to decide whether an outcome is valid for the preceding sequence; allows constraining which sequences are valid.</param>
/// <param name="contextGenerator">The context generator.</param>
internal ChunkerME(ChunkerModel model,
                   int beamSize,
                   ISequenceValidator<string> sequenceValidator,
                   IChunkerContextGenerator contextGenerator) {
    // NOTE: deprecated in the original OpenNLP, yet the Parser still needs it; changing
    // the context generator through the factory is not ideal here.
    this.sequenceValidator = sequenceValidator;
    this.contextGenerator = contextGenerator;

    // Prefer the model's own sequence model; otherwise decode with a beam search.
    this.model = model.ChunkerSequenceModel ?? new BeamSearch(beamSize, model.MaxentModel);
}
/// <summary>
/// Initializes a new instance of the <see cref="LemmatizerME"/> with the specified <see cref="LemmatizerModel"/>.
/// </summary>
/// <param name="model">The lemmatizer model.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public LemmatizerME(LemmatizerModel model) {
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    sequenceValidator = model.Factory.GetSequenceValidator();
    contextGenerator = model.Factory.GetContextGenerator();

    // Knuppe: the original implementation has a condition to recreate the beam search
    // object, but that condition can never occur given the getLemmatizerSequenceModel()
    // method logic, so the sequence model is used directly.
    this.model = model.LemmatizerSequenceModel;
}
/// <summary>
/// Trains a lemmatizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The sentence detector factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training
/// operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static LemmatizerModel Train(string languageCode, IObjectStream<LemmaSample> samples,
    TrainingParameters parameters, LemmatizerFactory factory, Monitor monitor) {

    var manifestInfoEntries = new Dictionary<string, string>();
    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var contextGenerator = factory.GetContextGenerator();

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    IMaxentModel maxentModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    // Each trainer type consumes the samples through a different stream adapter.
    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            maxentModel = eventTrainer.Train(new LemmaSampleEventStream(samples, contextGenerator));
            break;
        case TrainerType.EventModelSequenceTrainer:
            var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            maxentModel = eventSequenceTrainer.Train(new LemmaSampleSequenceStream(samples, contextGenerator));
            break;
        case TrainerType.SequenceTrainer:
            var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);
            sequenceModel = sequenceTrainer.Train(new LemmaSampleSequenceStream(samples, contextGenerator));
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    if (maxentModel != null) {
        return new LemmatizerModel(languageCode, maxentModel, beamSize, manifestInfoEntries, factory);
    }

    return new LemmatizerModel(languageCode, sequenceModel, manifestInfoEntries, factory);
}
/// <summary>
/// Trains a Part of Speech model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The sentence detector factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(string languageCode, IObjectStream<POSSample> samples,
    TrainingParameters parameters, POSTaggerFactory factory, Monitor monitor) {

    var contextGenerator = factory.GetPOSContextGenerator();
    var manifestInfoEntries = new Dictionary<string, string>();

    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Guard added for consistency with the other Train methods (LemmatizerME, ChunkerME),
    // which reject an unspecified trainer explicitly.
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    IMaxentModel posModel = null;
    ML.Model.ISequenceClassificationModel<string> seqPosModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var es = new POSSampleEventStream(samples, contextGenerator);
            var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            posModel = trainer.Train(es);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var ss = new POSSampleSequenceStream(samples, contextGenerator);
            var trainer2 = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            posModel = trainer2.Train(ss);
            break;
        case TrainerType.SequenceTrainer:
            var trainer3 = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var ss2 = new POSSampleSequenceStream(samples, contextGenerator);
            seqPosModel = trainer3.Train(ss2);
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    if (posModel != null) {
        return new POSModel(languageCode, posModel, manifestInfoEntries, factory);
    }

    return new POSModel(languageCode, seqPosModel, manifestInfoEntries, factory);
}
/// <summary>
/// Trains a chunker model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The sentence detector factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.</param>
/// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples,
    TrainingParameters parameters, ChunkerFactory factory, Monitor monitor) {

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    var manifestInfoEntries = new Dictionary<string, string>();

    IMaxentModel maxentModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    switch (trainerType) {
        case TrainerType.SequenceTrainer:
            var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var sampleStream = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());
            sequenceModel = sequenceTrainer.Train(sampleStream);
            break;
        case TrainerType.EventModelTrainer:
            var eventStream = new ChunkerEventStream(samples, factory.GetContextGenerator());
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            maxentModel = eventTrainer.Train(eventStream);
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

    if (maxentModel != null) {
        return new ChunkerModel(languageCode, maxentModel, beamSize, manifestInfoEntries, factory);
    }

    return new ChunkerModel(languageCode, sequenceModel, manifestInfoEntries, factory);
}
/// <summary>
/// Initializes a new instance of the <see cref="NameFinderME"/> using the given <see cref="TokenNameFinderModel"/>.
/// </summary>
/// <param name="model">The model.</param>
/// <exception cref="System.ArgumentNullException">model</exception>
public NameFinderME(TokenNameFinderModel model) {
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    this.model = model.NameFinderSequenceModel;

    sequenceCodec = model.Factory.CreateSequenceCodec();
    sequenceValidator = sequenceCodec.CreateSequenceValidator();
    contextGenerator = model.Factory.CreateContextGenerator();

    // TODO: We should deprecate this. And come up with a better solution!
    additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();
    contextGenerator.AddFeatureGenerator(
        new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model
/// and the default beam size of 3.
/// </summary>
/// <param name="model">The model.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public POSTaggerME(POSModel model) {
    if (model == null) {
        // nameof is refactor-safe; the sibling constructors already use it.
        throw new ArgumentNullException(nameof(model));
    }

    var beamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

    size = beamSize;
    modelPackage = model;

    TagDictionary = model.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(beamSize);
    SequenceValidator = model.Factory.GetSequenceValidator();

    // Fall back to a beam-search decoder when the model has no sequence model of its own.
    this.model = model.PosSequenceModel ?? new ML.BeamSearch<string>(beamSize, model.MaxentModel, 0);
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME" /> with the provided
/// model and provided beam size.
/// </summary>
/// <param name="model">The model.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="cacheSize">Size of the cache.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="model"/></exception>
/// <exception cref="System.InvalidOperationException">Unable to retrieve the model.</exception>
public POSTaggerME(POSModel model, int beamSize, int cacheSize) {
    if (model == null) {
        // nameof is refactor-safe; the sibling constructors already use it.
        throw new ArgumentNullException(nameof(model));
    }

    size = beamSize;
    modelPackage = model;

    TagDictionary = modelPackage.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
    SequenceValidator = modelPackage.Factory.GetSequenceValidator();

    // This overload does not build a fallback beam search, so the model must ship one.
    this.model = model.PosSequenceModel;
    if (this.model == null) {
        throw new InvalidOperationException("Unable to retrieve the model.");
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided model
/// and the default beam size of 3.
/// </summary>
/// <param name="model">The model.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
public POSTaggerME(POSModel model) {
    // Braces added to the guard and nameof used instead of a "model" string literal.
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    var beamSize = model.Manifest.Get(Parameters.BeamSize, DefaultBeamSize);

    size = beamSize;
    modelPackage = model;

    TagDictionary = model.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(beamSize);
    SequenceValidator = model.Factory.GetSequenceValidator();

    // Fall back to a beam-search decoder when the model has no sequence model of its own.
    this.model = model.PosSequenceModel ?? new ML.BeamSearch<string>(beamSize, model.MaxentModel, 0);
}
/// <summary>
/// Initializes a new instance of the <see cref="POSTaggerME"/> with the provided
/// model, beam size and context-generator cache size.
/// </summary>
/// <param name="model">The model.</param>
/// <param name="beamSize">Size of the beam.</param>
/// <param name="cacheSize">Size of the cache.</param>
/// <exception cref="ArgumentNullException"><paramref name="model"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">Unable to retrieve the model.</exception>
public POSTaggerME(POSModel model, int beamSize, int cacheSize) {
    // Braces added to the guard and nameof used instead of a "model" string literal.
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    size = beamSize;
    modelPackage = model;

    TagDictionary = modelPackage.Factory.TagDictionary;
    ContextGenerator = model.Factory.GetPOSContextGenerator(cacheSize);
    SequenceValidator = modelPackage.Factory.GetSequenceValidator();

    // This overload does not build a fallback beam search, so the model must ship one.
    this.model = model.PosSequenceModel;
    if (this.model == null) {
        throw new InvalidOperationException("Unable to retrieve the model.");
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="NameFinderME"/> using the given <see cref="TokenNameFinderModel"/>.
/// </summary>
/// <param name="model">The model.</param>
/// <exception cref="System.ArgumentNullException">model</exception>
public NameFinderME(TokenNameFinderModel model) {
    // nameof used instead of a "model" string literal, with braces on the guard.
    if (model == null) {
        throw new ArgumentNullException(nameof(model));
    }

    sequenceCodec = model.Factory.CreateSequenceCodec();
    sequenceValidator = sequenceCodec.CreateSequenceValidator();

    this.model = model.NameFinderSequenceModel;

    contextGenerator = model.Factory.CreateContextGenerator();

    // TODO: We should deprecate this. And come up with a better solution!
    additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();
    contextGenerator.AddFeatureGenerator(
        new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8));
}