/// <summary>
/// Trains a name finder model with the given parameters.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be <c>null</c>.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="samples"/>, <paramref name="parameters"/> or <paramref name="factory"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.InvalidOperationException">Unexpected trainer type!</exception>
public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor) {
    // Validate up front so a missing argument is reported by name instead of
    // surfacing later as a NullReferenceException.
    if (samples == null) {
        throw new ArgumentNullException("samples");
    }
    if (parameters == null) {
        throw new ArgumentNullException("parameters");
    }
    if (factory == null) {
        throw new ArgumentNullException("factory");
    }

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var manifestInfoEntries = new Dictionary<string, string>();
    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel meModel = null;
    ML.Model.ISequenceClassificationModel<string> scModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            // NOTE(review): this is the only branch that forwards `type` and the sequence codec;
            // the two sequence branches below build their streams without them - confirm this is intended.
            var eventStream = new NameFinderEventStream(
                samples,
                type,
                factory.CreateContextGenerator(),
                factory.CreateSequenceCodec());
            var nfTrainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            meModel = nfTrainer.Train(eventStream);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var sampleStream = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
            var nsTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            meModel = nsTrainer.Train(sampleStream);
            break;
        case TrainerType.SequenceTrainer:
            var sequenceStream = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());
            var sqTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);
            scModel = sqTrainer.Train(sequenceStream);
            break;
        default:
            // Also reached when no trainer type could be determined.
            throw new InvalidOperationException("Unexpected trainer type!");
    }

    // The sequence classification model takes no beam size; the maxent model does.
    if (scModel != null) {
        return new TokenNameFinderModel(
            languageCode,
            scModel,
            factory.FeatureGenerator,
            factory.Resources,
            manifestInfoEntries,
            factory.SequenceCodec);
    }

    return new TokenNameFinderModel(
        languageCode,
        meModel,
        beamSize,
        factory.FeatureGenerator,
        factory.Resources,
        manifestInfoEntries,
        factory.SequenceCodec);
}
/// <summary>
/// Trains a lemmatizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The lemmatizer factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training
/// operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="LemmatizerModel" /> object.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="samples"/>, <paramref name="parameters"/> or <paramref name="factory"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static LemmatizerModel Train(string languageCode, IObjectStream<LemmaSample> samples, TrainingParameters parameters, LemmatizerFactory factory, Monitor monitor) {
    // Validate up front so a missing argument is reported by name instead of
    // surfacing later as a NullReferenceException.
    if (samples == null) {
        throw new ArgumentNullException("samples");
    }
    if (parameters == null) {
        throw new ArgumentNullException("parameters");
    }
    if (factory == null) {
        throw new ArgumentNullException("factory");
    }

    var manifestInfoEntries = new Dictionary<string, string>();
    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var cg = factory.GetContextGenerator();

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel model = null;
    ML.Model.ISequenceClassificationModel<string> seqModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var s1 = new LemmaSampleEventStream(samples, cg);
            var t1 = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            model = t1.Train(s1);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var s2 = new LemmaSampleSequenceStream(samples, cg);
            var t2 = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            model = t2.Train(s2);
            break;
        case TrainerType.SequenceTrainer:
            var s3 = new LemmaSampleSequenceStream(samples, cg);
            var t3 = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);
            seqModel = t3.Train(s3);
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    // Only the maxent-based constructor takes the beam size.
    return model != null
        ? new LemmatizerModel(languageCode, model, beamSize, manifestInfoEntries, factory)
        : new LemmatizerModel(languageCode, seqModel, manifestInfoEntries, factory);
}
/// <summary>
/// Trains a Part of Speech model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The part-of-speech tagger factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="samples"/>, <paramref name="parameters"/> or <paramref name="factory"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(string languageCode, IObjectStream<POSSample> samples, TrainingParameters parameters, POSTaggerFactory factory, Monitor monitor) {
    // Validate up front so a missing argument is reported by name instead of
    // surfacing later as a NullReferenceException.
    if (samples == null) {
        throw new ArgumentNullException("samples");
    }
    if (parameters == null) {
        throw new ArgumentNullException("parameters");
    }
    if (factory == null) {
        throw new ArgumentNullException("factory");
    }

    var contextGenerator = factory.GetPOSContextGenerator();
    var manifestInfoEntries = new Dictionary<string, string>();
    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel posModel = null;
    ML.Model.ISequenceClassificationModel<string> seqPosModel = null;

    switch (trainerType) {
        case TrainerType.EventModelTrainer:
            var es = new POSSampleEventStream(samples, contextGenerator);
            var trainer = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            posModel = trainer.Train(es);
            break;
        case TrainerType.EventModelSequenceTrainer:
            var ss = new POSSampleSequenceStream(samples, contextGenerator);
            var trainer2 = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifestInfoEntries, monitor);
            posModel = trainer2.Train(ss);
            break;
        case TrainerType.SequenceTrainer:
            var trainer3 = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var ss2 = new POSSampleSequenceStream(samples, contextGenerator);
            seqPosModel = trainer3.Train(ss2);
            break;
        default:
            // Also reached when no trainer type could be determined.
            throw new NotSupportedException("Trainer type is not supported.");
    }

    if (posModel != null) {
        return new POSModel(languageCode, posModel, manifestInfoEntries, factory);
    }

    return new POSModel(languageCode, seqPosModel, manifestInfoEntries, factory);
}
/// <summary>
/// Trains a chunker model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The chunker factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="samples"/>, <paramref name="parameters"/> or <paramref name="factory"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ChunkerModel Train(string languageCode, IObjectStream<ChunkSample> samples, TrainingParameters parameters, ChunkerFactory factory, Monitor monitor) {
    // Validate up front so a missing argument is reported by name instead of
    // surfacing later as a NullReferenceException.
    if (samples == null) {
        throw new ArgumentNullException("samples");
    }
    if (parameters == null) {
        throw new ArgumentNullException("parameters");
    }
    if (factory == null) {
        throw new ArgumentNullException("factory");
    }

    var trainerType = TrainerFactory.GetTrainerType(parameters);
    if (!trainerType.HasValue) {
        throw new InvalidOperationException("The trainer was not specified.");
    }

    var manifestInfoEntries = new Dictionary<string, string>();

    // Exactly one of these is assigned, depending on the trainer type.
    IMaxentModel chunkerModel = null;
    ML.Model.ISequenceClassificationModel<string> seqChunkerModel = null;

    // Only the sequence and event trainers are handled here; any other trainer type is rejected.
    switch (trainerType) {
        case TrainerType.SequenceTrainer:
            var st = TrainerFactory.GetSequenceModelTrainer(parameters, manifestInfoEntries, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var ss = new ChunkSampleSequenceStream(samples, factory.GetContextGenerator());
            seqChunkerModel = st.Train(ss);
            break;
        case TrainerType.EventModelTrainer:
            var es = new ChunkerEventStream(samples, factory.GetContextGenerator());
            var et = TrainerFactory.GetEventTrainer(parameters, manifestInfoEntries, monitor);
            chunkerModel = et.Train(es);
            break;
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }

    // Only the maxent-based constructor takes the beam size.
    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);

    return chunkerModel != null
        ? new ChunkerModel(languageCode, chunkerModel, beamSize, manifestInfoEntries, factory)
        : new ChunkerModel(languageCode, seqChunkerModel, manifestInfoEntries, factory);
}