/// <summary>
/// Trains a part-of-speech model from the training file at the given path,
/// reading the samples through a <see cref="WordTagSampleStream"/>.
/// </summary>
/// <param name="path">The path of the training file. The file is opened read-only.</param>
/// <param name="mt">The model type that selects the training algorithm.</param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="NotSupportedException">
/// The <paramref name="mt"/> value is neither <c>Maxent</c> nor <c>Perceptron</c>.
/// </exception>
public static POSModel TrainModel(string path, ModelType mt) {
    // The file handle is released when this block exits, whether training succeeds,
    // fails, or the model type is rejected below.
    // NOTE(review): assumes POSTaggerME.Train has read every sample it needs by the time it returns - confirm.
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read)) {
        WordTagSampleStream stream = new WordTagSampleStream(fs);

        TrainingParameters trainParams = new TrainingParameters();
        trainParams.Set(Parameters.Iterations, "100");
        trainParams.Set(Parameters.Cutoff, "0");

        switch (mt) {
            case ModelType.Maxent:
                trainParams.Set(Parameters.Algorithm, "MAXENT");
                break;
            case ModelType.Perceptron:
                trainParams.Set(Parameters.Algorithm, "PERCEPTRON");
                break;
            default:
                throw new NotSupportedException();
        }

        return POSTaggerME.Train(TRAINING_LANGUAGE, stream, trainParams, new POSTaggerFactory());
    }
}
/// <summary>
/// Trains a parser model with the given parameters.
/// </summary>
/// <remarks>
/// The stages run in this order: dictionary, build, tagger, chunker, check. The sample stream is
/// reset between stages. The monitor is handed to the build and check trainers only.
/// </remarks>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters) {

    var dict = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);

    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

    samples.Reset();

    // tag
    // Apply the default beam size and train with the very same settings object.
    // Fetching the "tagger" namespace a second time could discard the default
    // if GetNamespace hands out a fresh copy on each call.
    var posTaggerParams = parameters.GetNamespace("tagger");
    if (!posTaggerParams.Contains(Parameters.BeamSize)) {
        posTaggerParams.Set(Parameters.BeamSize, "10");
    }

    var posModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        posTaggerParams,
        new POSTaggerFactory());

    samples.Reset();

    // chunk
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory());

    samples.Reset();

    // check
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);

    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
}
/// <summary>
/// Trains a part-of-speech model for the "en" language code from the stream returned by
/// <c>CreateSampleStream</c>, using 100 iterations and a cutoff of 5.
/// </summary>
/// <param name="type">The model type that selects the training algorithm.</param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="NotSupportedException">
/// The <paramref name="type"/> value is neither <c>Maxent</c> nor <c>Perceptron</c>.
/// </exception>
internal static POSModel TrainPosModel(ModelType type = ModelType.Maxent) {
    var settings = new TrainingParameters();

    // Map the requested model type onto the algorithm name; anything else is rejected.
    string algorithmName;
    if (type == ModelType.Maxent) {
        algorithmName = "MAXENT";
    } else if (type == ModelType.Perceptron) {
        algorithmName = "PERCEPTRON";
    } else {
        throw new NotSupportedException();
    }

    settings.Set(Parameters.Algorithm, algorithmName);
    settings.Set(Parameters.Iterations, "100");
    settings.Set(Parameters.Cutoff, "5");

    var sampleStream = CreateSampleStream();
    return POSTaggerME.Train("en", sampleStream, settings, new POSTaggerFactory());
}
/// <summary>
/// Trains a parser model with the given parameters.
/// </summary>
/// <remarks>
/// The stages run in this order: dictionary, tagger, chunker, build, check, attach. The sample
/// stream is reset between stages, and the resulting model is created with
/// <c>ParserType.TreeInsert</c>.
/// </remarks>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="monitor">
/// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters,
    Monitor monitor) {

    // Stage 0: dictionary, used later by the build event stream.
    System.Diagnostics.Debug.Print("Building dictionary");
    var headDictionary = BuildDictionary(samples, rules, parameters);
    samples.Reset();

    // Stage 1: part-of-speech tagger.
    var taggerSamples = new PosSampleStream(samples);
    var taggerSettings = parameters.GetNamespace("tagger");
    var taggerModel = POSTaggerME.Train(languageCode, taggerSamples, taggerSettings, new POSTaggerFactory(), monitor);
    samples.Reset();

    // Stage 2: chunker.
    var chunkerSamples = new ChunkSampleStream(samples);
    var chunkerSettings = parameters.GetNamespace("chunker");
    var chunkerModel = ChunkerME.Train(languageCode, chunkerSamples, chunkerSettings, new ChunkerFactory(), monitor);
    samples.Reset();

    // Training reports from the three event trainers are collected here.
    var manifest = new Dictionary<string, string>();

    // Stage 3: build model.
    System.Diagnostics.Debug.Print("Training builder");
    var buildEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, headDictionary);
    var buildReport = new Dictionary<string, string>();
    var builderModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("build"), buildReport, monitor)
        .Train(buildEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, buildReport, "build");
    samples.Reset();

    // Stage 4: check model.
    System.Diagnostics.Debug.Print("Training checker");
    var checkEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReport = new Dictionary<string, string>();
    var checkerModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("check"), checkReport, monitor)
        .Train(checkEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, checkReport, "check");
    samples.Reset();

    // Stage 5: attach model.
    System.Diagnostics.Debug.Print("Training attacher");
    var attachEventStream = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
    var attachReport = new Dictionary<string, string>();
    var attacherModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("attach"), attachReport, monitor)
        .Train(attachEventStream);
    Chunking.Parser.MergeReportIntoManifest(manifest, attachReport, "attach");

    return new ParserModel(
        languageCode,
        builderModel,
        checkerModel,
        attacherModel,
        taggerModel,
        chunkerModel,
        rules,
        ParserType.TreeInsert,
        manifest);
}