/// <summary>
/// Handles a click on the start-training button: trains the current network and appends
/// the resulting quantization error to the information text.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="eventArgs">The routed event data.</param>
private void StartTrainButton_Click(object sender, RoutedEventArgs eventArgs)
{
    // Nothing can be trained without both a network and a data set.
    if (Network == null || TrainingData == null)
    {
        MessageBox.Show("You didn't create network or get data points!\n");
        return;
    }

    try
    {
        TrainingParameters parameters = GetParameters();

        // An observer is attached only to two-dimensional networks; otherwise it stays null.
        StandardTrainingObserver observer = null;
        if (Network.Dimensions == 2)
        {
            observer = new StandardTrainingObserver();
        }

        Network.Observer = observer;
        Network.Train(parameters);

        IErrorCalculator errorCalculator = new QuantizationErrorCalculator()
        {
            DistanceCalculator = Network.DistanceCalculator
        };
        SomInformation.Text += $"\nSom trained. Quantization error: {errorCalculator.CalculateError(Network.Neurons, TrainingData):F} ";

        if (observer != null)
        {
            DisplayIfTwoDimensional(observer);
        }
    }
    catch (Exception ex)
    {
        // Any failure while reading the parameters or training is shown to the user.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Copies the selected model into the final model location, cleans up the temporary models
/// and returns the model path.
/// </summary>
/// <param name="trParams">Training parameters; <c>ModelFinalLocation</c> is the destination folder.</param>
/// <param name="tpl">Temporary stored model information; <c>Item3</c> is the path of the model file to keep.</param>
/// <returns>
/// The path of the saved model, built from <c>MLFactory.m_MLModelFolder</c> and the model file name.
/// </returns>
private static string saveModel(TrainingParameters trParams, Tuple<double, double, string> tpl)
{
    // folder that holds the temporary models
    var tempModelsDir = Path.GetDirectoryName(tpl.Item3);

    // folder for the final model; CreateDirectory does nothing when the folder already exists
    var finDir = trParams.ModelFinalLocation;
    Directory.CreateDirectory(finDir);

    // copy the best model to the final location, replacing a model with the same
    // file name that an earlier run may have left there
    var fName = Path.GetFileName(tpl.Item3);
    var fullPath = Path.Combine(finDir, fName);
    File.Copy(tpl.Item3, fullPath, true);

    // clean up the temporary models folder
    MLFactory.DeleteAllFiles(tempModelsDir);

    // always return a relative path
    var bestModelPath = $"{MLFactory.m_MLModelFolder}\\{fName}";
    return bestModelPath;
}
/// <summary>
/// Builds the training parameters for the current network from the values entered in the UI.
/// </summary>
/// <returns>
/// A <see cref="SomTrainingParameters"/> when <c>Network</c> is a <see cref="SelfOrganizingMap"/>;
/// otherwise a <see cref="KMeansTrainingParameters"/>.
/// </returns>
/// <exception cref="System.FormatException">One of the text fields does not contain a valid number.</exception>
private TrainingParameters GetParameters()
{
    // Both counters are parsed up front, so invalid input is reported for either network type.
    // The invariant culture is used to match the parsing of the learning rates below.
    int epochs = Int32.Parse(TrainingEpochs.Text, CultureInfo.InvariantCulture);
    int iterations = Int32.Parse(TrainingIterations.Text, CultureInfo.InvariantCulture);

    if (Network is SelfOrganizingMap)
    {
        int kmax = TrainingData.Count * epochs;
        double minLearningRate = double.Parse(TrainingLearningRateMin.Text, CultureInfo.InvariantCulture);
        double maxLearningRate = double.Parse(TrainingLearningRateMax.Text, CultureInfo.InvariantCulture);

        return new SomTrainingParameters()
        {
            LearningRate = new DeclineExponentially(kmax, minLearningRate, maxLearningRate),
            TirednessMechanism = GetTiredMechanism(),
            NeighbourhoodFunction = GetNeighbourhoodFunction(kmax),
            NumberOfIterations = iterations,
            TrainingData = TrainingData
        };
    }

    // Every other network type is trained with the K-means parameters.
    return new KMeansTrainingParameters()
    {
        Epochs = epochs,
        TrainingData = TrainingData
    };
}
/// <summary>
/// Applies an Adam-style update to every entry of every weight matrix.
/// </summary>
/// <param name="trainingParameters">Supplies the learning rate.</param>
/// <param name="timeStep">Starting value of the step counter; it is advanced once per weight before use.</param>
private void UpdateWeightsAdam(TrainingParameters trainingParameters, int timeStep)
{
    // NOTE(review): _m and _v are single running values shared by all weights, and the step
    // counter advances per weight rather than per call. Confirm this is intended.
    int matrixCount = _weights.Count;

    for (int layer = 0; layer < matrixCount; layer++)
    {
        var weights = _weights[layer];
        int rowCount = weights.GetLength(0);
        int columnCount = weights.GetLength(1);

        for (int row = 0; row < rowCount; row++)
        {
            for (int column = 0; column < columnCount; column++)
            {
                timeStep += 1;
                var gradient = _deltaWeightMatrices[layer][row, column];

                // Running first moment and its bias-corrected value.
                _m = _Beta1 * _m + (1 - _Beta1) * gradient;
                double correctedM = _m / (1 - Math.Pow(_Beta1, timeStep));

                // Running second moment and its bias-corrected value.
                _v = _Beta2 * _v + (1 - _Beta2) * Math.Pow(gradient, 2);
                double correctedV = _v / (1 - Math.Pow(_Beta2, timeStep));

                var deltaWeight = trainingParameters.learningRate * (correctedM / Math.Sqrt(correctedV + _epsilon));

                // The previous step is subtracted together with the current one, then replaced.
                weights[row, column] -= deltaWeight + _previousDeltaWeights[layer][row, column];
                _previousDeltaWeights[layer][row, column] = deltaWeight;
            }
        }
    }
}
/// <summary>
/// Trains a sentence detector, evaluates it, then serializes the model to a temporary file and
/// checks that the Java OpenNLP implementation can load and use it.
/// </summary>
public void TestEverything()
{
    using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt"))
    {
        var mlParams = new TrainingParameters();
        mlParams.Set(Parameters.Iterations, "100");
        mlParams.Set(Parameters.Cutoff, "0");

        var sdFactory = new SentenceDetectorFactory("en", true, null, null);
        var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

        var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

        // expected value first, actual value second
        Assert.AreEqual("en", model.Language);
        Assert.AreEqual(true, model.UseTokenEnd);

        var sMe = new SentenceDetectorME(model);

        // test the SharpNL sentences
        SentenceDetectorMETest.EvalSentences(sMe);

        var sFile = Path.GetTempFileName();

        // Dispose the output stream so the model is flushed to disk and the handle is
        // released before the file is read back.
        using (var output = new FileStream(sFile, FileMode.Create))
        {
            model.Serialize(output);
        }

        var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));
        var jMe = new JavaSDME(jModel2);

        // test the Java OpenNLP sentences
        JavaEvalSentences(jMe);
    }
}
/// <summary>
/// Trains a document categorizer model on the sample stream with the default training parameters.
/// </summary>
/// <param name="factory">
/// The factory to train with; a new <see cref="DocumentCategorizerFactory"/> is used when <c>null</c>.
/// </param>
/// <returns>The trained model.</returns>
private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null)
{
    var samples = CreateSampleStream();
    var parameters = TrainingParameters.DefaultParameters();
    var effectiveFactory = factory ?? new DocumentCategorizerFactory();

    return DocumentCategorizerME.Train("x-unspecified", samples, parameters, effectiveFactory);
}
/// <summary>
/// Trains a perceptron on the prep-attach data with a step size decrease and checks the model
/// against the expected value.
/// </summary>
public void TestPerceptronOnPrepAttachDataWithStepSizeDecrease()
{
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
    parameters.Set(Parameters.Cutoff, "1");
    parameters.Set(Parameters.Iterations, "500");
    parameters.Set(Parameters.StepSizeDecrease, "0.06");

    var perceptronTrainer = TrainerFactory.GetEventTrainer(parameters, null, null);
    var trainedModel = perceptronTrainer.Train(PrepAttachDataUtility.CreateTrainingStream());

    /*
     * The Java test fails as well, so for now the value produced here is assumed to be correct.
     *
     * java.lang.AssertionError: expected:<0.7756870512503095> but was:<0.7766773953948998>
     *   at org.junit.Assert.fail(Assert.java:91)
     *   at org.junit.Assert.failNotEquals(Assert.java:645)
     *   at org.junit.Assert.assertEquals(Assert.java:441)
     *   at org.junit.Assert.assertEquals(Assert.java:510)
     */

    // OpenNLP value: 0.7756870512503095
    PrepAttachDataUtility.TestModel(trainedModel, 0.77742015350334237);
}
/// <summary>
/// Trains a naive Bayes document categorizer on a tiny two-category corpus and checks
/// the best category reported for simple inputs.
/// </summary>
public void TestSimpleTraining()
{
    IObjectStream<DocumentSample> trainingSamples = new GenericObjectStream<DocumentSample>(
        new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
        new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
        new DocumentSample("0", new[] { "x", "y", "z" }),
        new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
        new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Iterations, "100");
    parameters.Set(Parameters.Cutoff, "0");
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

    var factory = new DocumentCategorizerFactory(
        WhitespaceTokenizer.Instance,
        new [] { new BagOfWordsFeatureGenerator() });
    var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingSamples, parameters, factory);

    var categorizer = new DocumentCategorizerME(trainedModel);

    var probabilitiesForA = categorizer.Categorize("a");
    Assert.AreEqual("1", categorizer.GetBestCategory(probabilitiesForA));

    var probabilitiesForX = categorizer.Categorize("x");
    Assert.AreEqual("0", categorizer.GetBestCategory(probabilitiesForX));

    // The last entry of the sorted score map must be category 1 because it has the highest score.
    var scoresForA = categorizer.SortedScoreMap("a");
    Assert.AreEqual("1", scoresForA.Last().Value[0]);
}
/// <summary>
/// Determines whether the specified train parameters are valid.
/// </summary>
/// <param name="trainParams">The train parameters.</param>
/// <returns>
/// <c>true</c> if the specified train parameters are valid; otherwise, <c>false</c>.
/// Parameters without an algorithm are not rejected for that reason.
/// </returns>
public static bool IsValid(TrainingParameters trainParams)
{
    if (!trainParams.IsValid())
    {
        return false;
    }

    // A missing algorithm is not an error: GetEventTrainer falls back to GIS in that case.
    // Checking for null first also keeps a null key away from the trainer lookup.
    var algorithmName = trainParams.Get(Parameters.Algorithm);
    if (algorithmName != null &&
        !builtInTrainers.ContainsKey(algorithmName) &&
        GetTrainerType(trainParams) == null)
    {
        return false;
    }

    // The data indexer is optional, but when present it must be one of the known kinds.
    var dataIndexer = trainParams.Get(Parameters.DataIndexer);
    if (dataIndexer == null)
    {
        return true;
    }

    switch (dataIndexer)
    {
        case Parameters.DataIndexers.OnePass:
        case Parameters.DataIndexers.TwoPass:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Trains a part-of-speech model from the word/tag samples stored in the given file.
/// </summary>
/// <param name="path">The path of the training file.</param>
/// <param name="mt">The kind of model to train.</param>
/// <returns>The trained <see cref="POSModel"/>.</returns>
/// <exception cref="System.NotSupportedException">The model type is neither Maxent nor Perceptron.</exception>
public static POSModel TrainModel(string path, ModelType mt)
{
    // The file handle is released once training has finished, or as soon as it fails.
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        WordTagSampleStream stream = new WordTagSampleStream(fs);

        TrainingParameters trainParams = new TrainingParameters();
        trainParams.Set(Parameters.Iterations, "100");
        trainParams.Set(Parameters.Cutoff, "0");

        switch (mt)
        {
            case ModelType.Maxent:
                trainParams.Set(Parameters.Algorithm, "MAXENT");
                break;
            case ModelType.Perceptron:
                trainParams.Set(Parameters.Algorithm, "PERCEPTRON");
                break;
            default:
                throw new NotSupportedException();
        }

        return POSTaggerME.Train(TRAINING_LANGUAGE, stream, trainParams, new POSTaggerFactory());
    }
}
/// <summary>
/// Gets the sequence model trainer.
/// </summary>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="reportMap">The report map.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>The <see cref="ISequenceTrainer"/> trainer object.</returns>
/// <exception cref="System.InvalidOperationException">Trainer type couldn't be determined!</exception>
public static ISequenceTrainer GetSequenceModelTrainer(TrainingParameters parameters, Dictionary<string, string> reportMap, Monitor monitor)
{
    var trainerType = parameters.Get(Parameters.Algorithm);

    ISequenceTrainer trainer = null;

    if (trainerType != null)
    {
        // A custom trainer wins over a built-in one registered under the same name, so it is
        // resolved first. This avoids constructing a built-in trainer only to discard it.
        if (customTrainers.ContainsKey(trainerType))
        {
            trainer = CreateCustomTrainer<ISequenceTrainer>(trainerType, monitor);
        }
        else if (builtInTrainers.ContainsKey(trainerType))
        {
            trainer = CreateBuiltinTrainer<ISequenceTrainer>(trainerType, monitor);
        }
    }

    if (trainer == null)
    {
        throw new InvalidOperationException("Trainer type couldn't be determined!");
    }

    trainer.Init(parameters, reportMap);
    return trainer;
}
/// <summary>
/// Trains a document categorizer with the default algorithm on a tiny two-category corpus and
/// checks the best category reported for simple inputs.
/// </summary>
public void testSimpleTraining()
{
    IObjectStream<DocumentSample> trainingSamples = new GenericObjectStream<DocumentSample>(new[]
    {
        new DocumentSample("1", new[] {"a", "b", "c"}),
        new DocumentSample("1", new[] {"a", "b", "c", "1", "2"}),
        new DocumentSample("1", new[] {"a", "b", "c", "3", "4"}),
        new DocumentSample("0", new[] {"x", "y", "z"}),
        new DocumentSample("0", new[] {"x", "y", "z", "5", "6"}),
        new DocumentSample("0", new[] {"x", "y", "z", "7", "8"})
    });

    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Iterations, "100");
    parameters.Set(Parameters.Cutoff, "0");

    var factory = new DocumentCategorizerFactory();
    var trainedModel = DocumentCategorizerME.Train("x-unspecified", trainingSamples, parameters, factory);

    var categorizer = new DocumentCategorizerME(trainedModel);

    var probabilitiesForA = categorizer.Categorize("a");
    Assert.AreEqual("1", categorizer.GetBestCategory(probabilitiesForA));

    var probabilitiesForX = categorizer.Categorize("x");
    Assert.AreEqual("0", categorizer.GetBestCategory(probabilitiesForX));

    // Only the first enumerated entry of the sorted score map is inspected; it must be category 1.
    // NOTE(review): the original remark spoke of the map's last key. Confirm which end is intended.
    var scoresForA = categorizer.SortedScoreMap("a");
    foreach (var entry in scoresForA)
    {
        Assert.AreEqual("1", entry.Value[0]);
        break;
    }
}
//private string _filePath;
/// <summary>
/// Creates the network: stores its collaborators, prepares the data sets, weights and biases,
/// and hands the weight and bias containers to the optimization strategy.
/// </summary>
/// <param name="layerStructure">The layer structure; it is also assigned to the strategy.</param>
/// <param name="trainingParameters">The training parameters.</param>
/// <param name="dataSet">The data set the network works on.</param>
/// <param name="strategy">The optimization strategy; it must not be <c>null</c> because it is used immediately.</param>
/// <param name="randomDistribution">The distribution type for the initial random values.</param>
public ArtificialNeuralNetwork(LayerStructure layerStructure, TrainingParameters trainingParameters, IDataSet dataSet, IOptimizationStrategy strategy, IInitialRandomDistributionType randomDistribution)
{
    _layerStructure = layerStructure;
    _trainingParameters = trainingParameters;
    DataSet = dataSet;
    _strategy = strategy;
    _strategy.LayerStructure = layerStructure;
    _randomDistribution = randomDistribution;

    // The initialization steps below run in a fixed order; keep it when changing this constructor.
    CreateDataSets();
    SetWeights();
    InitPreviousDeltaWeights();
    SetBiases();
    InitPreviousDeltaBiases();

    // The strategy receives the weight/bias fields by reference.
    _strategy.FetchInitialWeightsAndBiases(ref _weights, ref _biases);
    _strategy.FetchPreviousDeltaWeightsAndBiases(ref _previousDeltaWeights, ref _previousDeltaBiases);

    // The save path starts out as the system temporary folder.
    _savePath = Path.GetTempPath();
}
/// <summary>
/// Trains a parser model, using the same iteration count and cutoff for every component.
/// </summary>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="iterations">The number of training iterations.</param>
/// <param name="cutoff">The min number of times a feature must be seen.</param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    int iterations,
    int cutoff)
{
    var cutoffText = cutoff.ToString(CultureInfo.InvariantCulture);
    var iterationsText = iterations.ToString(CultureInfo.InvariantCulture);

    var param = new TrainingParameters();

    // The dictionary namespace only receives a cutoff.
    param.Set("dict", Parameters.Cutoff, cutoffText);

    // Every trained component receives both the cutoff and the iteration count.
    foreach (var componentNamespace in new[] { "tagger", "chunker", "check", "build" })
    {
        param.Set(componentNamespace, Parameters.Cutoff, cutoffText);
        param.Set(componentNamespace, Parameters.Iterations, iterationsText);
    }

    return Train(monitor, languageCode, samples, rules, param);
}
/// <summary>
/// Trains a parser model with the given parameters.
/// </summary>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples. The stream is reset between the training stages.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">
/// The machine learnable parameters. The "build", "tagger", "chunker" and "check" namespaces are used.
/// </param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters)
{
    var dict = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);

    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

    samples.Reset();

    // tag
    var posTaggerParams = parameters.GetNamespace("tagger");
    if (!posTaggerParams.Contains(Parameters.BeamSize))
    {
        posTaggerParams.Set(Parameters.BeamSize, "10");
    }

    // Train with the very object that received the default beam size. Fetching the namespace
    // again could return a fresh copy without that default.
    var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples), posTaggerParams, new POSTaggerFactory());

    samples.Reset();

    // chunk
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory());

    samples.Reset();

    // check
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);

    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
}
/// <summary>
/// Trains a network for the selected categories in two phases, checks its accuracy on the test
/// data and wraps the result in a classifier facade.
/// </summary>
/// <param name="trainingParameters">The selected categories and the parameters of both training phases.</param>
/// <returns>A facade over the classifier built from the trained network.</returns>
private ClassifierFacade RunTrainingForSelectedCategoriesImpl(TrainingParameters trainingParameters)
{
    var selectedCategories = trainingParameters.SelectedCategories.ToArray();
    var dataSet = _dataProvider.GetLearningSetForCategories(selectedCategories);

    // The configured hidden layers are followed by an output layer with one unit per category.
    var layerSizes = _globalTrainerConfiguration.HiddenLayers.ToList();
    layerSizes.Add(selectedCategories.Length);

    var trainerConfiguration = new TrainerConfiguration
    {
        Layers = layerSizes.ToArray(),
        InputsOutputsData = dataSet.TrainingData.ToInputOutputsDataNative(),
    };
    var trainer = new Trainer(trainerConfiguration, _skipPhaseRequest, _guiLogger);

    trainer.RunTraining1(trainingParameters.Training1Parameters);
    trainer.RunTraining2(trainingParameters.Training2Parameters);
    trainer.CheckAccuracy(dataSet.TestData.ToInputOutputsDataNative());

    var configuration = new ClassifierConfiguration
    {
        Categories = selectedCategories
    };
    var classifier = new Classifier(trainer.NeuralNetwork, configuration, _guiLogger);

    return new ClassifierFacade(_dataProvider, classifier);
}
/// <summary>
/// Applies an Adam-style update to every entry of every bias vector.
/// </summary>
/// <param name="trainingParameters">Supplies the learning rate.</param>
/// <param name="timeStep">Starting value of the step counter; it is advanced once per bias before use.</param>
private void UpdateBiasesAdam(TrainingParameters trainingParameters, int timeStep)
{
    // NOTE(review): _m and _v are single running values shared by all biases, and the step
    // counter advances per bias rather than per call. Confirm this is intended.
    int vectorCount = _biases.Count;

    for (int layer = 0; layer < vectorCount; layer++)
    {
        var biases = _biases[layer];
        int entryCount = biases.GetLength(0);

        for (int entry = 0; entry < entryCount; entry++)
        {
            timeStep += 1;
            var gradient = _deltaBiasMatrices[layer][entry];

            // Running first moment and its bias-corrected value.
            _m = _Beta1 * _m + (1 - _Beta1) * gradient;
            double correctedM = _m / (1 - Math.Pow(_Beta1, timeStep));

            // Running second moment and its bias-corrected value.
            _v = _Beta2 * _v + (1 - _Beta2) * Math.Pow(gradient, 2);
            double correctedV = _v / (1 - Math.Pow(_Beta2, timeStep));

            var deltaBias = trainingParameters.learningRate * (correctedM / Math.Sqrt(correctedV + _epsilon));

            // The previous step is subtracted together with the current one, then replaced.
            biases[entry] -= deltaBias + _previousDeltaBiases[layer][entry];
            _previousDeltaBiases[layer][entry] = deltaBias;
        }
    }
}
/// <summary>
/// Gets the event trainer.
/// </summary>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="reportMap">The report map.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
/// This argument can be a <c>null</c> value.
/// </param>
/// <returns>
/// The initialized <see cref="IEventTrainer" />. A GIS trainer is returned when no algorithm is
/// configured. <c>null</c> is returned when the configured algorithm is not an event model trainer.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// Unable to retrieve the trainer from the training parameters.
/// or
/// The trainer type has no constructor taking a <see cref="Monitor"/>.
/// </exception>
public static IEventTrainer GetEventTrainer(TrainingParameters parameters, Dictionary<string, string> reportMap, Monitor monitor)
{
    var algorithm = parameters.Get(Parameters.Algorithm);

    // No algorithm configured: fall back to GIS.
    if (algorithm == null)
    {
        AbstractEventTrainer gis = new GIS(monitor);
        gis.Init(parameters, reportMap);
        return gis;
    }

    var kind = GetTrainerType(parameters);
    if (!kind.HasValue || kind.Value != TrainerType.EventModelTrainer)
    {
        return null;
    }

    var implementation = GetTrainer(algorithm);
    if (implementation == null)
    {
        throw new InvalidOperationException("Unable to retrieve the trainer from the training parameters.");
    }

    // Trainers are created through their single-argument (Monitor) constructor.
    var constructor = implementation.GetConstructor(new [] { typeof(Monitor) });
    if (constructor == null)
    {
        throw new InvalidOperationException("The constructor of the trainer must have a standard constructor.");
    }

    var trainer = (IEventTrainer)constructor.Invoke(new object[] { monitor });
    trainer.Init(parameters, reportMap);
    return trainer;
}
/// <summary>
/// Trains a name finder on data that contains only typed names and checks the spans found in a
/// sentence made of person names.
/// </summary>
public void TestOnlyWithNamesWithTypes()
{
    using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(file));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "70");
        parameters.Set(Parameters.Cutoff, "1");

        var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var trainedModel = NameFinderME.Train("en", samples, parameters, factory);

        var finder = new NameFinderME(trainedModel);

        // Check that the trained model detects the names in the sample sentence.
        var tokens = WhitespaceTokenizer.Instance.Tokenize(
            "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
            "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

        var found = finder.Find(tokens);

        Assert.AreEqual(new Span(0, 2, "person"), found[0]);
        Assert.AreEqual(new Span(2, 4, "person"), found[1]);
        Assert.AreEqual(new Span(4, 6, "person"), found[2]);

        Assert.False(HasOtherAsOutcome(trainedModel));
    }
}
/// <summary>
/// Trains a name finder on data that contains only typed entities and checks the spans and
/// types found in a short sample sentence.
/// </summary>
public void TestOnlyWithEntitiesWithTypes()
{
    using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(file));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "70");
        parameters.Set(Parameters.Cutoff, "1");

        var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var trainedModel = NameFinderME.Train("en", samples, parameters, factory);

        var finder = new NameFinderME(trainedModel);

        // Check that the trained model detects the entities in the sample sentence.
        var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");

        var found = finder.Find(tokens);

        Assert.AreEqual(new Span(0, 1, "organization"), found[0]);
        Assert.AreEqual(new Span(1, 3, "location"), found[1]);
        Assert.AreEqual(new Span(3, 5, "person"), found[2]);

        Assert.False(HasOtherAsOutcome(trainedModel));
    }
}
/// <summary>
/// Trains a name finder on data with several entity types and checks the spans and types found
/// in two tokenized sentences.
/// </summary>
public void TestNameFinderWithMultipleTypes()
{
    using (var file = Tests.OpenFile("opennlp/tools/namefind/voa1.train"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(file));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "70");
        parameters.Set(Parameters.Cutoff, "1");

        var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var trainedModel = NameFinderME.Train("en", samples, parameters, factory);

        var finder = new NameFinderME(trainedModel);

        // First sentence: four names are expected.
        // The span equality comparison covers the type, so no separate type assertions are needed.
        var firstSentence = new []
        {
            "U", ".", "S", ".", "President", "Barack", "Obama", "has", "arrived", "in", "South", "Korea", ",",
            "where", "he", "is", "expected", "to", "show", "solidarity", "with", "the", "country", "'", "s",
            "president", "in", "demanding", "North", "Korea", "move", "toward", "ending", "its", "nuclear",
            "weapons", "programs", "."
        };

        var firstNames = finder.Find(firstSentence);

        Assert.AreEqual(4, firstNames.Length);
        Assert.AreEqual(new Span(0, 4, "location"), firstNames[0]);
        Assert.AreEqual(new Span(5, 7, "person"), firstNames[1]);
        Assert.AreEqual(new Span(10, 12, "location"), firstNames[2]);
        Assert.AreEqual(new Span(28, 30, "location"), firstNames[3]);

        // Second sentence: two names are expected.
        var secondSentence = new[]
        {
            "Scott", "Snyder", "is", "the", "director", "of", "the", "Center", "for", "U", ".", "S", ".",
            "Korea", "Policy", "."
        };

        var secondNames = finder.Find(secondSentence);

        Assert.AreEqual(2, secondNames.Length);
        Assert.AreEqual(new Span(0, 2, "person"), secondNames[0]);
        Assert.AreEqual(new Span(7, 15, "organization"), secondNames[1]);
    }
}
/// <summary>
/// Trains a chunker model with the given parameters. Forwards to the five-argument overload
/// with <c>null</c> as the last argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The chunker factory.</param>
/// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
/// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ChunkerModel Train(
    string languageCode,
    IObjectStream<ChunkSample> samples,
    TrainingParameters parameters,
    ChunkerFactory factory)
{
    var model = Train(languageCode, samples, parameters, factory, null);
    return model;
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserCrossEvaluator"/> class.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="parserType">The parser model type.</param>
/// <param name="monitors">The training monitors.</param>
public ParserCrossEvaluator(
    string languageCode,
    TrainingParameters parameters,
    AbstractHeadRules headRules,
    ParserType parserType,
    params IEvaluationMonitor<Parse>[] monitors)
{
    // The arguments are only stored here.
    this.monitors = monitors;
    this.parserType = parserType;
    this.headRules = headRules;
    this.parameters = parameters;
    this.languageCode = languageCode;
}
/// <summary>
/// Prepares the shared training parameters (maximum entropy, 10 iterations, cutoff 5) and
/// registers the dummy trainer.
/// </summary>
public void Setup()
{
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, GIS.MaxEntropy);
    parameters.Set(Parameters.Iterations, "10");
    parameters.Set(Parameters.Cutoff, "5");
    mlParams = parameters;

    TrainerFactory.RegisterTrainer("Dummy", typeof(DummyTrainer));
}
/// <summary>
/// Trains a parser model with the given parameters. Forwards to the five-argument overload
/// with <c>null</c> as the last argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters)
{
    var model = Train(languageCode, samples, rules, parameters, null);
    return model;
}
/// <summary>
/// Trains a name finder model using <c>DefaultType</c> as the type argument.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory)
{
    var model = Train(languageCode, DefaultType, samples, parameters, factory);
    return model;
}
/// <summary>
/// Trains a sentence detection model with the given parameters. Forwards to the five-argument
/// overload with <c>null</c> as the last argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="factory">The sentence detector factory.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="SentenceModel"/> object.</returns>
public static SentenceModel Train(
    string languageCode,
    IObjectStream<SentenceSample> samples,
    SentenceDetectorFactory factory,
    TrainingParameters parameters)
{
    var model = Train(languageCode, samples, factory, parameters, null);
    return model;
}
/// <summary>
/// Trains a document categorizer model with the given parameters. Forwards to the five-argument
/// overload with <c>null</c> as the last argument.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory.</param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory)
{
    var model = Train(languageCode, samples, parameters, factory, null);
    return model;
}
/// <summary>
/// Trains a chunker on the sample stream (70 iterations, cutoff 1) and stores it for the tests.
/// </summary>
public void Setup()
{
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Iterations, "70");
    parameters.Set(Parameters.Cutoff, "1");

    var samples = CreateSampleStream();
    var factory = new ChunkerFactory();

    chunker = new ChunkerME(ChunkerME.Train("en", samples, parameters, factory));
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenNameFinderCrossValidator"/> class with a
/// default <see cref="TokenNameFinderFactory"/>.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type"><c>null</c> or an override type for all types in the training data.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="listeners">The listeners.</param>
public TokenNameFinderCrossValidator(
    string languageCode,
    string type,
    TrainingParameters parameters,
    params IEvaluationMonitor<NameSample>[] listeners)
{
    // Objects owned by this instance.
    factory = new TokenNameFinderFactory();
    FMeasure = new FMeasure<Span>();

    // Values supplied by the caller.
    this.languageCode = languageCode;
    this.type = type;
    this.parameters = parameters;
    this.listeners = listeners;
}
/// <summary>
/// Trains a tokenizer model from the token training file (100 iterations, cutoff 0).
/// </summary>
/// <returns>The trained <see cref="TokenizerModel"/>.</returns>
public static TokenizerModel CreateMaxentTokenModel()
{
    using (var trainingFile = Tests.OpenFile("/opennlp/tools/tokenize/token.train"))
    {
        var sampleStream = new TokenSampleStream(new PlainTextByLineStream(trainingFile));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "100");
        parameters.Set(Parameters.Cutoff, "0");

        var factory = new TokenizerFactory("en", null, true);
        var model = TokenizerME.Train(sampleStream, factory, parameters);
        return model;
    }
}
/// <summary>
/// Prepares the shared training parameters (MaxEnt, 10 iterations, cutoff 5) and registers the
/// dummy trainer.
/// </summary>
public void Setup()
{
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);
    parameters.Set(Parameters.Iterations, "10");
    parameters.Set(Parameters.Cutoff, "5");
    mlParams = parameters;

    TrainerFactory.RegisterTrainer("Dummy", typeof(DummyTrainer));
}
/// <summary>
/// Trains a tokenizer model from the token training file (100 iterations, cutoff 0).
/// </summary>
/// <returns>The trained <see cref="TokenizerModel"/>.</returns>
public static TokenizerModel CreateMaxentTokenModel()
{
    using (var trainingFile = Tests.OpenFile("/opennlp/tools/tokenize/token.train"))
    {
        var sampleStream = new TokenSampleStream(new PlainTextByLineStream(trainingFile));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "100");
        parameters.Set(Parameters.Cutoff, "0");

        var factory = new TokenizerFactory("en", null, true);
        var model = TokenizerME.Train(sampleStream, factory, parameters);
        return model;
    }
}
/// <summary>
/// Trains a name finder on the annotated sentences and checks the spans found in two
/// tokenized sample sentences.
/// </summary>
public void TestNameFinder()
{
    using (var file = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt"))
    {
        var samples = new NameSampleStream(new PlainTextByLineStream(file, "ISO-8859-1"));

        var parameters = new TrainingParameters();
        parameters.Set(Parameters.Iterations, "70");
        parameters.Set(Parameters.Cutoff, "1");

        var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
        var trainedModel = NameFinderME.Train("en", samples, parameters, factory);

        var finder = new NameFinderME(trainedModel);

        // First sentence: a single name at the start.
        var firstSentence = new[]
        {
            "Alisa", "appreciated", "the", "hint", "and", "enjoyed", "a", "delicious", "traditional", "meal."
        };

        var firstNames = finder.Find(firstSentence);

        Assert.AreEqual(1, firstNames.Length);
        Assert.AreEqual(new Span(0, 1, Type), firstNames[0]);

        // Second sentence: two names are expected.
        var secondSentence = new[] { "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "." };

        var secondNames = finder.Find(secondSentence);

        Assert.AreEqual(2, secondNames.Length);
        Assert.AreEqual(new Span(1, 2, Type), secondNames[0]);
        Assert.AreEqual(new Span(4, 6, Type), secondNames[1]);
    }
}
/// <summary>
/// Trains a MaxEnt model on the prep-attach data with otherwise default parameters and checks
/// it against the expected value.
/// </summary>
public void TestMaxentOnPrepAttachDataWithParamsDefault()
{
    var report = new Dictionary<string, string>();

    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

    var maxentTrainer = TrainerFactory.GetEventTrainer(parameters, report, null);
    var trainedModel = maxentTrainer.Train(PrepAttachDataUtility.CreateTrainingStream());

    PrepAttachDataUtility.TestModel(trainedModel, 0.8086159940579352d);
}
/// <summary>
/// Trains a MaxEnt model on the prep-attach data with the two-pass data indexer and a cutoff
/// of 1, and checks it against the expected value.
/// </summary>
public void TestMaxentOnPrepAttachDataWithParams()
{
    var report = new Dictionary<string, string>();

    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);
    parameters.Set(Parameters.DataIndexer, Parameters.DataIndexers.TwoPass);
    parameters.Set(Parameters.Cutoff, "1");

    var maxentTrainer = TrainerFactory.GetEventTrainer(parameters, report, null);
    var trainedModel = maxentTrainer.Train(PrepAttachDataUtility.CreateTrainingStream());

    PrepAttachDataUtility.TestModel(trainedModel, 0.7997028967566229d);
}
/// <summary>
/// Initializes a new instance of the <see cref="ChunkerCrossValidator"/> class.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="parameters">The training parameters.</param>
/// <param name="factory">The chunker factory.</param>
/// <param name="listeners">The evaluation listeners.</param>
public ChunkerCrossValidator(
    string languageCode,
    TrainingParameters parameters,
    ChunkerFactory factory,
    params IEvaluationMonitor<ChunkSample>[] listeners)
{
    // Values supplied by the caller.
    this.languageCode = languageCode;
    this.parameters = parameters;
    this.listeners = listeners;
    chunkerFactory = factory;

    // A fresh measure for this validator.
    FMeasure = new FMeasure<Span>();
}
/// <summary>
/// Trains a perceptron on the prep-attach data with an explicit tolerance and checks the model
/// against the expected value.
/// </summary>
public void TestPerceptronOnPrepAttachDataWithTolerance()
{
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
    parameters.Set(Parameters.Cutoff, "1");
    parameters.Set(Parameters.Iterations, "500");
    parameters.Set(Parameters.Tolerance, "0.0001");

    var perceptronTrainer = TrainerFactory.GetEventTrainer(parameters, null, null);
    var trainedModel = perceptronTrainer.Train(PrepAttachDataUtility.CreateTrainingStream());

    PrepAttachDataUtility.TestModel(trainedModel, 0.7677642980935875);
}
/// <summary>
/// Trains a perceptron event model (cutoff 1, skipped averaging enabled) and hands the result to
/// <c>PrepAttachDataUtility.TestModel</c> together with the expected value.
/// </summary>
public void TestPerceptronOnPrepAttachDataWithSkippedAveraging() {
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
    parameters.Set(Parameters.Cutoff, "1");
    parameters.Set(Parameters.UseSkippedAveraging, "true");

    // No report map and no monitor are supplied for this run.
    var model = TrainerFactory
        .GetEventTrainer(parameters, null, null)
        .Train(PrepAttachDataUtility.CreateTrainingStream());

    PrepAttachDataUtility.TestModel(model, 0.773706362961129);
}
/// <summary>
/// Runs a 2-fold cross validation of the name finder with a
/// <c>NameEvaluationErrorListener</c> attached and checks that an F-measure is available afterwards.
/// </summary>
public void TestWithNameEvaluationErrorListener() {
    using (var input = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt")) {
        var samples = new NameSampleStream(new PlainTextByLineStream(input, "ISO-8859-1"));

        var mlParams = new TrainingParameters();
        mlParams.Set(Parameters.Iterations, "70");
        mlParams.Set(Parameters.Cutoff, "1");
        mlParams.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

        var validator = new TokenNameFinderCrossValidator(
            "en",
            Type,
            mlParams,
            new NameEvaluationErrorListener());

        validator.Evaluate(samples, 2);

        Assert.NotNull(validator.FMeasure);
    }
}
/// <summary>
/// Creates a scorer whose training parameters are read from the embedded
/// <c>HCD_Trypsin</c> resource.
/// </summary>
/// <param name="activationMethod">Must be <c>ActivationMethod.HCD</c>.</param>
/// <param name="ms2DetectorType">Not consulted by this constructor.</param>
/// <param name="enzyme">Must be <c>Enzyme.Trypsin</c>.</param>
/// <param name="protocol">Not consulted by this constructor.</param>
/// <exception cref="System.ArgumentException">
/// The activation method / enzyme combination is anything other than HCD with Trypsin.
/// </exception>
public RankScore(ActivationMethod activationMethod, Ms2DetectorType ms2DetectorType, Enzyme enzyme, Protocol protocol) {
    var hasParameterFile = activationMethod == ActivationMethod.HCD && enzyme == Enzyme.Trypsin;
    if (!hasParameterFile) {
        throw new ArgumentException("No parameter file available for selected arguments.");
    }

    var resource = Properties.Resources.HCD_Trypsin;

    // Stage the resource in memory so it can be handed over as a stream.
    // The writer is flushed but deliberately left open: the stream is still in use afterwards.
    var buffer = new MemoryStream();
    var bufferWriter = new StreamWriter(buffer);
    bufferWriter.Write(resource);
    bufferWriter.Flush();

    // Rewind so the consumer starts reading at the first byte.
    buffer.Position = 0;

    _trainingParameters = new TrainingParameters(buffer);
}
/// <summary>
/// Trains an English POS model from <c>CreateSampleStream()</c> using 100 iterations and a cutoff of 5.
/// </summary>
/// <param name="type">Selects the training algorithm; defaults to <c>ModelType.Maxent</c>.</param>
/// <returns>The trained <see cref="POSModel"/>.</returns>
/// <exception cref="System.NotSupportedException">
/// <paramref name="type"/> is neither <c>Maxent</c> nor <c>Perceptron</c>.
/// </exception>
internal static POSModel TrainPOSModel(ModelType type = ModelType.Maxent) {
    var mlParams = new TrainingParameters();

    if (type == ModelType.Maxent) {
        mlParams.Set(Parameters.Algorithm, "MAXENT");
    } else if (type == ModelType.Perceptron) {
        mlParams.Set(Parameters.Algorithm, "PERCEPTRON");
    } else {
        throw new NotSupportedException();
    }

    mlParams.Set(Parameters.Iterations, "100");
    mlParams.Set(Parameters.Cutoff, "5");

    return POSTaggerME.Train("en", CreateSampleStream(), mlParams, new POSTaggerFactory());
}
/// <summary>
/// Trains an English sentence detector (100 iterations, cutoff 0) from the sample sentences
/// file, checks the language and token-end flag recorded in the model, and then runs
/// <c>EvalSentences</c> on a detector built from that model.
/// </summary>
public void TestSentenceDetector() {
    using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
        var mlParams = new TrainingParameters();
        mlParams.Set(Parameters.Iterations, "100");
        mlParams.Set(Parameters.Cutoff, "0");

        var sdFactory = new SentenceDetectorFactory("en", true, null, null);
        var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

        var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

        Assert.AreEqual("en", model.Language);

        // Expected value first, actual second, so that a failure message reports the two
        // values the right way round.
        Assert.AreEqual(true, model.UseTokenEnd);

        EvalSentences(new SentenceDetectorME(model));
    }
}
/// <summary>
/// Trains a small English tokenizer model from six hand-written samples: three words, each
/// with and without a trailing comma.
/// </summary>
/// <returns>The model returned by <c>TokenizerME.Train</c> (100 iterations, cutoff 0).</returns>
public static TokenizerModel CreateSimpleMaxentTokenModel() {
    // Each sample pairs a text with the character spans of its tokens.
    var samples = new List<TokenSample> {
        new TokenSample("year", new[] { new Span(0, 4) }),
        new TokenSample("year,", new[] { new Span(0, 4), new Span(4, 5) }),
        new TokenSample("it,", new[] { new Span(0, 2), new Span(2, 3) }),
        new TokenSample("it", new[] { new Span(0, 2) }),
        new TokenSample("yes", new[] { new Span(0, 3) }),
        new TokenSample("yes,", new[] { new Span(0, 3), new Span(3, 4) })
    };

    var trainingParameters = new TrainingParameters();
    trainingParameters.Set(Parameters.Iterations, "100");
    trainingParameters.Set(Parameters.Cutoff, "0");

    var sampleStream = new CollectionObjectStream<TokenSample>(samples);
    var tokenizerFactory = new TokenizerFactory("en", null, true);

    return TokenizerME.Train(sampleStream, tokenizerFactory, trainingParameters);
}
/// <summary>
/// Trains a Dutch sentence detector on a small text with an abbreviation dictionary, runs it
/// on the same text, and expects exactly eight sentences, each equal to the corresponding
/// non-empty line of the input.
/// </summary>
public void AbbreviationDefaultBehaviorTest() {
    var nl = Environment.NewLine;

    // One sentence per line; the empty line in the middle is intentional.
    var text =
        "Test E-mail met zowel letsel als 12. Toedracht in het onderwerp." + nl +
        "Dit is een 2e regel met een tel. 011-4441444 erin." + nl +
        "Dit is een 2e regel." + nl +
        "Dit is een 2e regel." + nl +
        nl +
        "Dit is een 2e regel met een tel. 033-1333123 erin!" + nl +
        "Test E-mail met zowel winst als 12. toedracht in het onderwerp." + nl +
        "Dit is een 2e regel!" + nl +
        "Dit is een 2e regel." + nl;

    var abbreviations = new SharpNL.Dictionary.Dictionary(false) {
        {"12. Toedracht"},
        {"Tel."},
    };

    var mlParams = new TrainingParameters();
    mlParams.Set(Parameters.Algorithm, "MAXENT");
    mlParams.Set(Parameters.TrainerType, "Event");
    mlParams.Set(Parameters.Iterations, "100");
    mlParams.Set(Parameters.Cutoff, "5");

    char[] endOfSentenceChars = { '.', '?', '!' };
    var factory = new SentenceDetectorFactory("nl", true, abbreviations, endOfSentenceChars);

    var reader = new StringReader(text);
    var sampleStream = new SentenceSampleStream(new PlainTextByLineStream(reader));

    var model = SentenceDetectorME.Train("nl", sampleStream, factory, mlParams);
    var detector = new SentenceDetectorME(model);

    var detected = detector.SentDetect(text);
    var expectedLines = text.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

    Assert.AreEqual(8, detected.Length);

    for (var index = 0; index < detected.Length; index++) {
        Assert.AreEqual(expectedLines[index], detected[index]);
    }
}
/// <summary>
/// Trains one chunker model through <c>ChunkerME.Train</c> and one through
/// <c>opennlp.tools.chunker.ChunkerME.train</c>, both with 70 iterations and a cutoff of 1,
/// and stores a chunker built from each model in <c>sChunker</c> and <c>jChunker</c>.
/// </summary>
public void Setup() {
    var sharpParams = new TrainingParameters();
    sharpParams.Set(Parameters.Iterations, "70");
    sharpParams.Set(Parameters.Cutoff, "1");

    var javaParams = new opennlp.tools.util.TrainingParameters();
    javaParams.put("Iterations", "70");
    javaParams.put("Cutoff", "1");

    var sharpModel = ChunkerME.Train(
        "en",
        ChunkerMETest.CreateSampleStream(),
        sharpParams,
        new ChunkerFactory());

    var javaModel = opennlp.tools.chunker.ChunkerME.train(
        "en",
        JavaSampleStream(),
        javaParams,
        new opennlp.tools.chunker.ChunkerFactory());

    // Both models must exist before either chunker is created.
    Assert.NotNull(sharpModel);
    Assert.NotNull(javaModel);

    sChunker = new ChunkerME(sharpModel);
    jChunker = new opennlp.tools.chunker.ChunkerME(javaModel);
}
/// <summary>
/// Trains a perceptron event model (cutoff 1, 500 iterations, step size decrease 0.06) and
/// hands the result to <c>PrepAttachDataUtility.TestModel</c> together with the expected value.
/// </summary>
public void TestPerceptronOnPrepAttachDataWithStepSizeDecrease() {
    var parameters = new TrainingParameters();
    parameters.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
    parameters.Set(Parameters.Cutoff, "1");
    parameters.Set(Parameters.Iterations, "500");
    parameters.Set(Parameters.StepSizeDecrease, "0.06");

    // No report map and no monitor are supplied for this run.
    var model = TrainerFactory
        .GetEventTrainer(parameters, null, null)
        .Train(PrepAttachDataUtility.CreateTrainingStream());

    /*
     * The Java version of this test fails as well, so for now the value checked below is
     * assumed to be the correct one. Failure reported by the Java test:
     *
     *   java.lang.AssertionError: expected:<0.7756870512503095> but was:<0.7766773953948998>
     *     at org.junit.Assert.fail(Assert.java:91)
     *     at org.junit.Assert.failNotEquals(Assert.java:645)
     *     at org.junit.Assert.assertEquals(Assert.java:441)
     *     at org.junit.Assert.assertEquals(Assert.java:510)
     *
     * Value used by the OpenNLP test: 0.7756870512503095
     */
    PrepAttachDataUtility.TestModel(model, 0.77742015350334237);
}
/// <summary>
/// Trains a name finder model, using <c>DefaultType</c> as the type argument of the
/// overload that accepts an explicit type.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory,
    Monitor monitor)
    => Train(languageCode, DefaultType, samples, parameters, factory, monitor);
/// <summary>
/// Trains a parser model with the given parameters.
/// </summary>
/// <remarks>
/// The stages run in this order, resetting <paramref name="samples"/> between them:
/// dictionary, build model ("build" namespace), POS tagger ("tagger"), chunker ("chunker")
/// and check model ("check"). If the "tagger" namespace defines no beam size, a beam size of
/// 10 is used for the tagger.
/// </remarks>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. It is passed to every training stage.
/// This argument can be <c>null</c>.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters) {

    var dict = BuildDictionary(samples, rules, parameters);
    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);
    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");
    samples.Reset();

    // tag
    var posTaggerParams = parameters.GetNamespace("tagger");
    if (!posTaggerParams.Contains(Parameters.BeamSize)) {
        posTaggerParams.Set(Parameters.BeamSize, "10");
    }

    // Train with the very instance the default was applied to. Fetching the namespace a
    // second time would silently lose the default whenever GetNamespace returns a copy.
    var posModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        posTaggerParams,
        new POSTaggerFactory(),
        monitor);
    samples.Reset();

    // chunk
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory(),
        monitor);
    samples.Reset();

    // check (this event stream is created without the dictionary)
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);
    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
}
/// <summary>
/// Trains a document categorizer model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">
/// The document categorizer factory; its feature generators are used to build the event stream.
/// </param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/>.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory,
    Monitor monitor) {

    // Handed to the trainer factory as its report map and afterwards stored with the model.
    var manifest = new Dictionary<string, string>();

    var events = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);

    var trainedModel = TrainerFactory
        .GetEventTrainer(parameters, manifest, monitor)
        .Train(events);

    return new DocumentCategorizerModel(languageCode, trainedModel, manifest, factory);
}
/// <summary>
/// Trains a tree-insert parser model with the given parameters.
/// </summary>
/// <remarks>
/// The stages run in this order, resetting <paramref name="samples"/> between them:
/// dictionary, POS tagger ("tagger" namespace), chunker ("chunker"), build model ("build"),
/// check model ("check") and attach model ("attach"). The report of each event trainer is
/// merged into the manifest under the name of its namespace.
/// </remarks>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters,
    Monitor monitor) {

    var manifest = new Dictionary<string, string>();

#if DEBUG
    System.Diagnostics.Debug.Print("Building dictionary");
#endif
    var dict = BuildDictionary(samples, rules, parameters);
    samples.Reset();

    // Stage: POS tagger.
    var taggerModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        parameters.GetNamespace("tagger"),
        new POSTaggerFactory(),
        monitor);
    samples.Reset();

    // Stage: chunker.
    var chunkerModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory(),
        monitor);
    samples.Reset();

    // Stage: build model (the only event stream that receives the dictionary).
#if DEBUG
    System.Diagnostics.Debug.Print("Training builder");
#endif
    var buildEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReport = new Dictionary<string, string>();
    var builderModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("build"), buildReport, monitor)
        .Train(buildEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, buildReport, "build");
    samples.Reset();

    // Stage: check model.
#if DEBUG
    System.Diagnostics.Debug.Print("Training checker");
#endif
    var checkEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReport = new Dictionary<string, string>();
    var checkerModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("check"), checkReport, monitor)
        .Train(checkEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, checkReport, "check");
    samples.Reset();

    // Stage: attach model.
#if DEBUG
    System.Diagnostics.Debug.Print("Training attacher");
#endif
    var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
    var attachReport = new Dictionary<string, string>();
    var attacherModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("attach"), attachReport, monitor)
        .Train(attachEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, attachReport, "attach");

    return new ParserModel(
        languageCode,
        builderModel,
        checkerModel,
        attacherModel,
        taggerModel,
        chunkerModel,
        rules,
        ParserType.TreeInsert,
        manifest);
}
/// <summary>
/// Trains a parser model using the same iteration count and cutoff for several stages.
/// </summary>
/// <remarks>
/// The cutoff is written to the "dict", "tagger", "chunker", "check" and "build" namespaces;
/// the iteration count to "tagger", "chunker", "check" and "build". No other namespace is
/// written by this overload.
/// </remarks>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="iterations">The number of training iterations.</param>
/// <param name="cutoff">The min number of times a feature must be seen.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(string languageCode, IObjectStream<Parse> samples, AbstractHeadRules rules, int iterations, int cutoff, Monitor monitor) {
    // Culture-invariant text, formatted once and reused for every namespace.
    var cutoffText = cutoff.ToString(CultureInfo.InvariantCulture);
    var iterationsText = iterations.ToString(CultureInfo.InvariantCulture);

    var mlParams = new TrainingParameters();

    // The dictionary stage only takes a cutoff.
    mlParams.Set("dict", Parameters.Cutoff, cutoffText);

    foreach (var stage in new[] { "tagger", "chunker", "check", "build" }) {
        mlParams.Set(stage, Parameters.Cutoff, cutoffText);
        mlParams.Set(stage, Parameters.Iterations, iterationsText);
    }

    return Train(languageCode, samples, rules, mlParams, monitor);
}
/// <summary>
/// Trains a parser model with the given parameters and without a monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters)
    => Train(languageCode, samples, rules, parameters, null);
/// <summary>
/// Trains a Part of Speech model with the given parameters.
/// </summary>
/// <remarks>
/// The trainer type is resolved from <paramref name="parameters"/> and decides whether an
/// event trainer, an event model sequence trainer or a sequence trainer is used.
/// </remarks>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">
/// The POS tagger factory; it supplies the context generator and is stored with the model.
/// </param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(
    string languageCode,
    IObjectStream<POSSample> samples,
    TrainingParameters parameters,
    POSTaggerFactory factory,
    Monitor monitor) {

    var contextGenerator = factory.GetPOSContextGenerator();
    var manifest = new Dictionary<string, string>();

    switch (TrainerFactory.GetTrainerType(parameters)) {
        case TrainerType.EventModelTrainer: {
            var events = new POSSampleEventStream(samples, contextGenerator);
            var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);
            var eventModel = eventTrainer.Train(events);

            return new POSModel(languageCode, eventModel, manifest, factory);
        }
        case TrainerType.EventModelSequenceTrainer: {
            var sequences = new POSSampleSequenceStream(samples, contextGenerator);
            var sequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);
            var sequenceModel = sequenceTrainer.Train(sequences);

            return new POSModel(languageCode, sequenceModel, manifest, factory);
        }
        case TrainerType.SequenceTrainer: {
            var modelTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

            // TODO: This will probably cause issue, since the feature generator uses the outcomes array
            var sequences = new POSSampleSequenceStream(samples, contextGenerator);
            var posSequenceModel = modelTrainer.Train(sequences);

            return new POSModel(languageCode, posSequenceModel, manifest, factory);
        }
        default:
            throw new NotSupportedException("Trainer type is not supported.");
    }
}
/// <summary>
/// Trains a Part of Speech model with the given parameters and without a monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The POS tagger factory.</param>
/// <returns>The trained <see cref="POSModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static POSModel Train(
    string languageCode,
    IObjectStream<POSSample> samples,
    TrainingParameters parameters,
    POSTaggerFactory factory)
    => Train(languageCode, samples, parameters, factory, null);
/// <summary>
/// Trains a document categorizer model with the given parameters and without a monitor.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples.</param>
/// <param name="parameters">The machine learnable parameters.</param>
/// <param name="factory">The document categorizer factory.</param>
/// <returns>The trained <see cref="DocumentCategorizerModel"/>.</returns>
public static DocumentCategorizerModel Train(
    string languageCode,
    IObjectStream<DocumentSample> samples,
    TrainingParameters parameters,
    DocumentCategorizerFactory factory)
    => Train(languageCode, samples, parameters, factory, null);
/// <summary>
/// Trains a name finder model with the given parameters.
/// </summary>
/// <remarks>
/// The trainer type is resolved from <paramref name="parameters"/>. The event trainer and the
/// event model sequence trainer produce a maxent model, which is stored together with the
/// beam size read from the parameters; the sequence trainer produces a sequence
/// classification model, which is stored without a beam size.
/// </remarks>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">
/// Overrides the type parameter in the provided samples. This value can be null. It is only
/// forwarded to the event stream used by the event trainer.
/// </param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training or to
/// cancel the training operation. This argument can be <c>null</c>.
/// </param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
/// <exception cref="System.InvalidOperationException">The trainer type is not one of the three handled types.</exception>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory,
    Monitor monitor) {

    var beamSize = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
    var manifest = new Dictionary<string, string>();
    var trainerType = TrainerFactory.GetTrainerType(parameters);

    // At most one of these two is assigned, depending on the trainer type.
    IMaxentModel maxentModel = null;
    ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

    if (trainerType == TrainerType.EventModelTrainer) {
        var events = new NameFinderEventStream(
            samples,
            type,
            factory.CreateContextGenerator(),
            factory.CreateSequenceCodec());

        var eventTrainer = TrainerFactory.GetEventTrainer(parameters, manifest, monitor);
        maxentModel = eventTrainer.Train(events);
    } else if (trainerType == TrainerType.EventModelSequenceTrainer) {
        var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

        var eventSequenceTrainer = TrainerFactory.GetEventModelSequenceTrainer(parameters, manifest, monitor);
        maxentModel = eventSequenceTrainer.Train(sequences);
    } else if (trainerType == TrainerType.SequenceTrainer) {
        var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

        var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);
        sequenceModel = sequenceTrainer.Train(sequences);
    } else {
        throw new InvalidOperationException("Unexpected trainer type!");
    }

    if (sequenceModel == null) {
        return new TokenNameFinderModel(
            languageCode,
            maxentModel,
            beamSize,
            factory.FeatureGenerator,
            factory.Resources,
            manifest,
            factory.SequenceCodec,
            factory);
    }

    return new TokenNameFinderModel(
        languageCode,
        sequenceModel,
        factory.FeatureGenerator,
        factory.Resources,
        manifest,
        factory.SequenceCodec,
        factory);
}
/// <summary>
/// Trains a name finder model with the given parameters and without a monitor.
/// </summary>
/// <param name="languageCode">The language of the training data.</param>
/// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
/// <param name="samples">The training samples.</param>
/// <param name="parameters">The machine learning train parameters.</param>
/// <param name="factory">The name finder factory.</param>
/// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
public static TokenNameFinderModel Train(
    string languageCode,
    string type,
    IObjectStream<NameSample> samples,
    TrainingParameters parameters,
    TokenNameFinderFactory factory)
    => Train(languageCode, type, samples, parameters, factory, null);